001/*
002 * Licensed to the Apache Software Foundation (ASF) under one or more
003 * contributor license agreements.  See the NOTICE file distributed with
004 * this work for additional information regarding copyright ownership.
005 * The ASF licenses this file to You under the Apache License, Version 2.0
006 * (the "License"); you may not use this file except in compliance with
007 * the License.  You may obtain a copy of the License at
008 *
009 *      http://www.apache.org/licenses/LICENSE-2.0
010 *
011 * Unless required by applicable law or agreed to in writing, software
012 * distributed under the License is distributed on an "AS IS" BASIS,
013 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
014 * See the License for the specific language governing permissions and
015 * limitations under the License.
016 */
017
018package org.apache.commons.net.examples.mail;
019
020import java.io.BufferedWriter;
021import java.io.File;
022import java.io.FileWriter;
023import java.io.IOException;
024import java.net.URI;
025import java.net.URISyntaxException;
026import java.text.ParseException;
027import java.text.SimpleDateFormat;
028import java.util.ArrayList;
029import java.util.Date;
030import java.util.Iterator;
031import java.util.List;
032import java.util.TimeZone;
033import java.util.concurrent.atomic.AtomicInteger;
034import java.util.regex.Matcher;
035import java.util.regex.Pattern;
036
037import org.apache.commons.net.PrintCommandListener;
038import org.apache.commons.net.ProtocolCommandEvent;
039import org.apache.commons.net.imap.IMAP;
040import org.apache.commons.net.imap.IMAP.IMAPChunkListener;
041import org.apache.commons.net.imap.IMAPClient;
042import org.apache.commons.net.imap.IMAPReply;
043
044/**
045 * This is an example program demonstrating how to use the IMAP[S]Client class. This program connects to a IMAP[S] server and exports selected messages from a
046 * folder into an mbox file.
047 * <p>
048 * Usage: IMAPExportMbox imap[s]://user:password@host[:port]/folder/path <mboxfile> [sequence-set] [item-names]
049 * <p>
050 * An example sequence-set might be:
051 * <ul>
052 * <li>11,2,3:10,20:*</li>
053 * <li>1:* - this is the default</li>
054 * </ul>
055 * <p>
056 * Some example item-names might be:
057 * <ul>
058 * <li>BODY.PEEK[HEADER]</li>
059 * <li>'BODY.PEEK[HEADER.FIELDS (SUBJECT)]'</li>
060 * <li>ALL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE)'</li>
061 * <li>FAST - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE)'</li>
062 * <li>FULL - macro equivalent to '(FLAGS INTERNALDATE RFC822.SIZE ENVELOPE BODY)'</li>
063 * <li>ENVELOPE X-GM-LABELS</li>
064 * <li>'(INTERNALDATE BODY.PEEK[])' - this is the default</li>
065 * </ul>
066 * <p>
067 * Macro names cannot be combined with anything else; they must be used alone.<br>
068 * Note that using BODY will set the \Seen flag. This is why the default uses BODY.PEEK[].<br>
069 * The item name X-GM-LABELS is a Google Mail extension; it shows the labels for a message.<br>
070 * For example:<br>
071 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 1:10,20<br>
072 * IMAPExportMbox imaps://username:password@imap.googlemail.com/messages_for_export exported.mbox 3 ENVELOPE X-GM-LABELS<br>
073 * <p>
074 * The sequence-set is passed unmodified to the FETCH command.<br>
075 * The item names are wrapped in parentheses if more than one is provided. Otherwise, the parameter is assumed to be wrapped if necessary.<br>
076 * Parameters with spaces must be quoted otherwise the OS shell will normally treat them as separate parameters.<br>
077 * Also the listener that writes the mailbox only captures the multi-line responses (e.g. ones that include BODY references). It does not capture the output
078 * from FETCH commands using item names such as ENVELOPE or FLAGS that return a single line response.
079 */
080public final class IMAPExportMbox {
081
082    private static class MboxListener implements IMAPChunkListener {
083
084        private final BufferedWriter bufferedWriter;
085        volatile AtomicInteger total = new AtomicInteger();
086        volatile String lastFetched;
087        volatile List<String> missingIds = new ArrayList<>();
088        volatile long lastSeq = -1;
089        private final String lineSeparator;
090        private final SimpleDateFormat DATE_FORMAT // for mbox From_ lines
091                = new SimpleDateFormat("EEE MMM dd HH:mm:ss yyyy");
092
093        // e.g. INTERNALDATE "27-Oct-2013 07:43:24 +0000"
094        // for parsing INTERNALDATE
095        private final SimpleDateFormat IDPARSE = new SimpleDateFormat("dd-MMM-yyyy HH:mm:ss Z");
096        private final boolean printHash;
097        private final boolean printMarker;
098        private final boolean checkSequence;
099
100        MboxListener(final BufferedWriter bufferedWriter, final String lineSeparator, final boolean printHash, final boolean printMarker,
101                final boolean checkSequence) {
102            this.lineSeparator = lineSeparator;
103            this.printHash = printHash;
104            this.printMarker = printMarker;
105            DATE_FORMAT.setTimeZone(TimeZone.getTimeZone("GMT"));
106            this.bufferedWriter = bufferedWriter;
107            this.checkSequence = checkSequence;
108        }
109
110        @Override
111        public boolean chunkReceived(final IMAP imap) {
112            final String[] replyStrings = imap.getReplyStrings();
113            Date received = new Date();
114            final String firstLine = replyStrings[0];
115            Matcher m = PATID.matcher(firstLine);
116            if (m.lookingAt()) { // found a match
117                final String date = m.group(PATID_DATE_GROUP);
118                try {
119                    received = IDPARSE.parse(date);
120                } catch (final ParseException e) {
121                    System.err.println(e);
122                }
123            } else {
124                System.err.println("No timestamp found in: " + firstLine + "  - using current time");
125            }
126            String replyTo = "MAILER-DAEMON"; // default
127            for (int i = 1; i < replyStrings.length - 1; i++) {
128                final String line = replyStrings[i];
129                if (line.startsWith("Return-Path: ")) {
130                    final String[] parts = line.split(" ", 2);
131                    if (!parts[1].equals("<>")) {// Don't replace default with blank
132                        replyTo = parts[1];
133                        if (replyTo.startsWith("<")) {
134                            if (replyTo.endsWith(">")) {
135                                replyTo = replyTo.substring(1, replyTo.length() - 1); // drop <> wrapper
136                            } else {
137                                System.err.println("Unexpected Return-path: '" + line + "' in " + firstLine);
138                            }
139                        }
140                    }
141                    break;
142                }
143            }
144            try {
145                // Add initial mbox header line
146                bufferedWriter.append("From ");
147                bufferedWriter.append(replyTo);
148                bufferedWriter.append(' ');
149                bufferedWriter.append(DATE_FORMAT.format(received));
150                bufferedWriter.append(lineSeparator);
151                // Debug
152                bufferedWriter.append("X-IMAP-Response: ").append(firstLine).append(lineSeparator);
153                if (printMarker) {
154                    System.err.println("[" + total + "] " + firstLine);
155                }
156                // Skip first and last lines
157                for (int i = 1; i < replyStrings.length - 1; i++) {
158                    final String line = replyStrings[i];
159                    if (startsWith(line, PATFROM)) {
160                        bufferedWriter.append('>'); // Escape a From_ line
161                    }
162                    bufferedWriter.append(line);
163                    bufferedWriter.append(lineSeparator);
164                }
165                // The last line ends with the trailing closing ")" which needs to be stripped
166                final String lastLine = replyStrings[replyStrings.length - 1];
167                final int lastLength = lastLine.length();
168                if (lastLength > 1) { // there's some content, we need to save it
169                    bufferedWriter.append(lastLine, 0, lastLength - 1);
170                    bufferedWriter.append(lineSeparator);
171                }
172                bufferedWriter.append(lineSeparator); // blank line between entries
173            } catch (final IOException e) {
174                e.printStackTrace();
175                throw new RuntimeException(e); // chunkReceived cannot throw a checked Exception
176            }
177            lastFetched = firstLine;
178            total.incrementAndGet();
179            if (checkSequence) {
180                m = PATSEQ.matcher(firstLine);
181                if (m.lookingAt()) { // found a match
182                    final long msgSeq = Long.parseLong(m.group(PATSEQ_SEQUENCE_GROUP)); // Cannot fail to parse
183                    if (lastSeq != -1) {
184                        final long missing = msgSeq - lastSeq - 1;
185                        if (missing != 0) {
186                            for (long j = lastSeq + 1; j < msgSeq; j++) {
187                                missingIds.add(String.valueOf(j));
188                            }
189                            System.err.println("*** Sequence error: current=" + msgSeq + " previous=" + lastSeq + " Missing=" + missing);
190                        }
191                    }
192                    lastSeq = msgSeq;
193                }
194            }
195            if (printHash) {
196                System.err.print(".");
197            }
198            return true;
199        }
200
201        public void close() throws IOException {
202            if (bufferedWriter != null) {
203                bufferedWriter.close();
204            }
205        }
206    }
207
208    private static final String CRLF = "\r\n";
209    private static final String LF = "\n";
210
211    private static final String EOL_DEFAULT = System.lineSeparator();
212    private static final Pattern PATFROM = Pattern.compile(">*From "); // unescaped From_
213    // e.g. * nnn (INTERNALDATE "27-Oct-2013 07:43:24 +0000" BODY[] {nn} ...)
214    private static final Pattern PATID = // INTERNALDATE
215            Pattern.compile(".*INTERNALDATE \"(\\d\\d-\\w{3}-\\d{4} \\d\\d:\\d\\d:\\d\\d [+-]\\d+)\"");
216
217    private static final int PATID_DATE_GROUP = 1;
218    private static final Pattern PATSEQ = Pattern.compile("\\* (\\d+) "); // Sequence number
219
220    private static final int PATSEQ_SEQUENCE_GROUP = 1;
221
222    // e.g. * 382 EXISTS
223    private static final Pattern PATEXISTS = Pattern.compile("\\* (\\d+) EXISTS"); // Response from SELECT
224
225    // AAAC NO [TEMPFAIL] FETCH Temporary failure on server [CODE: WBL]
226    private static final Pattern PATTEMPFAIL = Pattern.compile("[A-Z]{4} NO \\[TEMPFAIL\\] FETCH .*");
227    private static final int CONNECT_TIMEOUT = 10; // Seconds
228
229    private static final int READ_TIMEOUT = 10;
230
231    public static void main(final String[] args) throws IOException, URISyntaxException {
232        int connect_timeout = CONNECT_TIMEOUT;
233        int read_timeout = READ_TIMEOUT;
234
235        int argIdx = 0;
236        String eol = EOL_DEFAULT;
237        boolean printHash = false;
238        boolean printMarker = false;
239        int retryWaitSecs = 0;
240
241        for (argIdx = 0; argIdx < args.length; argIdx++) {
242            if (args[argIdx].equals("-c")) {
243                connect_timeout = Integer.parseInt(args[++argIdx]);
244            } else if (args[argIdx].equals("-r")) {
245                read_timeout = Integer.parseInt(args[++argIdx]);
246            } else if (args[argIdx].equals("-R")) {
247                retryWaitSecs = Integer.parseInt(args[++argIdx]);
248            } else if (args[argIdx].equals("-LF")) {
249                eol = LF;
250            } else if (args[argIdx].equals("-CRLF")) {
251                eol = CRLF;
252            } else if (args[argIdx].equals("-.")) {
253                printHash = true;
254            } else if (args[argIdx].equals("-X")) {
255                printMarker = true;
256            } else {
257                break;
258            }
259        }
260
261        final int argCount = args.length - argIdx;
262
263        if (argCount < 2) {
264            System.err.println("Usage: IMAPExportMbox [-LF|-CRLF] [-c n] [-r n] [-R n] [-.] [-X]"
265                    + " imap[s]://user:password@host[:port]/folder/path [+|-]<mboxfile> [sequence-set] [itemnames]");
266            System.err.println("\t-LF | -CRLF set end-of-line to LF or CRLF (default is the line.separator system property)");
267            System.err.println("\t-c connect timeout in seconds (default 10)");
268            System.err.println("\t-r read timeout in seconds (default 10)");
269            System.err.println("\t-R temporary failure retry wait in seconds (default 0; i.e. disabled)");
270            System.err.println("\t-. print a . for each complete message received");
271            System.err.println("\t-X print the X-IMAP line for each complete message received");
272            System.err.println("\tthe mboxfile is where the messages are stored; use '-' to write to standard output.");
273            System.err.println("\tPrefix file name with '+' to append to the file. Prefix with '-' to allow overwrite.");
274            System.err.println("\ta sequence-set is a list of numbers/number ranges e.g. 1,2,3-10,20:* - default 1:*");
275            System.err.println("\titemnames are the message data item name(s) e.g. BODY.PEEK[HEADER.FIELDS (SUBJECT)]"
276                    + " or a macro e.g. ALL - default (INTERNALDATE BODY.PEEK[])");
277            System.exit(1);
278        }
279
280        final String uriString = args[argIdx++];
281        URI uri;
282        try {
283            uri = URI.create(uriString);
284        } catch (final IllegalArgumentException e) { // cannot parse the path as is; let's pull it apart and try again
285            final Matcher m = Pattern.compile("(imaps?://[^/]+)(/.*)").matcher(uriString);
286            if (!m.matches()) {
287                throw e;
288            }
289            uri = URI.create(m.group(1)); // Just the scheme and auth parts
290            uri = new URI(uri.getScheme(), uri.getAuthority(), m.group(2), null, null);
291        }
292        final String file = args[argIdx++];
293        String sequenceSet = argCount > 2 ? args[argIdx++] : "1:*";
294        final String itemNames;
295        // Handle 0, 1 or multiple item names
296        if (argCount > 3) {
297            if (argCount > 4) {
298                final StringBuilder sb = new StringBuilder();
299                sb.append("(");
300                for (int i = 4; i <= argCount; i++) {
301                    if (i > 4) {
302                        sb.append(" ");
303                    }
304                    sb.append(args[argIdx++]);
305                }
306                sb.append(")");
307                itemNames = sb.toString();
308            } else {
309                itemNames = args[argIdx++];
310            }
311        } else {
312            itemNames = "(INTERNALDATE BODY.PEEK[])";
313        }
314
315        final boolean checkSequence = sequenceSet.matches("\\d+:(\\d+|\\*)"); // are we expecting a sequence?
316        final MboxListener mboxListener;
317        if (file.equals("-")) {
318            mboxListener = null;
319        } else if (file.startsWith("+")) {
320            final File mbox = new File(file.substring(1));
321            System.out.println("Appending to file " + mbox);
322            mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, true)), eol, printHash, printMarker, checkSequence);
323        } else if (file.startsWith("-")) {
324            final File mbox = new File(file.substring(1));
325            System.out.println("Writing to file " + mbox);
326            mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mbox, false)), eol, printHash, printMarker, checkSequence);
327        } else {
328            final File mboxFile = new File(file);
329            if (mboxFile.exists() && mboxFile.length() > 0) {
330                throw new IOException("mailbox file: " + mboxFile + " already exists and is non-empty!");
331            }
332            System.out.println("Creating file " + mboxFile);
333            mboxListener = new MboxListener(new BufferedWriter(new FileWriter(mboxFile)), eol, printHash, printMarker, checkSequence);
334        }
335
336        final String path = uri.getPath();
337        if (path == null || path.length() < 1) {
338            throw new IllegalArgumentException("Invalid folderPath: '" + path + "'");
339        }
340        final String folder = path.substring(1); // skip the leading /
341
342        // suppress login details
343        final PrintCommandListener listener = new PrintCommandListener(System.out, true) {
344            @Override
345            public void protocolReplyReceived(final ProtocolCommandEvent event) {
346                if (event.getReplyCode() != IMAPReply.PARTIAL) { // This is dealt with by the chunk listener
347                    super.protocolReplyReceived(event);
348                }
349            }
350        };
351
352        // Connect and login
353        final IMAPClient imap = IMAPUtils.imapLogin(uri, connect_timeout * 1000, listener);
354
355        String maxIndexInFolder = null;
356
357        try {
358
359            imap.setSoTimeout(read_timeout * 1000);
360
361            if (!imap.select(folder)) {
362                throw new IOException("Could not select folder: " + folder);
363            }
364
365            for (final String line : imap.getReplyStrings()) {
366                maxIndexInFolder = matches(line, PATEXISTS, 1);
367                if (maxIndexInFolder != null) {
368                    break;
369                }
370            }
371
372            if (mboxListener != null) {
373                imap.setChunkListener(mboxListener);
374            } // else the command listener displays the full output without processing
375
376            while (true) {
377                final boolean ok = imap.fetch(sequenceSet, itemNames);
378                // If the fetch failed, can we retry?
379                if (ok || (retryWaitSecs <= 0) || (mboxListener == null) || !checkSequence) {
380                    break;
381                }
382                final String replyString = imap.getReplyString(); // includes EOL
383                if (!startsWith(replyString, PATTEMPFAIL)) {
384                    throw new IOException("FETCH " + sequenceSet + " " + itemNames + " failed with " + replyString);
385                }
386                System.err.println("Temporary error detected, will retry in " + retryWaitSecs + "seconds");
387                sequenceSet = mboxListener.lastSeq + 1 + ":*";
388                try {
389                    Thread.sleep(retryWaitSecs * 1000);
390                } catch (final InterruptedException e) {
391                    // ignored
392                }
393            }
394
395        } catch (final IOException ioe) {
396            final String count = mboxListener == null ? "?" : mboxListener.total.toString();
397            System.err.println("FETCH " + sequenceSet + " " + itemNames + " failed after processing " + count + " complete messages ");
398            if (mboxListener != null) {
399                System.err.println("Last complete response seen: " + mboxListener.lastFetched);
400            }
401            throw ioe;
402        } finally {
403
404            if (printHash) {
405                System.err.println();
406            }
407
408            if (mboxListener != null) {
409                mboxListener.close();
410                final Iterator<String> missingIds = mboxListener.missingIds.iterator();
411                if (missingIds.hasNext()) {
412                    final StringBuilder sb = new StringBuilder();
413                    for (;;) {
414                        sb.append(missingIds.next());
415                        if (!missingIds.hasNext()) {
416                            break;
417                        }
418                        sb.append(",");
419                    }
420                    System.err.println("*** Missing ids: " + sb.toString());
421                }
422            }
423            imap.logout();
424            imap.disconnect();
425        }
426        if (mboxListener != null) {
427            System.out.println("Processed " + mboxListener.total + " messages.");
428        }
429        if (maxIndexInFolder != null) {
430            System.out.println("Folder contained " + maxIndexInFolder + " messages.");
431        }
432    }
433
434    private static String matches(final String input, final Pattern pat, final int index) {
435        final Matcher m = pat.matcher(input);
436        if (m.lookingAt()) {
437            return m.group(index);
438        }
439        return null;
440    }
441
442    private static boolean startsWith(final String input, final Pattern pat) {
443        final Matcher m = pat.matcher(input);
444        return m.lookingAt();
445    }
446}