OpenConcerto

Dépôt officiel du code source de l'ERP OpenConcerto
sonarqube

svn://code.openconcerto.org/openconcerto

Rev

Blame | Last modification | View Log | RSS feed

/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 * 
 * Copyright 2011-2019 OpenConcerto, by ILM Informatique. All rights reserved.
 * 
 * The contents of this file are subject to the terms of the GNU General Public License Version 3
 * only ("GPL"). You may not use this file except in compliance with the License. You can obtain a
 * copy of the License at http://www.gnu.org/licenses/gpl-3.0.html See the License for the specific
 * language governing permissions and limitations under the License.
 * 
 * When distributing the software, include this License Header Notice in each file.
 */
 
 package org.openconcerto.utils.prog;

import org.openconcerto.utils.CollectionUtils;
import org.openconcerto.utils.text.CSVReader;

import java.io.IOException;
import java.io.Writer;
import java.nio.file.CopyOption;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;

import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeUtility;
import javax.mail.internet.ParseException;

import com.zimbra.common.util.BEncoding;
import com.zimbra.common.util.BEncoding.BEncodingException;

/**
 * Load a CSV file and replace some {@link BEncoding zimbra bencoded map} columns by some of their
 * top-level values. Sample SQL to generate the CSV:
 * 
 * <pre>
 *SELECT 'ID', 'sujet', 'date', 'metadonnées'
UNION ALL
SELECT id, subject, from_unixtime(floor(date)), metadata
FROM mail_item
WHERE mailbox_id=70 and type=5
ORDER BY date DESC
INTO OUTFILE '/tmp/zimbraMails.csv' FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\\' LINES TERMINATED BY '\n' ;
 * </pre>
 * 
 * @author sylvain
 */
public class ExtractFromBEncoding {

    /**
     * Describe which b-encoded column to extract and which of its top-level keys to output.
     */
    static public final class ExtractInfo {
        private final int fieldIndex;
        private final List<String> keys, names;

        /**
         * Create a new instance.
         * 
         * @param fieldIndex the 0-based index of the b-encoded column in the input CSV.
         * @param keysAndNames items of the form <code>key:name</code>, the part before the first
         *        colon is the key in the b-encoded map, the remainder is the name of the output
         *        column.
         * @throws IllegalArgumentException if an item has no colon.
         */
        public ExtractInfo(int fieldIndex, String[] keysAndNames) {
            super();
            this.fieldIndex = fieldIndex;
            final List<String> keys = new ArrayList<>(keysAndNames.length);
            final List<String> names = new ArrayList<>(keysAndNames.length);
            for (final String kAn : keysAndNames) {
                final int colonIndex = kAn.indexOf(':');
                // fail with an explicit message instead of an obscure
                // StringIndexOutOfBoundsException from substring()
                if (colonIndex < 0) {
                    throw new IllegalArgumentException("Missing ':' between key and name in '" + kAn + "'");
                }
                keys.add(kAn.substring(0, colonIndex));
                names.add(kAn.substring(colonIndex + 1));
            }
            this.keys = Collections.unmodifiableList(keys);
            this.names = Collections.unmodifiableList(names);
            assert this.keys.size() > 0 && this.keys.size() == this.names.size();
        }

        /**
         * The index of the b-encoded column.
         * 
         * @return the 0-based index in the input CSV.
         */
        public final int getFieldIndex() {
            return this.fieldIndex;
        }

        /**
         * The keys to read from the b-encoded map.
         * 
         * @return an unmodifiable list, same size and order as {@link #getNames()}.
         */
        public final List<String> getKeys() {
            return this.keys;
        }

        /**
         * The names of the output columns.
         * 
         * @return an unmodifiable list, same size and order as {@link #getKeys()}.
         */
        public final List<String> getNames() {
            return this.names;
        }
    }

    // set with -Doverwrite=true to allow replacing an existing output file
    private static final boolean OVERWRITE_FILE = Boolean.getBoolean("overwrite");
    private static final Pattern DOUBLE_QUOTE_PATTERN = Pattern.compile("\"", Pattern.LITERAL);
    // an "encoded-word" (=?...?=) wrongly enclosed in single quotes
    private static final Pattern SINGLE_QUOTED_ENCODEDWORD_PATTERN = Pattern.compile("'(=\\?.+?\\?=)'");

    /**
     * Command line entry point, print the usage and exit with status 1 if the arguments are
     * missing or incomplete.
     * 
     * @param args <code>inputFile outputFile [bencodedIndex key1:label1,key2:label2,...]...</code>
     * @throws IOException if an error occurs while reading or writing.
     * @throws BEncodingException if a value isn't b-encoded.
     * @throws ParseException if a value isn't a valid "encoded-word".
     */
    public static void main(String[] args) throws IOException, BEncodingException, ParseException {
        // need both files, and each index must be followed by its keys
        if (args.length < 2 || args.length % 2 != 0) {
            System.out.println("Load a CSV file and replace some {@link BEncoding zimbra benconded map} columns by some of their toplevel values");
            System.out.println("The bencoded colums are removed and the new columns are added at the end");
            System.out.println("inputFile outputFile [bencodedIndex key1:label1,key2:label2,...]...");
            System.exit(1);
        }

        final Path in = Paths.get(args[0]);
        final Path out = Paths.get(args[1]);
        if (Files.exists(out) && Files.isSameFile(in, out)) {
            throw new IllegalArgumentException("Same file");
        }

        final List<ExtractInfo> extractInfos = new ArrayList<>();
        // refuse the same index more than one time, all keys of a column should be passed together
        final Set<Integer> indexes = new HashSet<>();
        for (int i = 2; i < args.length; i += 2) {
            final int index = Integer.parseInt(args[i]);
            if (!indexes.add(index)) {
                throw new IllegalArgumentException("Duplicate index " + index);
            }
            final String[] keys = args[i + 1].split(",");
            extractInfos.add(new ExtractInfo(index, keys));
        }

        System.out.println("From " + in + " to " + out);
        final int lines = replace(in, out, extractInfos);
        System.out.println("Processed " + lines + " line(s)");
    }

    /**
     * Read from <code>in</code> and write to <code>out</code>. The extracted columns are removed
     * and the new columns are appended after the remaining ones, in the order of
     * <code>extractInfos</code>. If <code>extractInfos</code> is empty the file is just copied. If
     * the input has no header line, nothing is written.
     * 
     * @param in the input CSV, the first line must be the column names.
     * @param out the output CSV.
     * @param extractInfos what to extract.
     * @return number of processed lines (excluding the column names).
     * @throws IOException if an error occurs while reading or writing.
     * @throws BEncodingException if a value isn't b-encoded.
     * @throws ParseException if a value isn't a valid "encoded-word".
     * @throws IndexOutOfBoundsException if a field index isn't a column of the input.
     * @throws IllegalStateException if a line doesn't have the same number of items as the first
     *         one.
     */
    public static int replace(final Path in, final Path out, final List<ExtractInfo> extractInfos) throws IOException, ParseException, BEncodingException {
        if (extractInfos.isEmpty()) {
            Files.copy(in, out, OVERWRITE_FILE ? new CopyOption[] { StandardCopyOption.REPLACE_EXISTING } : new CopyOption[0]);
            return 0;
        }

        int totalLines = 0;
        final OpenOption[] openOptions = OVERWRITE_FILE ? new OpenOption[] { StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING } : new OpenOption[] { StandardOpenOption.CREATE_NEW };
        final StringBuilder sb = new StringBuilder(512);
        try (final CSVReader r = new CSVReader(Files.newBufferedReader(in), ',', '"', '\\', 0, true);
                // CSVWriter doesn't handle escaping quotes by doubling them (as needed by
                // LibreOffice), so output CSV ourselves
                final Writer w = Files.newBufferedWriter(out, openOptions)) {

            // column names
            final String[] header = r.readNext();
            if (header == null) {
                // empty input, nothing to process
                return 0;
            }
            final int itemsCount = header.length;

            // Flag the columns to drop using the *input* indexes. Removing them one by one from
            // the output list would shift the following items and thus drop the wrong columns
            // as soon as there's more than one ExtractInfo.
            final boolean[] extracted = new boolean[itemsCount];
            int keptCols = itemsCount;
            int addedCols = 0;
            for (final ExtractInfo e : extractInfos) {
                final int index = e.getFieldIndex();
                if (index < 0 || index >= itemsCount) {
                    throw new IndexOutOfBoundsException("Field index " + index + " not in [0, " + itemsCount + ")");
                }
                if (!extracted[index]) {
                    extracted[index] = true;
                    keptCols--;
                }
                addedCols += e.getKeys().size();
            }
            final int outputColCount = keptCols + addedCols;

            final List<String> listToWrite = new ArrayList<>(outputColCount);
            addKeptItems(listToWrite, header, extracted);
            for (final ExtractInfo e : extractInfos) {
                listToWrite.addAll(e.getNames());
            }
            writeCSV(sb, listToWrite, outputColCount);
            w.append(sb);

            String[] line;
            while ((line = r.readNext()) != null) {
                totalLines++;
                if (itemsCount != line.length) {
                    throw new IllegalStateException("Expected " + itemsCount + " items but got " + line.length + " : " + Arrays.asList(line));
                }
                listToWrite.clear();
                addKeptItems(listToWrite, line, extracted);

                for (final ExtractInfo e : extractInfos) {
                    String item = line[e.getFieldIndex()];
                    if (item.startsWith("=?")) {
                        item = MimeUtility.decodeWord(item);
                    }
                    final Map<?, ?> bdecoded = (Map<?, ?>) BEncoding.decode(item);

                    for (final String key : e.getKeys()) {
                        listToWrite.add(toLegibleString(bdecoded.get(key)));
                    }
                }

                writeCSV(sb, listToWrite, outputColCount);
                w.append(sb);
            }
        }
        return totalLines;
    }

    // Append to dest the items of line that aren't flagged in extracted, keeping their order.
    private static void addKeptItems(final List<String> dest, final String[] line, final boolean[] extracted) {
        for (int i = 0; i < line.length; i++) {
            if (!extracted[i]) {
                dest.add(line[i]);
            }
        }
    }

    // Parse a b-decoded value as an address header and return its unicode form, null if val is
    // null.
    private static String toLegibleString(final Object val) throws ParseException {
        // BEncoding cannot encode null but a key can be missing
        if (val == null) {
            return null;
        }
        String toParse = val.toString();
        // illegal single quoting, replace with double quote
        if (toParse.contains("'=?")) {
            toParse = SINGLE_QUOTED_ENCODEDWORD_PATTERN.matcher(toParse).replaceAll("\"$1\"");
        }
        // strict=false because we don't care if there's "illegal character"
        // (e.g. annabelle@testaud@ac-paris.fr or
        // <'jeromebarral83@yahoo.fr'>), we want it to be legible
        final InternetAddress[] mimeDecoded = InternetAddress.parseHeader(toParse, false);
        return CollectionUtils.join(Arrays.asList(mimeDecoded), ", ", InternetAddress::toUnicodeString);
    }

    /**
     * Replace the content of <code>sb</code> by one CSV line. Items are enclosed in double quotes
     * (inner double quotes are doubled), <code>null</code> items are written as an unquoted
     * <code>\N</code>.
     * 
     * @param sb where to write the line, its previous content is discarded.
     * @param l the items of the line.
     * @param outputColCount the expected number of items.
     * @throws IllegalStateException if <code>l</code> doesn't have <code>outputColCount</code>
     *         items.
     */
    private static void writeCSV(final StringBuilder sb, final List<String> l, final int outputColCount) {
        if (l.size() != outputColCount) {
            throw new IllegalStateException("Wrong column count");
        }
        sb.setLength(0);
        boolean first = true;
        for (final String i : l) {
            // separator between items, so that an empty list still yields a valid (empty) line
            if (first) {
                first = false;
            } else {
                sb.append(',');
            }
            if (i == null) {
                sb.append("\\N");
            } else {
                sb.append('"');
                sb.append(DOUBLE_QUOTE_PATTERN.matcher(i).replaceAll("\"\""));
                sb.append('"');
            }
        }
        sb.append('\n');
    }
}