Dépôt officiel du code source de l'ERP OpenConcerto
Blame | Last modification | View Log | RSS feed
/*
* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
*
* Copyright 2011-2019 OpenConcerto, by ILM Informatique. All rights reserved.
*
* The contents of this file are subject to the terms of the GNU General Public License Version 3
* only ("GPL"). You may not use this file except in compliance with the License. You can obtain a
* copy of the License at http://www.gnu.org/licenses/gpl-3.0.html See the License for the specific
* language governing permissions and limitations under the License.
*
* When distributing the software, include this License Header Notice in each file.
*/
package org.openconcerto.utils.prog;
import org.openconcerto.utils.CollectionUtils;
import org.openconcerto.utils.text.CSVReader;
import java.io.IOException;
import java.io.Writer;
import java.nio.file.CopyOption;
import java.nio.file.Files;
import java.nio.file.OpenOption;
import java.nio.file.Path;
import java.nio.file.Paths;
import java.nio.file.StandardCopyOption;
import java.nio.file.StandardOpenOption;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.regex.Pattern;
import javax.mail.internet.InternetAddress;
import javax.mail.internet.MimeUtility;
import javax.mail.internet.ParseException;
import com.zimbra.common.util.BEncoding;
import com.zimbra.common.util.BEncoding.BEncodingException;
/**
 * Load a CSV file and replace some {@link BEncoding zimbra bencoded map} columns by some of their
 * top-level values. The bencoded columns are removed and the extracted values are appended as new
 * columns at the end of each line. Sample SQL to generate the CSV :
 *
 * <pre>
 * SELECT 'ID', 'sujet', 'date', 'metadonnées'
 * UNION ALL
 * SELECT id, subject, from_unixtime(floor(date)), metadata
 * FROM mail_item
 * WHERE mailbox_id=70 and type=5
 * ORDER BY date DESC
 * INTO OUTFILE '/tmp/zimbraMails.csv' FIELDS TERMINATED BY ',' ENCLOSED BY '\"' ESCAPED BY '\\' LINES TERMINATED BY '\n' ;
 * </pre>
 *
 * @author sylvain
 */
public class ExtractFromBEncoding {

    /**
     * Describe which keys to extract from one bencoded column and the names of the resulting
     * columns.
     */
    static public final class ExtractInfo {
        private final int fieldIndex;
        private final List<String> keys, names;

        /**
         * Create a new instance.
         *
         * @param fieldIndex the 0-based index of the bencoded column in the input CSV.
         * @param keysAndNames items of the form <code>key:name</code> : the key to look up in the
         *        decoded map, and the name of the column receiving its value. Only the first colon
         *        is a separator, the name may itself contain colons.
         * @throws IllegalArgumentException if the index is negative, if there's no item or if an
         *         item has no colon.
         */
        public ExtractInfo(int fieldIndex, String[] keysAndNames) {
            super();
            if (fieldIndex < 0)
                throw new IllegalArgumentException("Negative field index : " + fieldIndex);
            if (keysAndNames.length == 0)
                throw new IllegalArgumentException("No keys to extract for field " + fieldIndex);
            this.fieldIndex = fieldIndex;
            final List<String> keys = new ArrayList<>(keysAndNames.length);
            final List<String> names = new ArrayList<>(keysAndNames.length);
            for (final String kAn : keysAndNames) {
                final int colonIndex = kAn.indexOf(':');
                // fail with a clear message instead of a StringIndexOutOfBoundsException
                if (colonIndex < 0)
                    throw new IllegalArgumentException("Missing ':' between key and name in '" + kAn + "'");
                keys.add(kAn.substring(0, colonIndex));
                names.add(kAn.substring(colonIndex + 1));
            }
            this.keys = Collections.unmodifiableList(keys);
            this.names = Collections.unmodifiableList(names);
        }

        /**
         * @return the 0-based index of the bencoded column.
         */
        public final int getFieldIndex() {
            return this.fieldIndex;
        }

        /**
         * @return the keys to look up in the decoded map, unmodifiable.
         */
        public final List<String> getKeys() {
            return this.keys;
        }

        /**
         * @return the names of the added columns, unmodifiable and parallel to {@link #getKeys()}.
         */
        public final List<String> getNames() {
            return this.names;
        }
    }

    // set with -Doverwrite=true to allow replacing an existing output file
    private static final boolean OVERWRITE_FILE = Boolean.getBoolean("overwrite");
    private static final Pattern DOUBLE_QUOTE_PATTERN = Pattern.compile("\"", Pattern.LITERAL);
    // an encoded-word ("=?...?=") wrapped in single quotes
    private static final Pattern SINGLE_QUOTED_ENCODEDWORD_PATTERN = Pattern.compile("'(=\\?.+?\\?=)'");

    /**
     * Command line entry point : <code>inputFile outputFile [bencodedIndex
     * key1:label1,key2:label2,...]...</code>. Print the usage and exit with status 1 if the
     * arguments are incomplete.
     *
     * @param args the input file, the output file, then pairs of column index and key list.
     * @throws IOException if an error occurs while reading or writing.
     * @throws BEncodingException if a value isn't b-encoded.
     * @throws ParseException if a value isn't a valid "encoded-word".
     */
    public static void main(String[] args) throws IOException, BEncodingException, ParseException {
        // both files are required and each index must come with its key list
        if (args.length < 2 || args.length % 2 != 0) {
            System.out.println("Load a CSV file and replace some {@link BEncoding zimbra benconded map} columns by some of their toplevel values");
            System.out.println("The bencoded colums are removed and the new columns are added at the end");
            System.out.println("inputFile outputFile [bencodedIndex key1:label1,key2:label2,...]...");
            System.exit(1);
        }
        final Path in = Paths.get(args[0]);
        final Path out = Paths.get(args[1]);
        if (Files.exists(out) && Files.isSameFile(in, out))
            throw new IllegalArgumentException("Same file");
        final List<ExtractInfo> extractInfos = new ArrayList<>();
        // can't have the same index more than one time because we remove it
        final Set<Integer> indexes = new HashSet<>();
        for (int i = 2; i < args.length; i += 2) {
            final int index = Integer.parseInt(args[i]);
            if (!indexes.add(index))
                throw new IllegalArgumentException("Duplicate index " + index);
            final String[] keys = args[i + 1].split(",");
            extractInfos.add(new ExtractInfo(index, keys));
        }
        System.out.println("From " + in + " to " + out);
        final int lines = replace(in, out, extractInfos);
        System.out.println("Processed " + lines + " line(s)");
    }

    /**
     * Read from <code>in</code> and write to <code>out</code>. The first line of the input is
     * treated as the column names. In the output, the columns referenced by
     * <code>extractInfos</code> are removed, the other columns keep their relative order, and the
     * extracted columns are appended in the order of <code>extractInfos</code>. If
     * <code>extractInfos</code> is empty, the file is copied as is.
     *
     * @param in the input CSV.
     * @param out the output CSV.
     * @param extractInfos what to extract, indexes refer to the columns of the input.
     * @return number of processed lines (not counting the column names), 0 if the input is empty or
     *         was just copied.
     * @throws IOException if an error occurs while reading or writing.
     * @throws BEncodingException if a value isn't b-encoded.
     * @throws ParseException if a value isn't a valid "encoded-word".
     * @throws IllegalArgumentException if an index is used more than once or isn't a column of the
     *         input.
     * @throws IllegalStateException if a line doesn't have as many items as the first one.
     */
    public static int replace(final Path in, final Path out, final List<ExtractInfo> extractInfos) throws IOException, ParseException, BEncodingException {
        if (extractInfos.isEmpty()) {
            Files.copy(in, out, OVERWRITE_FILE ? new CopyOption[] { StandardCopyOption.REPLACE_EXISTING } : new CopyOption[0]);
            return 0;
        }

        int totalLines = 0;
        final OpenOption[] openOptions = OVERWRITE_FILE ? new OpenOption[] { StandardOpenOption.CREATE, StandardOpenOption.TRUNCATE_EXISTING } : new OpenOption[] { StandardOpenOption.CREATE_NEW };
        final StringBuilder sb = new StringBuilder(512);
        try (final CSVReader r = new CSVReader(Files.newBufferedReader(in), ',', '"', '\\', 0, true);
                // CSVWriter doesn't handle escaping quotes by doubling them (as needed by
                // LibreOffice), so output CSV ourselves
                final Writer w = Files.newBufferedWriter(out, openOptions)) {
            // column names
            final String[] header = r.readNext();
            if (header == null) {
                // empty input, nothing to convert
                return 0;
            }
            final int itemsCount = header.length;

            // All indexes refer to the input columns, so collect them up front instead of removing
            // them one by one (which would shift the position of the following ones).
            final Set<Integer> removedIndexes = new HashSet<>();
            int addedCols = 0;
            for (final ExtractInfo e : extractInfos) {
                final int index = e.getFieldIndex();
                if (index < 0 || index >= itemsCount)
                    throw new IllegalArgumentException("Index " + index + " is not a column of the input (" + itemsCount + " columns)");
                if (!removedIndexes.add(index))
                    throw new IllegalArgumentException("Duplicate index " + index);
                addedCols += e.getKeys().size();
            }
            final int outputColCount = itemsCount - removedIndexes.size() + addedCols;

            final List<String> listToWrite = new ArrayList<>(outputColCount);
            addKeptFields(listToWrite, header, removedIndexes);
            for (final ExtractInfo e : extractInfos) {
                listToWrite.addAll(e.getNames());
            }
            writeCSV(sb, listToWrite, outputColCount);
            w.append(sb);

            String[] line;
            while ((line = r.readNext()) != null) {
                totalLines++;
                if (itemsCount != line.length) {
                    throw new IllegalStateException("Expected " + itemsCount + " items but got " + line.length + " : " + Arrays.asList(line));
                }
                listToWrite.clear();
                addKeptFields(listToWrite, line, removedIndexes);
                for (final ExtractInfo e : extractInfos) {
                    String item = line[e.getFieldIndex()];
                    // the whole bencoded value might itself be an encoded-word
                    if (item.startsWith("=?")) {
                        item = MimeUtility.decodeWord(item);
                    }
                    final Map<?, ?> bdecoded = (Map<?, ?>) BEncoding.decode(item);
                    for (final String key : e.getKeys()) {
                        // BEncoding cannot encode null but a key can be missing
                        final Object val = bdecoded.get(key);
                        listToWrite.add(val == null ? null : decodeAddresses(val.toString()));
                    }
                }
                writeCSV(sb, listToWrite, outputColCount);
                w.append(sb);
            }
        }
        return totalLines;
    }

    /**
     * Append to <code>dest</code> the items of <code>line</code> that aren't extracted, in order.
     */
    private static void addKeptFields(final List<String> dest, final String[] line, final Set<Integer> removedIndexes) {
        for (int i = 0; i < line.length; i++) {
            if (!removedIndexes.contains(i))
                dest.add(line[i]);
        }
    }

    /**
     * Parse a value as an address header and return its addresses in unicode, joined by ", ".
     */
    private static String decodeAddresses(final String val) throws ParseException {
        String toParse = val;
        // illegal single quoting, replace with double quote
        if (toParse.contains("'=?"))
            toParse = SINGLE_QUOTED_ENCODEDWORD_PATTERN.matcher(toParse).replaceAll("\"$1\"");
        // strict=false because we don't care if there's "illegal character"
        // (e.g. annabelle@testaud@ac-paris.fr or
        // <'jeromebarral83@yahoo.fr'>), we want it to be legible
        final InternetAddress[] mimeDecoded = InternetAddress.parseHeader(toParse, false);
        return CollectionUtils.join(Arrays.asList(mimeDecoded), ", ", InternetAddress::toUnicodeString);
    }

    /**
     * Replace the content of <code>sb</code> by one CSV line, terminated by a newline. Non-null
     * items are enclosed in double quotes with inner double quotes doubled, null items are written
     * as <code>\N</code>.
     *
     * @param sb the buffer to fill, its previous content is discarded.
     * @param l the items of the line.
     * @param outputColCount the expected size of <code>l</code>.
     * @throws IllegalStateException if <code>l</code> doesn't have the expected size.
     */
    private static void writeCSV(final StringBuilder sb, final List<String> l, final int outputColCount) {
        if (l.size() != outputColCount)
            throw new IllegalStateException("Wrong column count");
        sb.setLength(0);
        boolean first = true;
        for (final String i : l) {
            // separator between fields, so that an empty list still yields a valid (empty) line
            if (!first)
                sb.append(',');
            first = false;
            if (i == null) {
                sb.append("\\N");
            } else {
                sb.append('"');
                sb.append(DOUBLE_QUOTE_PATTERN.matcher(i).replaceAll("\"\""));
                sb.append('"');
            }
        }
        sb.append('\n');
    }
}