/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 * 
 * Copyright 2011 OpenConcerto, by ILM Informatique. All rights reserved.
 * 
 * The contents of this file are subject to the terms of the GNU General Public License Version 3
 * only ("GPL"). You may not use this file except in compliance with the License. You can obtain a
 * copy of the License at http://www.gnu.org/licenses/gpl-3.0.html See the License for the specific
 * language governing permissions and limitations under the License.
 * 
 * When distributing the software, include this License Header Notice in each file.
 */
|
|
|
13 |
|
|
|
14 |
package org.openconcerto.utils.text;
|
|
|
15 |
|
|
|
16 |
/**
 * Copyright 2005 Bytecode Pty Ltd.
 * 
 * Licensed under the Apache License, Version 2.0 (the "License"); you may not use this file except
 * in compliance with the License. You may obtain a copy of the License at
 * 
 * http://www.apache.org/licenses/LICENSE-2.0
 * 
 * Unless required by applicable law or agreed to in writing, software distributed under the License
 * is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express
 * or implied. See the License for the specific language governing permissions and limitations under
 * the License.
 */
|
|
|
29 |
|
|
|
30 |
import java.io.BufferedReader;
|
|
|
31 |
import java.io.Closeable;
|
|
|
32 |
import java.io.IOException;
|
|
|
33 |
import java.io.Reader;
|
|
|
34 |
import java.util.ArrayList;
|
|
|
35 |
import java.util.List;
|
|
|
36 |
|
|
|
37 |
/**
|
|
|
38 |
* A very simple CSV reader released under a commercial-friendly license.
|
|
|
39 |
*
|
|
|
40 |
* @author Glen Smith
|
|
|
41 |
*
|
|
|
42 |
*/
|
|
|
43 |
public class CSVReader implements Closeable {
|
|
|
44 |
|
|
|
45 |
private BufferedReader br;
|
|
|
46 |
|
|
|
47 |
private boolean hasNext = true;
|
|
|
48 |
|
|
|
49 |
private CSVParser parser;
|
|
|
50 |
|
|
|
51 |
private int skipLines;
|
|
|
52 |
|
|
|
53 |
private boolean linesSkiped;
|
|
|
54 |
|
|
|
55 |
/**
|
|
|
56 |
* The default line to start reading.
|
|
|
57 |
*/
|
|
|
58 |
public static final int DEFAULT_SKIP_LINES = 0;
|
|
|
59 |
|
|
|
60 |
/**
|
|
|
61 |
* Constructs CSVReader using a comma for the separator.
|
|
|
62 |
*
|
|
|
63 |
* @param reader the reader to an underlying CSV source.
|
|
|
64 |
*/
|
|
|
65 |
public CSVReader(Reader reader) {
|
|
|
66 |
this(reader, CSVParser.DEFAULT_SEPARATOR, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER);
|
|
|
67 |
}
|
|
|
68 |
|
|
|
69 |
/**
|
|
|
70 |
* Constructs CSVReader with supplied separator.
|
|
|
71 |
*
|
|
|
72 |
* @param reader the reader to an underlying CSV source.
|
|
|
73 |
* @param separator the delimiter to use for separating entries.
|
|
|
74 |
*/
|
|
|
75 |
public CSVReader(Reader reader, char separator) {
|
|
|
76 |
this(reader, separator, CSVParser.DEFAULT_QUOTE_CHARACTER, CSVParser.DEFAULT_ESCAPE_CHARACTER);
|
|
|
77 |
}
|
|
|
78 |
|
|
|
79 |
/**
|
|
|
80 |
* Constructs CSVReader with supplied separator and quote char.
|
|
|
81 |
*
|
|
|
82 |
* @param reader the reader to an underlying CSV source.
|
|
|
83 |
* @param separator the delimiter to use for separating entries
|
|
|
84 |
* @param quotechar the character to use for quoted elements
|
|
|
85 |
*/
|
|
|
86 |
public CSVReader(Reader reader, char separator, char quotechar) {
|
|
|
87 |
this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, CSVParser.DEFAULT_STRICT_QUOTES);
|
|
|
88 |
}
|
|
|
89 |
|
|
|
90 |
/**
|
|
|
91 |
* Constructs CSVReader with supplied separator, quote char and quote handling behavior.
|
|
|
92 |
*
|
|
|
93 |
* @param reader the reader to an underlying CSV source.
|
|
|
94 |
* @param separator the delimiter to use for separating entries
|
|
|
95 |
* @param quotechar the character to use for quoted elements
|
|
|
96 |
* @param strictQuotes sets if characters outside the quotes are ignored
|
|
|
97 |
*/
|
|
|
98 |
public CSVReader(Reader reader, char separator, char quotechar, boolean strictQuotes) {
|
|
|
99 |
this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, DEFAULT_SKIP_LINES, strictQuotes);
|
|
|
100 |
}
|
|
|
101 |
|
|
|
102 |
/**
|
|
|
103 |
* Constructs CSVReader with supplied separator and quote char.
|
|
|
104 |
*
|
|
|
105 |
* @param reader the reader to an underlying CSV source.
|
|
|
106 |
* @param separator the delimiter to use for separating entries
|
|
|
107 |
* @param quotechar the character to use for quoted elements
|
|
|
108 |
* @param escape the character to use for escaping a separator or quote
|
|
|
109 |
*/
|
|
|
110 |
|
|
|
111 |
public CSVReader(Reader reader, char separator, char quotechar, char escape) {
|
|
|
112 |
this(reader, separator, quotechar, escape, DEFAULT_SKIP_LINES, CSVParser.DEFAULT_STRICT_QUOTES);
|
|
|
113 |
}
|
|
|
114 |
|
|
|
115 |
/**
|
|
|
116 |
* Constructs CSVReader with supplied separator and quote char.
|
|
|
117 |
*
|
|
|
118 |
* @param reader the reader to an underlying CSV source.
|
|
|
119 |
* @param separator the delimiter to use for separating entries
|
|
|
120 |
* @param quotechar the character to use for quoted elements
|
|
|
121 |
* @param line the line number to skip for start reading
|
|
|
122 |
*/
|
|
|
123 |
public CSVReader(Reader reader, char separator, char quotechar, int line) {
|
|
|
124 |
this(reader, separator, quotechar, CSVParser.DEFAULT_ESCAPE_CHARACTER, line, CSVParser.DEFAULT_STRICT_QUOTES);
|
|
|
125 |
}
|
|
|
126 |
|
|
|
127 |
/**
|
|
|
128 |
* Constructs CSVReader with supplied separator and quote char.
|
|
|
129 |
*
|
|
|
130 |
* @param reader the reader to an underlying CSV source.
|
|
|
131 |
* @param separator the delimiter to use for separating entries
|
|
|
132 |
* @param quotechar the character to use for quoted elements
|
|
|
133 |
* @param escape the character to use for escaping a separator or quote
|
|
|
134 |
* @param line the line number to skip for start reading
|
|
|
135 |
*/
|
|
|
136 |
public CSVReader(Reader reader, char separator, char quotechar, char escape, int line) {
|
|
|
137 |
this(reader, separator, quotechar, escape, line, CSVParser.DEFAULT_STRICT_QUOTES);
|
|
|
138 |
}
|
|
|
139 |
|
|
|
140 |
/**
|
|
|
141 |
* Constructs CSVReader with supplied separator and quote char.
|
|
|
142 |
*
|
|
|
143 |
* @param reader the reader to an underlying CSV source.
|
|
|
144 |
* @param separator the delimiter to use for separating entries
|
|
|
145 |
* @param quotechar the character to use for quoted elements
|
|
|
146 |
* @param escape the character to use for escaping a separator or quote
|
|
|
147 |
* @param line the line number to skip for start reading
|
|
|
148 |
* @param strictQuotes sets if characters outside the quotes are ignored
|
|
|
149 |
*/
|
|
|
150 |
public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes) {
|
|
|
151 |
this(reader, separator, quotechar, escape, line, strictQuotes, CSVParser.DEFAULT_IGNORE_LEADING_WHITESPACE);
|
|
|
152 |
}
|
|
|
153 |
|
|
|
154 |
/**
|
|
|
155 |
* Constructs CSVReader with supplied separator and quote char.
|
|
|
156 |
*
|
|
|
157 |
* @param reader the reader to an underlying CSV source.
|
|
|
158 |
* @param separator the delimiter to use for separating entries
|
|
|
159 |
* @param quotechar the character to use for quoted elements
|
|
|
160 |
* @param escape the character to use for escaping a separator or quote
|
|
|
161 |
* @param line the line number to skip for start reading
|
|
|
162 |
* @param strictQuotes sets if characters outside the quotes are ignored
|
|
|
163 |
* @param ignoreLeadingWhiteSpace it true, parser should ignore white space before a quote in a
|
|
|
164 |
* field
|
|
|
165 |
*/
|
|
|
166 |
public CSVReader(Reader reader, char separator, char quotechar, char escape, int line, boolean strictQuotes, boolean ignoreLeadingWhiteSpace) {
|
|
|
167 |
this.br = new BufferedReader(reader);
|
|
|
168 |
this.parser = new CSVParser(separator, quotechar, escape, strictQuotes, ignoreLeadingWhiteSpace);
|
|
|
169 |
this.skipLines = line;
|
|
|
170 |
}
|
|
|
171 |
|
|
|
172 |
/**
|
|
|
173 |
* Reads the entire file into a List with each element being a String[] of tokens.
|
|
|
174 |
*
|
|
|
175 |
* @return a List of String[], with each String[] representing a line of the file.
|
|
|
176 |
*
|
|
|
177 |
* @throws IOException if bad things happen during the read
|
|
|
178 |
*/
|
|
|
179 |
public List<String[]> readAll() throws IOException {
|
|
|
180 |
|
|
|
181 |
List<String[]> allElements = new ArrayList<String[]>();
|
|
|
182 |
while (hasNext) {
|
|
|
183 |
String[] nextLineAsTokens = readNext();
|
|
|
184 |
if (nextLineAsTokens != null)
|
|
|
185 |
allElements.add(nextLineAsTokens);
|
|
|
186 |
}
|
|
|
187 |
return allElements;
|
|
|
188 |
|
|
|
189 |
}
|
|
|
190 |
|
|
|
191 |
/**
|
|
|
192 |
* Reads the next line from the buffer and converts to a string array.
|
|
|
193 |
*
|
|
|
194 |
* @return a string array with each comma-separated element as a separate entry.
|
|
|
195 |
*
|
|
|
196 |
* @throws IOException if bad things happen during the read
|
|
|
197 |
*/
|
|
|
198 |
public String[] readNext() throws IOException {
|
|
|
199 |
|
|
|
200 |
String[] result = null;
|
|
|
201 |
do {
|
|
|
202 |
String nextLine = getNextLine();
|
|
|
203 |
if (!hasNext) {
|
|
|
204 |
return result; // should throw if still pending?
|
|
|
205 |
}
|
|
|
206 |
String[] r = parser.parseLineMulti(nextLine);
|
|
|
207 |
if (r.length > 0) {
|
|
|
208 |
if (result == null) {
|
|
|
209 |
result = r;
|
|
|
210 |
} else {
|
|
|
211 |
String[] t = new String[result.length + r.length];
|
|
|
212 |
System.arraycopy(result, 0, t, 0, result.length);
|
|
|
213 |
System.arraycopy(r, 0, t, result.length, r.length);
|
|
|
214 |
result = t;
|
|
|
215 |
}
|
|
|
216 |
}
|
|
|
217 |
} while (parser.isPending());
|
|
|
218 |
return result;
|
|
|
219 |
}
|
|
|
220 |
|
|
|
221 |
/**
|
|
|
222 |
* Reads the next line from the file.
|
|
|
223 |
*
|
|
|
224 |
* @return the next line from the file without trailing newline
|
|
|
225 |
* @throws IOException if bad things happen during the read
|
|
|
226 |
*/
|
|
|
227 |
private String getNextLine() throws IOException {
|
|
|
228 |
if (!this.linesSkiped) {
|
|
|
229 |
for (int i = 0; i < skipLines; i++) {
|
|
|
230 |
br.readLine();
|
|
|
231 |
}
|
|
|
232 |
this.linesSkiped = true;
|
|
|
233 |
}
|
|
|
234 |
String nextLine = br.readLine();
|
|
|
235 |
if (nextLine == null) {
|
|
|
236 |
hasNext = false;
|
|
|
237 |
}
|
|
|
238 |
return hasNext ? nextLine : null;
|
|
|
239 |
}
|
|
|
240 |
|
|
|
241 |
/**
|
|
|
242 |
* Closes the underlying reader.
|
|
|
243 |
*
|
|
|
244 |
* @throws IOException if the close fails
|
|
|
245 |
*/
|
|
|
246 |
public void close() throws IOException {
|
|
|
247 |
br.close();
|
|
|
248 |
}
|
|
|
249 |
|
|
|
250 |
}
|