1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16 package au.csiro.netcdf.util;
17
18 import java.util.ArrayList;
19 import java.util.List;
20 import java.util.StringTokenizer;
21
22
23
24
25
26
27
28
29
30
/**
 * A {@link StringTokenizer} that splits comma-separated-value (CSV) text into
 * fields.
 * <p>
 * The underlying tokenizer is configured to split on the double quote, line
 * feed, carriage return and the configured field separator, returning each of
 * them as a raw token. {@link #nextToken()} then reassembles those raw tokens
 * into one field per call, applying these rules:
 * <ul>
 * <li>A double quote opens or closes a quoted block; inside a quoted block the
 * separator, line feed and carriage return are literal field content.</li>
 * <li>Two consecutive double quotes inside a quoted block yield one literal
 * double quote.</li>
 * <li>Outside a quoted block, the separator ends the field and a line feed ends
 * both the field and the line (see {@link #atEOL()}).</li>
 * <li>Outside a quoted block, a carriage return immediately followed by a line
 * feed is dropped; followed by anything else it is kept as field content; as
 * the very last character of the input it is dropped.</li>
 * </ul>
 * Because a field terminator is consumed together with the field it ends, an
 * empty field after the final separator of the input is not reported by
 * {@link #hasMoreTokens()}.
 */
public class CSVTokenizer extends StringTokenizer
{
    /** Raw token that opens or closes a quoted block. */
    private static final String QUOTE = "\"";

    /** Raw token that ends a line when seen outside a quoted block. */
    private static final String LINE_FEED = "\n";

    /** Raw carriage-return token (octal 015). */
    private static final String CARRIAGE_RETURN = "\015";

    /** Whether field terminators are also handed out as tokens of their own. */
    private final boolean returnTokens;

    /** The field separator, held as a one-character string for comparisons. */
    private final String majorDelimiter;

    /**
     * Terminator of the most recently returned field, waiting to be returned by
     * the next call to {@link #nextToken()}. Only ever non-null when
     * {@link #returnTokens} is set.
     */
    private String cachedToken = null;

    /** Raw token that was read as look-ahead and must be processed next. */
    private String pushedBackToken = null;

    /** True when the most recently returned field was ended by a line feed. */
    private boolean atEOL = false;

    /**
     * Creates a tokenizer that splits on commas and does not return the
     * separators as tokens.
     *
     * @param str the text to tokenize
     */
    public CSVTokenizer(String str)
    {
        this(str, ',', false);
    }

    /**
     * Creates a tokenizer that splits on commas.
     *
     * @param str          the text to tokenize
     * @param returnTokens if true, each separator or line feed that ends a field
     *                     is returned as a token of its own after that field
     */
    public CSVTokenizer(String str, boolean returnTokens)
    {
        this(str, ',', returnTokens);
    }

    /**
     * Creates a tokenizer that splits on the given separator character.
     *
     * @param str            the text to tokenize
     * @param majorDelimiter the character that separates fields
     * @param returnTokens   if true, each separator or line feed that ends a
     *                       field is returned as a token of its own after that
     *                       field
     */
    public CSVTokenizer(String str, char majorDelimiter, boolean returnTokens)
    {
        // Always ask the superclass to return delimiters: nextToken() needs to
        // see every quote, line break and separator to rebuild the fields.
        super(str, "\"\n\015" + majorDelimiter, true);

        this.returnTokens = returnTokens;
        this.majorDelimiter = String.valueOf(majorDelimiter);
    }

    /**
     * Reads every remaining token by calling {@link #nextToken()} until
     * {@link #hasMoreElements()} reports false.
     *
     * @return the remaining tokens in order; an empty array if there are none
     */
    public String[] getAllColumns()
    {
        List<String> columns = new ArrayList<String>();
        while (hasMoreElements())
        {
            columns.add(nextToken());
        }

        return columns.toArray(new String[0]);
    }

    /**
     * Token counting is not supported by this tokenizer.
     *
     * @return always 0, regardless of how many tokens remain
     */
    @Override
    public int countTokens()
    {
        return 0;
    }

    /**
     * Same as {@link #hasMoreTokens()}.
     *
     * @return true if another call to {@link #nextToken()} has input to consume
     */
    @Override
    public boolean hasMoreElements()
    {
        return hasMoreTokens();
    }

    /**
     * Tests whether there is anything left to return: unread input, a cached
     * terminator, or a pushed-back look-ahead token.
     *
     * @return true if another call to {@link #nextToken()} has input to consume
     */
    @Override
    public boolean hasMoreTokens()
    {
        return super.hasMoreTokens() || cachedToken != null || pushedBackToken != null;
    }

    /**
     * Tests whether the tokenizer is positioned at the end of a line.
     *
     * @return true if the most recently returned field was ended by a line feed,
     *         or if no tokens remain
     */
    public boolean atEOL()
    {
        return atEOL || !hasMoreTokens();
    }

    /**
     * Same as {@link #nextToken()}, typed as {@code Object}.
     *
     * @return the next token, as a {@code String}
     */
    @Override
    public Object nextElement()
    {
        return nextToken();
    }

    /**
     * Returns the next field, or, when terminators are being returned and one
     * is pending, that terminator.
     * <p>
     * Unlike {@link StringTokenizer#nextToken()}, this method does not throw
     * when the input is exhausted; it returns an empty string instead.
     *
     * @return the next field with quoting removed (possibly empty), or the
     *         pending terminator
     */
    @Override
    public String nextToken()
    {
        // Hand out the terminator of the previous field before reading on.
        if (returnTokens && cachedToken != null)
        {
            String pending = cachedToken;
            cachedToken = null;
            return pending;
        }

        boolean inQuoteBlock = false;
        boolean wordFinished = false;
        StringBuilder currWord = new StringBuilder();
        // The separator or line feed that ended this field, if any. Tracked
        // separately from the raw token so that a quote, a swallowed carriage
        // return, or a literal character inside an unterminated quoted block is
        // never mistaken for a field terminator when the input simply runs out.
        String terminator = null;
        atEOL = false;

        while (!wordFinished && (pushedBackToken != null || super.hasMoreTokens()))
        {
            String token;
            if (pushedBackToken != null)
            {
                token = pushedBackToken;
                pushedBackToken = null;
            }
            else
            {
                token = super.nextToken();
            }

            if (token.equals(QUOTE))
            {
                if (!inQuoteBlock)
                {
                    inQuoteBlock = true;
                }
                else if (!super.hasMoreTokens())
                {
                    // Closing quote is the last character of the input.
                    inQuoteBlock = false;
                    wordFinished = true;
                }
                else
                {
                    // Look ahead one raw token to tell an escaped quote ("")
                    // from a closing quote.
                    String lookahead = super.nextToken();
                    if (lookahead.equals(QUOTE))
                    {
                        currWord.append(QUOTE);
                    }
                    else
                    {
                        inQuoteBlock = false;
                        pushedBackToken = lookahead;
                    }
                }
            }
            else if (inQuoteBlock)
            {
                // Separators and line breaks are literal inside quotes.
                currWord.append(token);
            }
            else if (token.equals(majorDelimiter))
            {
                wordFinished = true;
                terminator = token;
            }
            else if (token.equals(LINE_FEED))
            {
                wordFinished = true;
                atEOL = true;
                terminator = token;
            }
            else if (token.equals(CARRIAGE_RETURN))
            {
                // Drop the CR of a CR LF pair; keep a CR followed by anything
                // else. A CR that is the last character of the input is dropped.
                if (super.hasMoreTokens())
                {
                    String lookahead = super.nextToken();
                    if (!lookahead.equals(LINE_FEED))
                    {
                        currWord.append(CARRIAGE_RETURN);
                    }
                    pushedBackToken = lookahead;
                }
            }
            else
            {
                currWord.append(token);
            }
        }

        cachedToken = returnTokens ? terminator : null;
        return currWord.toString();
    }
}