source: osm/applications/editors/josm/plugins/surveyor/src/org/dinopolis/util/io/Tokenizer.java@ 13497

Last change on this file since 13497 was 13497, checked in by skela, 16 years ago

applications/editors/josm: Set svn:eol-style native on all *.java files
in plugins. Normalize the eol-style in
plugins/lakewalker/src/org/openstreetmap/josm/plugins/lakewalker/StringEnumConfigurer.java.

  • Property svn:eol-style set to native
File size: 27.4 KB
Line 
1/***********************************************************************
2 * @(#)$RCSfile: Tokenizer.java,v $ $Revision: 1.6 $$Date: 2006/04/21 14:14:56 $
3 *
4 * Copyright (c) Christof Dallermassl
5 *
6 * This program is free software; you can redistribute it and/or modify
7 * it under the terms of the GNU Lesser General Public License (LGPL)
8 * as published by the Free Software Foundation; either version 2.1 of
9 * the License, or (at your option) any later version.
10 *
11 * This program is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
14 * GNU Lesser General Public License for more details.
15 *
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this program; if not, write to the
18 * Free Software Foundation, Inc.,
19 * 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
20 ***********************************************************************/
21
22package org.dinopolis.util.io;
23
24import java.io.IOException;
25import java.io.InputStream;
26import java.io.InputStreamReader;
27import java.io.PushbackReader;
28import java.io.Reader;
29import java.io.StringReader;
30import java.util.ArrayList;
31import java.util.Iterator;
32import java.util.List;
33
//----------------------------------------------------------------------
/**
 * This tokenizer merges the benefits of the java.util.StringTokenizer
 * class and the java.io.StreamTokenizer class. It provides a low
 * level and a high level interface to the tokenizer. The low level
 * interface consists of the method pair nextToken() and getWord(),
 * where the first returns the type of token in the parsing process,
 * and the latter returns the String element itself.
 * <p>
 * The high level interface consists of the methods hasNextLine() and
 * nextLine(). They use the low level interface to parse the data line
 * by line and create a list of strings from it.
 * <p>
 * Mixing the high level and the low level interface on the same
 * instance is not recommended. For normal usage, the high level
 * interface is more comfortable and has no drawbacks.
 * <p>
 * An example for the high level interface:
 * <pre>
 * try
 * {
 *   // simple example, tokenizing string, no escape, but quoted
 *   // works:
 *   System.out.println("example 1");
 *   Tokenizer tokenizer = new Tokenizer("text,,,\"another,text\"");
 *   List tokens;
 *   while(tokenizer.hasNextLine())
 *   {
 *     tokens = tokenizer.nextLine();
 *     System.out.println(tokens.get(0)); // prints 'text'
 *     System.out.println(tokens.get(1)); // prints ''
 *     System.out.println(tokens.get(2)); // prints ''
 *     System.out.println(tokens.get(3)); // prints 'another,text'
 *   }
 *
 *   System.out.println("example 2");
 *   // simple example, tokenizing string, using escape char and
 *   // quoted strings:
 *   tokenizer = new Tokenizer("text,text with\\,comma,,\"another,text\"");
 *   tokenizer.respectEscapedCharacters(true);
 *   while(tokenizer.hasNextLine())
 *   {
 *     tokens = tokenizer.nextLine();
 *     System.out.println(tokens.get(0)); // prints 'text'
 *     System.out.println(tokens.get(1)); // prints 'text with,comma'
 *     System.out.println(tokens.get(2)); // prints ''
 *     System.out.println(tokens.get(3)); // prints 'another,text'
 *   }
 * }
 * catch(Exception ioe)
 * {
 *   ioe.printStackTrace();
 * }
 * </pre>
 * <p>
 * The advantages compared to the StreamTokenizer class are: Unlike
 * the StreamTokenizer, this Tokenizer class returns the delimiters as
 * tokens and therefore may be used to tokenize e.g. comma separated
 * files with empty fields (the StreamTokenizer handles multiple
 * delimiters in a row like one delimiter).
 * <p>
 * The tokenizer respects quoted words, so the delimiter is ignored if
 * inside quotes. And it may handle escaped characters (like an
 * escaped quote character, or an escaped new line) once
 * respectEscapedCharacters(true) was called. So the line
 * <code>eric,"he said, \"great!\""</code> returns <code>eric</code>
 * and <code>he said, "great!"</code> as words.
 * <p>
 * Low level interface: The design of the Tokenizer allows to get
 * empty columns as well as treat multiple delimiters in a row as one
 * delimiter. For the first approach trigger the values on every
 * DELIMITER, EOL and EOF token whereas for the second, trigger only on
 * WORD tokens.
 * <p>
 * If one wants to be informed about empty words as well, use the
 * Tokenizer like in the following code fragment:
 * <pre>
 * Tokenizer tokenizer = new Tokenizer("text,,,another text");
 * String word = "";
 * int token;
 * while((token = tokenizer.nextToken()) != Tokenizer.EOF)
 * {
 *   switch(token)
 *   {
 *   case Tokenizer.EOL:
 *     System.out.println("word: "+word);
 *     word = "";
 *     System.out.println("-------------");
 *     break;
 *   case Tokenizer.WORD:
 *     word = tokenizer.getWord();
 *     break;
 *   case Tokenizer.QUOTED_WORD:
 *     word = tokenizer.getWord() + " (quoted)";
 *     break;
 *   case Tokenizer.DELIMITER:
 *     System.out.println("word: "+word);
 *     word = "";
 *     break;
 *   default:
 *     System.err.println("Unknown Token: "+token);
 *   }
 * }
 * // the input above does not end with a newline, so its last word
 * // was not followed by an EOL token and is still pending here:
 * System.out.println("word: "+word);
 * </pre>
 * In this example, if the delimiter is set to a comma, a line like
 * <code>column1,,,"column4,partofcolumn4"</code> would be treated correctly.
 * <p>
 * This tokenizer uses the LF character as end of line character. It
 * ignores any CR characters, so it can be used in windows
 * environments as well.
 *
 * @author Christof Dallermassl
 * @version $Revision: 1.6 $
 */

public class Tokenizer
{
  /** end of file token */
  public static final int EOF = -1;
  /** end of line token */
  public static final int EOL = 0;
  /** word token */
  public static final int WORD = 1;
  /** quoted word token */
  public static final int QUOTED_WORD = 2;
  /** delimiter token */
  public static final int DELIMITER = 3;
  /** error token (returned for a quoted word that is not closed on its line) */
  public static final int ERROR = 4;
  /** not started token (value of getLastToken() before the first nextToken() call) */
  public static final int NOT_STARTED = 5;

  /** the reader to read from (pushback capacity of two characters) */
  protected PushbackReader reader_;
  /** the buffer to create the tokens */
  protected StringBuffer buffer_;
  /** all characters in this string are used as delimiters */
  protected String delimiters_ = ",";
  /** the escape character */
  protected int escapeChar_ = '\\';
  /** the quote character */
  protected int quoteChar_ = '"';

  /**
   * if true, the character most recently returned by readNextChar()
   * was preceded by the escape character and must be taken literally
   */
  protected boolean escapeMode_ = false;

  /** if true, end of line is respected */
  protected boolean eolIsSignificant_ = true;
  /** if true, escape characters are respected */
  protected boolean respectEscapedChars_ = false;
  /** if true, quoted words are respected */
  protected boolean respectQuotedWords_ = true;

  /** line count */
  protected int lineCount_ = 1;

  /** end of file marker */
  protected boolean eofReached_ = false;

  /** the last token that was found */
  protected int lastToken_ = NOT_STARTED;

  /**
   * if true, hasNextLine() pushed back a character that was already
   * decoded as escaped; the next readNextChar() call must hand it out
   * as escaped without decoding it a second time
   */
  private boolean peekedCharEscaped_ = false;


//----------------------------------------------------------------------
/**
 * Creates a tokenizer that reads from the given string. It uses the
 * comma as delimiter, does not respect escape characters but respects
 * quoted words.
 *
 * @param string the string to read from.
 */
  public Tokenizer(String string)
  {
    this(new StringReader(string));
  }

//----------------------------------------------------------------------
/**
 * Creates a tokenizer that reads from the given string. All
 * characters in the given delimiters string are used as
 * delimiter. The tokenizer does not respect escape characters but
 * respects quoted words.
 *
 * @param string the string to read from.
 * @param delimiters the delimiters to use.
 */
  public Tokenizer(String string, String delimiters)
  {
    this(new StringReader(string));
    setDelimiters(delimiters);
  }

//----------------------------------------------------------------------
/**
 * Creates a tokenizer that reads from the given stream. The bytes are
 * decoded with the platform's default charset; wrap the stream in an
 * InputStreamReader and use {@link #Tokenizer(Reader)} if a specific
 * charset is needed. The tokenizer uses the comma as delimiter, does
 * not respect escape characters but respects quoted words.
 *
 * @param inStream the stream to read from.
 */
  public Tokenizer(InputStream inStream)
  {
    this(new InputStreamReader(inStream));
  }

//----------------------------------------------------------------------
/**
 * Creates a tokenizer that reads from the given reader. It uses the
 * comma as delimiter, does not respect escape characters but respects
 * quoted words.
 *
 * @param reader the reader to read from.
 */
  public Tokenizer(Reader reader)
  {
    // one slot is needed to push back the character that ended a word
    // or that was peeked by hasNextLine(); the capacity of two is kept
    // from the original implementation
    reader_ = new PushbackReader(reader, 2);
    buffer_ = new StringBuffer();
  }

//----------------------------------------------------------------------
/**
 * Set the delimiter character. The default is the comma. Any
 * previously set delimiters are replaced.
 *
 * @param delimiterChar the delimiter character.
 */
  public void setDelimiter(int delimiterChar)
  {
    delimiters_ = String.valueOf((char)delimiterChar);
  }

//----------------------------------------------------------------------
/**
 * Get the first delimiter character.
 *
 * @return the delimiter character.
 * @deprecated use the getDelimiters() method now
 */
  @Deprecated
  public int getDelimiter()
  {
    return(delimiters_.charAt(0));
  }

//----------------------------------------------------------------------
/**
 * Set the delimiter characters. All characters in the delimiters are
 * used as delimiter.
 *
 * @param delimiters the delimiter characters.
 */
  public void setDelimiters(String delimiters)
  {
    delimiters_ = delimiters;
  }

//----------------------------------------------------------------------
/**
 * Get the delimiter characters.
 *
 * @return a string holding all delimiter characters.
 */
  public String getDelimiters()
  {
    return(delimiters_);
  }

//----------------------------------------------------------------------
/**
 * Set the escape character. The default is the backslash.
 *
 * @param escapeChar the escape character.
 */
  public void setEscapeChar(int escapeChar)
  {
    escapeChar_ = escapeChar;
  }

//----------------------------------------------------------------------
/**
 * Get the escape character.
 *
 * @return the escape character.
 */
  public int getEscapeChar()
  {
    return(escapeChar_);
  }

//----------------------------------------------------------------------
/**
 * If escape characters should be respected, set the param to
 * <code>true</code>. The default is to ignore escape characters.
 *
 * @param respectEscaped If escape characters should be respected,
 * set the param to <code>true</code>.
 */
  public void respectEscapedCharacters(boolean respectEscaped)
  {
    respectEscapedChars_ = respectEscaped;
  }

//----------------------------------------------------------------------
/**
 * Returns <code>true</code>, if escape character is respected.
 *
 * @return <code>true</code>, if escape character is respected.
 */
  public boolean respectEscapedCharacters()
  {
    return(respectEscapedChars_);
  }

//----------------------------------------------------------------------
/**
 * Get the quote character.
 *
 * @return the quote character.
 */
  public int getQuoteChar()
  {
    return(quoteChar_);
  }

//----------------------------------------------------------------------
/**
 * Set the quote character. The default is the double quote.
 *
 * @param quoteChar the quote character.
 */
  public void setQuoteChar(int quoteChar)
  {
    quoteChar_ = quoteChar;
  }

//----------------------------------------------------------------------
/**
 * If quoted words should be respected, set the param to
 * <code>true</code>. The default is to respect quoted words.
 *
 * @param respectQuotes If quoted words should be respected,
 * set the param to <code>true</code>.
 */
  public void respectQuotedWords(boolean respectQuotes)
  {
    respectQuotedWords_ = respectQuotes;
  }

//----------------------------------------------------------------------
/**
 * Returns <code>true</code>, if quoted words are respected.
 *
 * @return <code>true</code>, if quoted words are respected.
 */
  public boolean respectQuotedWords()
  {
    return(respectQuotedWords_);
  }

//----------------------------------------------------------------------
/**
 * If set to <code>true</code> the end of line is signaled by the EOL
 * token. If set to <code>false</code> end of line is treated as a
 * normal delimiter. The default value is true.
 *
 * @param significant if the end of line is treated as a special token
 * or as a delimiter.
 */
  public void eolIsSignificant(boolean significant)
  {
    eolIsSignificant_ = significant;
  }

//----------------------------------------------------------------------
/**
 * Returns <code>true</code>, if in case of an end of line detected,
 * an EOL token is returned. If <code>false</code>, the end of line is
 * treated as a normal delimiter.
 *
 * @return <code>true</code>, if in case of an end of line detected,
 * an EOL token is returned. If <code>false</code>, the end of line is
 * treated as a normal delimiter.
 */
  public boolean isEolSignificant()
  {
    return(eolIsSignificant_);
  }


//----------------------------------------------------------------------
/**
 * Returns the current line number of the reader. Counting starts at 1.
 *
 * @return the current line number of the reader.
 */
  public int getLineNumber()
  {
    return(lineCount_);
  }

//----------------------------------------------------------------------
/**
 * Returns the value of the token. If the token was of the type WORD
 * or QUOTED_WORD, the word is returned (without the quotes). For all
 * other tokens the returned string is empty, except for ERROR, where
 * it holds the characters read before the quoted word was aborted.
 *
 * @return the value of the token.
 */
  public String getWord()
  {
    return(buffer_.toString());
  }

//----------------------------------------------------------------------
/**
 * Returns the last token that was returned from the nextToken() method.
 *
 * @return the last token, or NOT_STARTED if nextToken() was not
 * called yet.
 */
  public int getLastToken()
  {
    return(lastToken_);
  }

//----------------------------------------------------------------------
/**
 * Returns true, if the given character is seen as a delimiter. This
 * method respects escape_mode, so an escaped character is never
 * reported as delimiter.
 *
 * @param character the character to check for delimiter
 * @return true, if the given character is seen as a delimiter.
 */
  protected boolean isDelimiter(int character)
  {
    // an escaped character is always literal (indexOf yields -1 for
    // the end-of-stream value -1, so that is never a delimiter either)
    return(!escapeMode_ && (delimiters_.indexOf(character) >= 0));
  }

//----------------------------------------------------------------------
/**
 * Returns true, if the given character is seen as a quote
 * character. This method respects escape_mode, so an escaped
 * character is never reported as quote character. It always returns
 * false if quoted words are not respected.
 *
 * @param character the character to check for quote.
 * @return true, if the given character is seen as a quote character.
 */
  protected boolean isQuoteChar(int character)
  {
    return(respectQuotedWords_ && !escapeMode_ && (character == quoteChar_));
  }

//----------------------------------------------------------------------
/**
 * Returns true, if the given character is seen as an escape
 * character. This method respects escape_mode, so an escaped
 * character is never reported as escape character. It always returns
 * false if escape characters are not respected.
 *
 * @param character the character to check for escape character.
 * @return true, if the given character is seen as an escape character.
 */
  protected boolean isEscapeChar(int character)
  {
    return(respectEscapedChars_ && !escapeMode_ && (character == escapeChar_));
  }

//----------------------------------------------------------------------
/**
 * Returns true, if the given character is seen as an end of line
 * character (LF, CR or the end-of-stream value -1). This method
 * respects escape_mode, so an escaped character is never reported as
 * end of line.
 * <p>
 * Note that this check has side effects: an escaped LF increments the
 * line count (the line break is part of a word, but still a line
 * break), and the end-of-stream value sets the end of file marker.
 *
 * @param character the character to check for end of line.
 * @return true, if the given character is seen as an end of line
 * character.
 */
  protected boolean isEndOfLine(int character)
  {
    // check for escape mode:
    if(escapeMode_)
    {
      if(character == '\n') // add line count, even if in escape mode!
        lineCount_++;
      return(false);
    }
    if(character == -1)
      eofReached_ = true;

    return((character == '\n') || (character == '\r') || (character == -1));
  }

//----------------------------------------------------------------------
/**
 * Closes the tokenizer (and the reader it uses internally).
 *
 * @exception IOException if an error occurred.
 */
  public void close()
    throws IOException
  {
    reader_.close();
  }

//----------------------------------------------------------------------
/**
 * Reads the next raw character from the reader, silently dropping
 * any CR characters.
 *
 * @return the next character that is not a CR, or -1 at the end of
 * the stream.
 * @exception IOException if an error occurred.
 */
  private int readIgnoringCarriageReturns()
    throws IOException
  {
    int character;
    do
    {
      character = reader_.read();
    }
    while(character == '\r');
    return(character);
  }

//----------------------------------------------------------------------
/**
 * Reads and returns the next character from the reader and checks for
 * the escape character. If an escape character is read, it is dropped,
 * the character following it is returned instead and the escape flag
 * is set for it. A newline following the escape character is ignored
 * (line continuation): the line count is incremented and the character
 * after the newline is returned, flagged as escaped. CR characters are
 * ignored everywhere, so an escaped CR+LF pair is treated like an
 * escaped LF.
 * <p>
 * The escape flag always describes the character returned by the most
 * recent call, so every call evaluates it anew; this allows escape
 * sequences to follow each other directly.
 * <p>
 * An escape character that is the very last character of the input
 * escapes nothing; in this case the end of file marker is set and -1
 * is returned.
 *
 * @return the next character, or -1 at the end of the stream.
 * @exception IOException if an error occurred.
 */
  protected int readNextChar()
    throws IOException
  {
    if(peekedCharEscaped_)
    {
      // hasNextLine() decoded this character already and pushed it
      // back without its escape character: restore the flag only
      peekedCharEscaped_ = false;
      escapeMode_ = true;
      return(reader_.read());
    }

    // the flag belonged to the previously returned character
    escapeMode_ = false;

    int next_char = readIgnoringCarriageReturns();
    if(!isEscapeChar(next_char))
      return(next_char);

    // ignore escape char itself:
    next_char = readIgnoringCarriageReturns();

    // check for newline and ignore it:
    if(next_char == '\n')
    {
      lineCount_++;
      next_char = readIgnoringCarriageReturns();
    }

    if(next_char == -1)
    {
      // dangling escape char at the end of the input: there is nothing
      // to escape, and -1 must stay recognizable as end of stream
      eofReached_ = true;
      return(-1);
    }

    escapeMode_ = true;
    return(next_char);
  }

//----------------------------------------------------------------------
/**
 * Stores the given token as the last token and returns it.
 *
 * @param token the token to remember.
 * @return the given token.
 */
  private int remember(int token)
  {
    lastToken_ = token;
    return(token);
  }

//----------------------------------------------------------------------
/**
 * Returns the next token from the reader. The token's value may be
 * WORD, QUOTED_WORD, EOF, EOL, DELIMITER, or ERROR. In the case of
 * WORD or QUOTED_WORD the actual word can be obtained by the use of
 * the getWord method.
 * <p>
 * ERROR is returned if a quoted word is not closed before the end of
 * its line (or the end of the input); the line end is consumed in
 * that case. If the input does not end with a newline, the end of the
 * input is reported as EOF directly after the last word; if nothing
 * but the end of the input is left at the start of a token, one EOL
 * (or DELIMITER, if end of line is not significant) is reported
 * before EOF.
 *
 * @return the next token.
 * @exception IOException if an error occurred.
 */
  public int nextToken()
    throws IOException
  {
    buffer_.setLength(0);

    int next_char = readNextChar();

    // handle EOF (the marker is set while reading the previous token):
    if(eofReached_)
      return(remember(EOF));

    // handle EOL:
    if(isEndOfLine(next_char))
    {
      lineCount_++;
      return(remember(eolIsSignificant_ ? EOL : DELIMITER));
    }

    // handle DELIMITER
    if(isDelimiter(next_char))
      return(remember(DELIMITER));

    // handle quoted words:
    if(isQuoteChar(next_char))
    {
      while(true)
      {
        next_char = readNextChar();
        if(isEndOfLine(next_char))
          return(remember(ERROR)); // closing quote is missing
        if(isQuoteChar(next_char))
          return(remember(QUOTED_WORD));

        // no special char, then append to buffer:
        buffer_.append((char)next_char);
      }
    }

    // handle 'normal' words:
    while(true)
    {
      buffer_.append((char)next_char);
      next_char = readNextChar();
      if(isDelimiter(next_char) || isEndOfLine(next_char))
      {
        // the terminating character is a token of its own, so hand it
        // back; the end of the stream is remembered in eofReached_ and
        // must not be pushed back (it would turn into the char \uFFFF)
        if(next_char != -1)
          reader_.unread(next_char);
        return(remember(WORD));
      }
    }
  }

//----------------------------------------------------------------------
/**
 * Returns true, if the tokenizer can return another line. If the
 * tokenizer is at the beginning of a line, one character is peeked to
 * find out if the end of the input was reached; the peeked character
 * (including the information whether it was escaped) is handed back,
 * so calling this method does not influence the result of the
 * following nextLine() or nextToken() call.
 *
 * @return true, if the tokenizer can return another line.
 * @exception IOException if an error occurred.
 */
  public boolean hasNextLine()
    throws IOException
  {
    if(lastToken_ == EOF)
      return(false);

    if((lastToken_ == EOL) || (lastToken_ == NOT_STARTED))
    {
      int next_char = readNextChar();
      if(next_char == -1)
        return(false);

      // the escape character (if any) is consumed already, so the
      // escape state has to travel with the pushed back character
      reader_.unread(next_char);
      peekedCharEscaped_ = escapeMode_;
      escapeMode_ = false;
    }
    return(true);
  }


//----------------------------------------------------------------------
/**
 * Returns a list of elements (Strings) from the next line of the
 * tokenizer. If there are multiple delimiters without any values in
 * between, empty (zero length) strings are added to the list. They
 * may be removed by the use of the {@link
 * #removeZeroLengthElements(List)} method.
 * <p>
 * The returned list always holds at least one element (an empty
 * line results in a list holding one empty string). Tokens other than
 * WORD, QUOTED_WORD, DELIMITER, EOL and EOF (i.e. ERROR for a quoted
 * word that is not closed) are reported on System.err and skipped.
 *
 * @return a list of elements (Strings) from the next line of the
 * tokenizer.
 * @exception IOException if an error occurred.
 */
  public List<String> nextLine()
    throws IOException
  {
    List<String> list = new ArrayList<String>();
    String word = "";
    while(true)
    {
      int token = nextToken();
      switch(token)
      {
      case Tokenizer.WORD:
      case Tokenizer.QUOTED_WORD:
        word = getWord();
        break;
      case Tokenizer.DELIMITER:
        list.add(word);
        word = "";
        break;
      case Tokenizer.EOL:
      case Tokenizer.EOF:
        list.add(word);
        return(list);
      default:
        System.err.println("Unknown Token: "+token);
      }
    }
  }

//----------------------------------------------------------------------
/**
 * This helper method removes all zero length elements from the given
 * list and returns it. The given list is not changed!
 *
 * @param list the list of String objects to remove the zero elements from.
 * @return a copy of the given list where all zero length elements are removed.
 */
  public static List<String> removeZeroLengthElements(List<String> list)
  {
    return removeZeroLengthElements(list, false);
  }

//----------------------------------------------------------------------
/**
 * This helper method optionally trims all elements and removes all
 * zero length (length is taken after trimming leading and trailing
 * spaces) elements from the given list. This method copies the
 * (trimmed and) non-zero elements to a new list; the given list is not
 * changed.
 *
 * @param list the list of String objects to remove the zero elements from.
 * @param trim if set to <code>true</code>, all leading and trailing spaces are removed from
 * the elements. This is done, before the length is compared to zero (and the element
 * may be removed if the length is zero). If set to <code>true</code>, elements
 * that only consist of spaces are removed as well!
 * @return a new list where all zero length elements are removed.
 */
  public static List<String> removeZeroLengthElements(List<String> list, boolean trim)
  {
    List<String> new_list = new ArrayList<String>();
    for(String element : list)
    {
      String value = trim ? element.trim() : element;
      if(value.length() != 0)
        new_list.add(value);
    }
    return(new_list);
  }
}
941
942
Note: See TracBrowser for help on using the repository browser.