source: josm/trunk/src/org/glassfish/json/JsonTokenizer.java@ 13254

Last change on this file since 13254 was 13231, checked in by Don-vip, 7 years ago

see #15682 - upgrade to JSR 374 (JSON Processing) API 1.1.2

File size: 18.7 KB
Line 
1/*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright (c) 2012-2017 Oracle and/or its affiliates. All rights reserved.
5 *
6 * The contents of this file are subject to the terms of either the GNU
7 * General Public License Version 2 only ("GPL") or the Common Development
8 * and Distribution License("CDDL") (collectively, the "License"). You
9 * may not use this file except in compliance with the License. You can
10 * obtain a copy of the License at
11 * https://oss.oracle.com/licenses/CDDL+GPL-1.1
12 * or LICENSE.txt. See the License for the specific
13 * language governing permissions and limitations under the License.
14 *
15 * When distributing the software, include this License Header Notice in each
16 * file and include the License file at LICENSE.txt.
17 *
18 * GPL Classpath Exception:
19 * Oracle designates this particular file as subject to the "Classpath"
20 * exception as provided by Oracle in the GPL Version 2 section of the License
21 * file that accompanied this code.
22 *
23 * Modifications:
24 * If applicable, add the following below the License Header, with the fields
25 * enclosed by brackets [] replaced by your own identifying information:
26 * "Portions Copyright [year] [name of copyright owner]"
27 *
28 * Contributor(s):
29 * If you wish your version of this file to be governed by only the CDDL or
30 * only the GPL Version 2, indicate your decision by adding "[Contributor]
31 * elects to include this software in this distribution under the [CDDL or GPL
32 * Version 2] license." If you don't indicate a single choice of license, a
33 * recipient has the option to distribute your version of this file under
34 * either the CDDL, the GPL Version 2 or to extend the choice of license to
35 * its licensees as provided above. However, if you add GPL Version 2 code
36 * and therefore, elected the GPL Version 2 license, then the option applies
37 * only if the new code is made subject to such option by the copyright
38 * holder.
39 */
40
41package org.glassfish.json;
42
43import org.glassfish.json.api.BufferPool;
44
45import javax.json.JsonException;
46import javax.json.stream.JsonLocation;
47import javax.json.stream.JsonParser;
48import javax.json.stream.JsonParsingException;
49import java.io.*;
50import java.math.BigDecimal;
51import java.util.Arrays;
52
53import javax.json.stream.JsonParser.Event;
54
55/**
56 * JSON Tokenizer
57 *
58 * @author Jitendra Kotamraju
59 */
60final class JsonTokenizer implements Closeable {
61 // Table to look up hex ch -> value (for e.g HEX['F'] = 15, HEX['5'] = 5)
62 private final static int[] HEX = new int[128];
63 static {
64 Arrays.fill(HEX, -1);
65 for (int i='0'; i <= '9'; i++) {
66 HEX[i] = i-'0';
67 }
68 for (int i='A'; i <= 'F'; i++) {
69 HEX[i] = 10+i-'A';
70 }
71 for (int i='a'; i <= 'f'; i++) {
72 HEX[i] = 10+i-'a';
73 }
74 }
75 private final static int HEX_LENGTH = HEX.length;
76
77 private final BufferPool bufferPool;
78
79 private final Reader reader;
80
81 // Internal buffer that is used for parsing. It is also used
82 // for storing current string and number value token
83 private char[] buf;
84
85 // Indexes in buffer
86 //
87 // XXXssssssssssssXXXXXXXXXXXXXXXXXXXXXXrrrrrrrrrrrrrrXXXXXX
88 // ^ ^ ^ ^
89 // | | | |
90 // storeBegin storeEnd readBegin readEnd
91 private int readBegin;
92 private int readEnd;
93 private int storeBegin;
94 private int storeEnd;
95
96 // line number of the current pointer of parsing char
97 private long lineNo = 1;
98
99 // XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
100 // ^
101 // |
102 // bufferOffset
103 //
104 // offset of the last \r\n or \n. will be used to calculate column number
105 // of a token or an error. This may be outside of the buffer.
106 private long lastLineOffset = 0;
107 // offset in the stream for the start of the buffer, will be used in
108 // calculating JsonLocation's stream offset, column no.
109 private long bufferOffset = 0;
110
111 private boolean minus;
112 private boolean fracOrExp;
113 private BigDecimal bd;
114
115 enum JsonToken {
116 CURLYOPEN(Event.START_OBJECT, false),
117 SQUAREOPEN(Event.START_ARRAY, false),
118 COLON(null, false),
119 COMMA(null, false),
120 STRING(Event.VALUE_STRING, true),
121 NUMBER(Event.VALUE_NUMBER, true),
122 TRUE(Event.VALUE_TRUE, true),
123 FALSE(Event.VALUE_FALSE, true),
124 NULL(Event.VALUE_NULL, true),
125 CURLYCLOSE(Event.END_OBJECT, false),
126 SQUARECLOSE(Event.END_ARRAY, false),
127 EOF(null, false);
128
129 private final JsonParser.Event event;
130 private final boolean value;
131
132 JsonToken(JsonParser.Event event, boolean value) {
133 this.event = event;
134 this.value = value;
135 }
136
137 JsonParser.Event getEvent() {
138 return event;
139 }
140
141 boolean isValue() {
142 return value;
143 }
144 }
145
146 JsonTokenizer(Reader reader, BufferPool bufferPool) {
147 this.reader = reader;
148 this.bufferPool = bufferPool;
149 buf = bufferPool.take();
150 }
151
152 private void readString() {
153 // when inPlace is true, no need to copy chars
154 boolean inPlace = true;
155 storeBegin = storeEnd = readBegin;
156
157 do {
158 // Write unescaped char block within the current buffer
159 if (inPlace) {
160 int ch;
161 while(readBegin < readEnd && ((ch=buf[readBegin]) >= 0x20) && ch != '\\') {
162 if (ch == '"') {
163 storeEnd = readBegin++; // ++ to consume quote char
164 return; // Got the entire string
165 }
166 readBegin++; // consume unescaped char
167 }
168 storeEnd = readBegin;
169 }
170
171 // string may be crossing buffer boundaries and may contain
172 // escaped characters.
173 int ch = read();
174 if (ch >= 0x20 && ch != 0x22 && ch != 0x5c) {
175 if (!inPlace) {
176 buf[storeEnd] = (char)ch;
177 }
178 storeEnd++;
179 continue;
180 }
181 switch (ch) {
182 case '\\':
183 inPlace = false; // Now onwards need to copy chars
184 unescape();
185 break;
186 case '"':
187 return;
188 default:
189 throw unexpectedChar(ch);
190 }
191 } while (true);
192 }
193
194 private void unescape() {
195 int ch = read();
196 switch (ch) {
197 case 'b':
198 buf[storeEnd++] = '\b';
199 break;
200 case 't':
201 buf[storeEnd++] = '\t';
202 break;
203 case 'n':
204 buf[storeEnd++] = '\n';
205 break;
206 case 'f':
207 buf[storeEnd++] = '\f';
208 break;
209 case 'r':
210 buf[storeEnd++] = '\r';
211 break;
212 case '"':
213 case '\\':
214 case '/':
215 buf[storeEnd++] = (char)ch;
216 break;
217 case 'u': {
218 int unicode = 0;
219 for (int i = 0; i < 4; i++) {
220 int ch3 = read();
221 int digit = (ch3 >= 0 && ch3 < HEX_LENGTH) ? HEX[ch3] : -1;
222 if (digit < 0) {
223 throw unexpectedChar(ch3);
224 }
225 unicode = (unicode << 4)|digit;
226 }
227 buf[storeEnd++] = (char)unicode;
228 break;
229 }
230 default:
231 throw unexpectedChar(ch);
232 }
233 }
234
235 // Reads a number char. If the char is within the buffer, directly
236 // reads from the buffer. Otherwise, uses read() which takes care
237 // of resizing, filling up the buf, adjusting the pointers
238 private int readNumberChar() {
239 if (readBegin < readEnd) {
240 return buf[readBegin++];
241 } else {
242 storeEnd = readBegin;
243 return read();
244 }
245 }
246
247 private void readNumber(int ch) {
248 storeBegin = storeEnd = readBegin-1;
249 // sign
250 if (ch == '-') {
251 this.minus = true;
252 ch = readNumberChar();
253 if (ch < '0' || ch >'9') {
254 throw unexpectedChar(ch);
255 }
256 }
257
258 // int
259 if (ch == '0') {
260 ch = readNumberChar();
261 } else {
262 do {
263 ch = readNumberChar();
264 } while (ch >= '0' && ch <= '9');
265 }
266
267 // frac
268 if (ch == '.') {
269 this.fracOrExp = true;
270 int count = 0;
271 do {
272 ch = readNumberChar();
273 count++;
274 } while (ch >= '0' && ch <= '9');
275 if (count == 1) {
276 throw unexpectedChar(ch);
277 }
278 }
279
280 // exp
281 if (ch == 'e' || ch == 'E') {
282 this.fracOrExp = true;
283 ch = readNumberChar();
284 if (ch == '+' || ch == '-') {
285 ch = readNumberChar();
286 }
287 int count;
288 for (count = 0; ch >= '0' && ch <= '9'; count++) {
289 ch = readNumberChar();
290 }
291 if (count == 0) {
292 throw unexpectedChar(ch);
293 }
294 }
295 if (ch != -1) {
296 // Only reset readBegin if eof has not been reached
297 readBegin--;
298 storeEnd = readBegin;
299 }
300 }
301
302 private void readTrue() {
303 int ch1 = read();
304 if (ch1 != 'r') {
305 throw expectedChar(ch1, 'r');
306 }
307 int ch2 = read();
308 if (ch2 != 'u') {
309 throw expectedChar(ch2, 'u');
310 }
311 int ch3 = read();
312 if (ch3 != 'e') {
313 throw expectedChar(ch3, 'e');
314 }
315 }
316
317 private void readFalse() {
318 int ch1 = read();
319 if (ch1 != 'a') {
320 throw expectedChar(ch1, 'a');
321 }
322 int ch2 = read();
323 if (ch2 != 'l') {
324 throw expectedChar(ch2, 'l');
325 }
326 int ch3 = read();
327 if (ch3 != 's') {
328 throw expectedChar(ch3, 's');
329 }
330 int ch4 = read();
331 if (ch4 != 'e') {
332 throw expectedChar(ch4, 'e');
333 }
334 }
335
336 private void readNull() {
337 int ch1 = read();
338 if (ch1 != 'u') {
339 throw expectedChar(ch1, 'u');
340 }
341 int ch2 = read();
342 if (ch2 != 'l') {
343 throw expectedChar(ch2, 'l');
344 }
345 int ch3 = read();
346 if (ch3 != 'l') {
347 throw expectedChar(ch3, 'l');
348 }
349 }
350
351 /*
352 * Could be optimized if the parser uses separate methods to match colon
353 * etc (that would avoid the switch statement cost in certain cases)
354 */
355 JsonToken nextToken() {
356 reset();
357 int ch = read();
358
359 // whitespace
360 while (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0d) {
361 if (ch == '\r') {
362 ++lineNo;
363 ch = read();
364 if (ch == '\n') {
365 lastLineOffset = bufferOffset+readBegin;
366 } else {
367 lastLineOffset = bufferOffset+readBegin-1;
368 continue;
369 }
370 } else if (ch == '\n') {
371 ++lineNo;
372 lastLineOffset = bufferOffset+readBegin;
373 }
374 ch = read();
375 }
376
377 switch (ch) {
378 case '"':
379 readString();
380 return JsonToken.STRING;
381 case '{':
382 return JsonToken.CURLYOPEN;
383 case '[':
384 return JsonToken.SQUAREOPEN;
385 case ':':
386 return JsonToken.COLON;
387 case ',':
388 return JsonToken.COMMA;
389 case 't':
390 readTrue();
391 return JsonToken.TRUE;
392 case 'f':
393 readFalse();
394 return JsonToken.FALSE;
395 case 'n':
396 readNull();
397 return JsonToken.NULL;
398 case ']':
399 return JsonToken.SQUARECLOSE;
400 case '}':
401 return JsonToken.CURLYCLOSE;
402 case '0':
403 case '1':
404 case '2':
405 case '3':
406 case '4':
407 case '5':
408 case '6':
409 case '7':
410 case '8':
411 case '9':
412 case '-':
413 readNumber(ch);
414 return JsonToken.NUMBER;
415 case -1:
416 return JsonToken.EOF;
417 default:
418 throw unexpectedChar(ch);
419 }
420 }
421
422 boolean hasNextToken() {
423 reset();
424 int ch = peek();
425
426 // whitespace
427 while (ch == 0x20 || ch == 0x09 || ch == 0x0a || ch == 0x0d) {
428 if (ch == '\r') {
429 ++lineNo;
430 ++readBegin;
431 ch = peek();
432 if (ch == '\n') {
433 lastLineOffset = bufferOffset+readBegin+1;
434 } else {
435 lastLineOffset = bufferOffset+readBegin;
436 continue;
437 }
438 } else if (ch == '\n') {
439 ++lineNo;
440 lastLineOffset = bufferOffset+readBegin+1;
441 }
442 ++readBegin;
443 ch = peek();
444 }
445 return ch != -1;
446 }
447
448 private int peek() {
449 try {
450 if (readBegin == readEnd) { // need to fill the buffer
451 int len = fillBuf();
452 if (len == -1) {
453 return -1;
454 }
455 assert len != 0;
456 readBegin = storeEnd;
457 readEnd = readBegin+len;
458 }
459 return buf[readBegin];
460 } catch (IOException ioe) {
461 throw new JsonException(JsonMessages.TOKENIZER_IO_ERR(), ioe);
462 }
463 }
464
465 // Gives the location of the last char. Used for
466 // JsonParsingException.getLocation
467 JsonLocation getLastCharLocation() {
468 // Already read the char, so subtracting -1
469 return new JsonLocationImpl(lineNo, bufferOffset +readBegin-lastLineOffset, bufferOffset +readBegin-1);
470 }
471
472 // Gives the parser location. Used for JsonParser.getLocation
473 JsonLocation getLocation() {
474 return new JsonLocationImpl(lineNo, bufferOffset +readBegin-lastLineOffset+1, bufferOffset +readBegin);
475 }
476
477 private int read() {
478 try {
479 if (readBegin == readEnd) { // need to fill the buffer
480 int len = fillBuf();
481 if (len == -1) {
482 return -1;
483 }
484 assert len != 0;
485 readBegin = storeEnd;
486 readEnd = readBegin+len;
487 }
488 return buf[readBegin++];
489 } catch (IOException ioe) {
490 throw new JsonException(JsonMessages.TOKENIZER_IO_ERR(), ioe);
491 }
492 }
493
494 private int fillBuf() throws IOException {
495 if (storeEnd != 0) {
496 int storeLen = storeEnd-storeBegin;
497 if (storeLen > 0) {
498 // there is some store data
499 if (storeLen == buf.length) {
500 // buffer is full, double the capacity
501 char[] doubleBuf = Arrays.copyOf(buf, 2 * buf.length);
502 bufferPool.recycle(buf);
503 buf = doubleBuf;
504 } else {
505 // Left shift all the stored data to make space
506 System.arraycopy(buf, storeBegin, buf, 0, storeLen);
507 storeEnd = storeLen;
508 storeBegin = 0;
509 bufferOffset += readBegin-storeEnd;
510 }
511 } else {
512 storeBegin = storeEnd = 0;
513 bufferOffset += readBegin;
514 }
515 } else {
516 bufferOffset += readBegin;
517 }
518 // Fill the rest of the buf
519 return reader.read(buf, storeEnd, buf.length-storeEnd);
520 }
521
522 // state associated with the current token is no more valid
523 private void reset() {
524 if (storeEnd != 0) {
525 storeBegin = 0;
526 storeEnd = 0;
527 bd = null;
528 minus = false;
529 fracOrExp = false;
530 }
531 }
532
533 String getValue() {
534 return new String(buf, storeBegin, storeEnd-storeBegin);
535 }
536
537 BigDecimal getBigDecimal() {
538 if (bd == null) {
539 bd = new BigDecimal(buf, storeBegin, storeEnd-storeBegin);
540 }
541 return bd;
542 }
543
544 int getInt() {
545 // no need to create BigDecimal for common integer values (1-9 digits)
546 int storeLen = storeEnd-storeBegin;
547 if (!fracOrExp && (storeLen <= 9 || (minus && storeLen <= 10))) {
548 int num = 0;
549 int i = minus ? 1 : 0;
550 for(; i < storeLen; i++) {
551 num = num * 10 + (buf[storeBegin+i] - '0');
552 }
553 return minus ? -num : num;
554 } else {
555 return getBigDecimal().intValue();
556 }
557 }
558
559 long getLong() {
560 // no need to create BigDecimal for common integer values (1-18 digits)
561 int storeLen = storeEnd-storeBegin;
562 if (!fracOrExp && (storeLen <= 18 || (minus && storeLen <= 19))) {
563 long num = 0;
564 int i = minus ? 1 : 0;
565 for(; i < storeLen; i++) {
566 num = num * 10 + (buf[storeBegin+i] - '0');
567 }
568 return minus ? -num : num;
569 } else {
570 return getBigDecimal().longValue();
571 }
572 }
573
574 // returns true for common integer values (1-9 digits).
575 // So there are cases it will return false even though the number is int
576 boolean isDefinitelyInt() {
577 int storeLen = storeEnd-storeBegin;
578 return !fracOrExp && (storeLen <= 9 || (minus && storeLen <= 10));
579 }
580
581 // returns true for common long values (1-18 digits).
582 // So there are cases it will return false even though the number is long
583 boolean isDefinitelyLong() {
584 int storeLen = storeEnd-storeBegin;
585 return !fracOrExp && (storeLen <= 18 || (minus && storeLen <= 19));
586 }
587
588 boolean isIntegral() {
589 return !fracOrExp || getBigDecimal().scale() == 0;
590 }
591
592 @Override
593 public void close() throws IOException {
594 reader.close();
595 bufferPool.recycle(buf);
596 }
597
598 private JsonParsingException unexpectedChar(int ch) {
599 JsonLocation location = getLastCharLocation();
600 return new JsonParsingException(
601 JsonMessages.TOKENIZER_UNEXPECTED_CHAR(ch, location), location);
602 }
603
604 private JsonParsingException expectedChar(int unexpected, char expected) {
605 JsonLocation location = getLastCharLocation();
606 return new JsonParsingException(
607 JsonMessages.TOKENIZER_EXPECTED_CHAR(unexpected, location, expected), location);
608 }
609
610}
Note: See TracBrowser for help on using the repository browser.