Context Navigation

source: josm/trunk/src/org/glassfish/json/JsonTokenizer.java@ 13254

Last change on this file since 13254 was 13231, checked in by Don-vip, 7 years ago
see #15682 - upgrade to JSR 374 (JSON Processing) API 1.1.2
File size: 18.7 KB

Line
1	/*
2	* DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3	*
4	* Copyright (c) 2012-2017 Oracle and/or its affiliates. All rights reserved.
5	*
6	* The contents of this file are subject to the terms of either the GNU
7	* General Public License Version 2 only ("GPL") or the Common Development
8	* and Distribution License("CDDL") (collectively, the "License"). You
9	* may not use this file except in compliance with the License. You can
10	* obtain a copy of the License at
11	* https://oss.oracle.com/licenses/CDDL+GPL-1.1
12	* or LICENSE.txt. See the License for the specific
13	* language governing permissions and limitations under the License.
14	*
15	* When distributing the software, include this License Header Notice in each
16	* file and include the License file at LICENSE.txt.
17	*
18	* GPL Classpath Exception:
19	* Oracle designates this particular file as subject to the "Classpath"
20	* exception as provided by Oracle in the GPL Version 2 section of the License
21	* file that accompanied this code.
22	*
23	* Modifications:
24	* If applicable, add the following below the License Header, with the fields
25	* enclosed by brackets [] replaced by your own identifying information:
26	* "Portions Copyright [year] [name of copyright owner]"
27	*
28	* Contributor(s):
29	* If you wish your version of this file to be governed by only the CDDL or
30	* only the GPL Version 2, indicate your decision by adding "[Contributor]
31	* elects to include this software in this distribution under the [CDDL or GPL
32	* Version 2] license." If you don't indicate a single choice of license, a
33	* recipient has the option to distribute your version of this file under
34	* either the CDDL, the GPL Version 2 or to extend the choice of license to
35	* its licensees as provided above. However, if you add GPL Version 2 code
36	* and therefore, elected the GPL Version 2 license, then the option applies
37	* only if the new code is made subject to such option by the copyright
38	* holder.
39	*/
40
41	package org.glassfish.json;
42
43	import org.glassfish.json.api.BufferPool;
44
45	import javax.json.JsonException;
46	import javax.json.stream.JsonLocation;
47	import javax.json.stream.JsonParser;
48	import javax.json.stream.JsonParsingException;
49	import java.io.*;
50	import java.math.BigDecimal;
51	import java.util.Arrays;
52
53	import javax.json.stream.JsonParser.Event;
54
55	/**
56	* JSON Tokenizer
57	*
58	* @author Jitendra Kotamraju
59	*/
60	final class JsonTokenizer implements Closeable {
// Lookup table from an ASCII char to its hexadecimal digit value
// (e.g. HEX['F'] = 15, HEX['5'] = 5). Chars that are not hex digits map to -1.
private final static int[] HEX = new int[128];
static {
    for (int c = 0; c < HEX.length; c++) {
        // Within the ASCII range, radix-16 digits are exactly 0-9, A-F and a-f;
        // Character.digit yields -1 for everything else.
        HEX[c] = Character.digit(c, 16);
    }
}
private final static int HEX_LENGTH = HEX.length;
76
// Pool the parsing buffer is taken from and recycled to
private final BufferPool bufferPool;

// Source of the characters being tokenized
private final Reader reader;

// Internal buffer that is used for parsing. It is also used
// for storing the current string or number token value
private char[] buf;

// Indexes in buffer
//
// XXXssssssssssssXXXXXXXXXXXXXXXXXXXXXXrrrrrrrrrrrrrrXXXXXX
//    ^           ^                     ^             ^
//    |           |                     |             |
//    storeBegin  storeEnd              readBegin     readEnd
private int storeBegin, storeEnd;
private int readBegin, readEnd;

// line number of the char currently being parsed
private long lineNo = 1;

// XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
// ^
// |
// bufferOffset
//
// offset of the last \r\n or \n. It is used to calculate the column number
// of a token or an error, and may lie outside of the buffer.
private long lastLineOffset;
// offset in the stream of the start of the buffer; it is used when
// calculating JsonLocation's stream offset and column number.
private long bufferOffset;

// set by readNumber() when the current number starts with '-'
private boolean minus;
// set by readNumber() when the current number has a fraction or an exponent
private boolean fracOrExp;
// lazily created BigDecimal for the current number token, see getBigDecimal()
private BigDecimal bd;
114
115	enum JsonToken {
116	CURLYOPEN(Event.START_OBJECT, false),
117	SQUAREOPEN(Event.START_ARRAY, false),
118	COLON(null, false),
119	COMMA(null, false),
120	STRING(Event.VALUE_STRING, true),
121	NUMBER(Event.VALUE_NUMBER, true),
122	TRUE(Event.VALUE_TRUE, true),
123	FALSE(Event.VALUE_FALSE, true),
124	NULL(Event.VALUE_NULL, true),
125	CURLYCLOSE(Event.END_OBJECT, false),
126	SQUARECLOSE(Event.END_ARRAY, false),
127	EOF(null, false);
128
129	private final JsonParser.Event event;
130	private final boolean value;
131
132	JsonToken(JsonParser.Event event, boolean value) {
133	this.event = event;
134	this.value = value;
135	}
136
137	JsonParser.Event getEvent() {
138	return event;
139	}
140
141	boolean isValue() {
142	return value;
143	}
144	}
145
/**
 * Creates a tokenizer reading from the given reader. The parsing buffer is
 * taken from the given pool.
 *
 * @param reader source of the characters to tokenize
 * @param bufferPool pool that supplies the parsing buffer
 */
JsonTokenizer(Reader reader, BufferPool bufferPool) {
    this.bufferPool = bufferPool;
    this.reader = reader;
    this.buf = bufferPool.take();
}
151
152	private void readString() {
153	// when inPlace is true, no need to copy chars
154	boolean inPlace = true;
155	storeBegin = storeEnd = readBegin;
156
157	do {
158	// Write unescaped char block within the current buffer
159	if (inPlace) {
160	int ch;
161	while(readBegin < readEnd && ((ch=buf[readBegin]) >= 0x20) && ch != '\\') {
162	if (ch == '"') {
163	storeEnd = readBegin++; // ++ to consume quote char
164	return; // Got the entire string
165	}
166	readBegin++; // consume unescaped char
167	}
168	storeEnd = readBegin;
169	}
170
171	// string may be crossing buffer boundaries and may contain
172	// escaped characters.
173	int ch = read();
174	if (ch >= 0x20 && ch != 0x22 && ch != 0x5c) {
175	if (!inPlace) {
176	buf[storeEnd] = (char)ch;
177	}
178	storeEnd++;
179	continue;
180	}
181	switch (ch) {
182	case '\\':
183	inPlace = false; // Now onwards need to copy chars
184	unescape();
185	break;
186	case '"':
187	return;
188	default:
189	throw unexpectedChar(ch);
190	}
191	} while (true);
192	}
193
194	private void unescape() {
195	int ch = read();
196	switch (ch) {
197	case 'b':
198	buf[storeEnd++] = '\b';
199	break;
200	case 't':
201	buf[storeEnd++] = '\t';
202	break;
203	case 'n':
204	buf[storeEnd++] = '\n';
205	break;
206	case 'f':
207	buf[storeEnd++] = '\f';
208	break;
209	case 'r':
210	buf[storeEnd++] = '\r';
211	break;
212	case '"':
213	case '\\':
214	case '/':
215	buf[storeEnd++] = (char)ch;
216	break;
217	case 'u': {
218	int unicode = 0;
219	for (int i = 0; i < 4; i++) {
220	int ch3 = read();
221	int digit = (ch3 >= 0 && ch3 < HEX_LENGTH) ? HEX[ch3] : -1;
222	if (digit < 0) {
223	throw unexpectedChar(ch3);
224	}
225	unicode = (unicode << 4)\|digit;
226	}
227	buf[storeEnd++] = (char)unicode;
228	break;
229	}
230	default:
231	throw unexpectedChar(ch);
232	}
233	}
234
235	// Reads a number char. If the char is within the buffer, directly
236	// reads from the buffer. Otherwise, uses read() which takes care
237	// of resizing, filling up the buf, adjusting the pointers
238	private int readNumberChar() {
239	if (readBegin < readEnd) {
240	return buf[readBegin++];
241	} else {
242	storeEnd = readBegin;
243	return read();
244	}
245	}
246
247	private void readNumber(int ch) {
248	storeBegin = storeEnd = readBegin-1;
249	// sign
250	if (ch == '-') {
251	this.minus = true;
252	ch = readNumberChar();
253	if (ch < '0' \|\| ch >'9') {
254	throw unexpectedChar(ch);
255	}
256	}
257
258	// int
259	if (ch == '0') {
260	ch = readNumberChar();
261	} else {
262	do {
263	ch = readNumberChar();
264	} while (ch >= '0' && ch <= '9');
265	}
266
267	// frac
268	if (ch == '.') {
269	this.fracOrExp = true;
270	int count = 0;
271	do {
272	ch = readNumberChar();
273	count++;
274	} while (ch >= '0' && ch <= '9');
275	if (count == 1) {
276	throw unexpectedChar(ch);
277	}
278	}
279
280	// exp
281	if (ch == 'e' \|\| ch == 'E') {
282	this.fracOrExp = true;
283	ch = readNumberChar();
284	if (ch == '+' \|\| ch == '-') {
285	ch = readNumberChar();
286	}
287	int count;
288	for (count = 0; ch >= '0' && ch <= '9'; count++) {
289	ch = readNumberChar();
290	}
291	if (count == 0) {
292	throw unexpectedChar(ch);
293	}
294	}
295	if (ch != -1) {
296	// Only reset readBegin if eof has not been reached
297	readBegin--;
298	storeEnd = readBegin;
299	}
300	}
301
302	private void readTrue() {
303	int ch1 = read();
304	if (ch1 != 'r') {
305	throw expectedChar(ch1, 'r');
306	}
307	int ch2 = read();
308	if (ch2 != 'u') {
309	throw expectedChar(ch2, 'u');
310	}
311	int ch3 = read();
312	if (ch3 != 'e') {
313	throw expectedChar(ch3, 'e');
314	}
315	}
316
317	private void readFalse() {
318	int ch1 = read();
319	if (ch1 != 'a') {
320	throw expectedChar(ch1, 'a');
321	}
322	int ch2 = read();
323	if (ch2 != 'l') {
324	throw expectedChar(ch2, 'l');
325	}
326	int ch3 = read();
327	if (ch3 != 's') {
328	throw expectedChar(ch3, 's');
329	}
330	int ch4 = read();
331	if (ch4 != 'e') {
332	throw expectedChar(ch4, 'e');
333	}
334	}
335
336	private void readNull() {
337	int ch1 = read();
338	if (ch1 != 'u') {
339	throw expectedChar(ch1, 'u');
340	}
341	int ch2 = read();
342	if (ch2 != 'l') {
343	throw expectedChar(ch2, 'l');
344	}
345	int ch3 = read();
346	if (ch3 != 'l') {
347	throw expectedChar(ch3, 'l');
348	}
349	}
350
351	/*
352	* Could be optimized if the parser uses separate methods to match colon
353	* etc (that would avoid the switch statement cost in certain cases)
354	*/
355	JsonToken nextToken() {
356	reset();
357	int ch = read();
358
359	// whitespace
360	while (ch == 0x20 \|\| ch == 0x09 \|\| ch == 0x0a \|\| ch == 0x0d) {
361	if (ch == '\r') {
362	++lineNo;
363	ch = read();
364	if (ch == '\n') {
365	lastLineOffset = bufferOffset+readBegin;
366	} else {
367	lastLineOffset = bufferOffset+readBegin-1;
368	continue;
369	}
370	} else if (ch == '\n') {
371	++lineNo;
372	lastLineOffset = bufferOffset+readBegin;
373	}
374	ch = read();
375	}
376
377	switch (ch) {
378	case '"':
379	readString();
380	return JsonToken.STRING;
381	case '{':
382	return JsonToken.CURLYOPEN;
383	case '[':
384	return JsonToken.SQUAREOPEN;
385	case ':':
386	return JsonToken.COLON;
387	case ',':
388	return JsonToken.COMMA;
389	case 't':
390	readTrue();
391	return JsonToken.TRUE;
392	case 'f':
393	readFalse();
394	return JsonToken.FALSE;
395	case 'n':
396	readNull();
397	return JsonToken.NULL;
398	case ']':
399	return JsonToken.SQUARECLOSE;
400	case '}':
401	return JsonToken.CURLYCLOSE;
402	case '0':
403	case '1':
404	case '2':
405	case '3':
406	case '4':
407	case '5':
408	case '6':
409	case '7':
410	case '8':
411	case '9':
412	case '-':
413	readNumber(ch);
414	return JsonToken.NUMBER;
415	case -1:
416	return JsonToken.EOF;
417	default:
418	throw unexpectedChar(ch);
419	}
420	}
421
422	boolean hasNextToken() {
423	reset();
424	int ch = peek();
425
426	// whitespace
427	while (ch == 0x20 \|\| ch == 0x09 \|\| ch == 0x0a \|\| ch == 0x0d) {
428	if (ch == '\r') {
429	++lineNo;
430	++readBegin;
431	ch = peek();
432	if (ch == '\n') {
433	lastLineOffset = bufferOffset+readBegin+1;
434	} else {
435	lastLineOffset = bufferOffset+readBegin;
436	continue;
437	}
438	} else if (ch == '\n') {
439	++lineNo;
440	lastLineOffset = bufferOffset+readBegin+1;
441	}
442	++readBegin;
443	ch = peek();
444	}
445	return ch != -1;
446	}
447
448	private int peek() {
449	try {
450	if (readBegin == readEnd) { // need to fill the buffer
451	int len = fillBuf();
452	if (len == -1) {
453	return -1;
454	}
455	assert len != 0;
456	readBegin = storeEnd;
457	readEnd = readBegin+len;
458	}
459	return buf[readBegin];
460	} catch (IOException ioe) {
461	throw new JsonException(JsonMessages.TOKENIZER_IO_ERR(), ioe);
462	}
463	}
464
465	// Gives the location of the last char. Used for
466	// JsonParsingException.getLocation
467	JsonLocation getLastCharLocation() {
468	// Already read the char, so subtracting -1
469	return new JsonLocationImpl(lineNo, bufferOffset +readBegin-lastLineOffset, bufferOffset +readBegin-1);
470	}
471
472	// Gives the parser location. Used for JsonParser.getLocation
473	JsonLocation getLocation() {
474	return new JsonLocationImpl(lineNo, bufferOffset +readBegin-lastLineOffset+1, bufferOffset +readBegin);
475	}
476
477	private int read() {
478	try {
479	if (readBegin == readEnd) { // need to fill the buffer
480	int len = fillBuf();
481	if (len == -1) {
482	return -1;
483	}
484	assert len != 0;
485	readBegin = storeEnd;
486	readEnd = readBegin+len;
487	}
488	return buf[readBegin++];
489	} catch (IOException ioe) {
490	throw new JsonException(JsonMessages.TOKENIZER_IO_ERR(), ioe);
491	}
492	}
493
494	private int fillBuf() throws IOException {
495	if (storeEnd != 0) {
496	int storeLen = storeEnd-storeBegin;
497	if (storeLen > 0) {
498	// there is some store data
499	if (storeLen == buf.length) {
500	// buffer is full, double the capacity
501	char[] doubleBuf = Arrays.copyOf(buf, 2 * buf.length);
502	bufferPool.recycle(buf);
503	buf = doubleBuf;
504	} else {
505	// Left shift all the stored data to make space
506	System.arraycopy(buf, storeBegin, buf, 0, storeLen);
507	storeEnd = storeLen;
508	storeBegin = 0;
509	bufferOffset += readBegin-storeEnd;
510	}
511	} else {
512	storeBegin = storeEnd = 0;
513	bufferOffset += readBegin;
514	}
515	} else {
516	bufferOffset += readBegin;
517	}
518	// Fill the rest of the buf
519	return reader.read(buf, storeEnd, buf.length-storeEnd);
520	}
521
522	// state associated with the current token is no more valid
523	private void reset() {
524	if (storeEnd != 0) {
525	storeBegin = 0;
526	storeEnd = 0;
527	bd = null;
528	minus = false;
529	fracOrExp = false;
530	}
531	}
532
533	String getValue() {
534	return new String(buf, storeBegin, storeEnd-storeBegin);
535	}
536
537	BigDecimal getBigDecimal() {
538	if (bd == null) {
539	bd = new BigDecimal(buf, storeBegin, storeEnd-storeBegin);
540	}
541	return bd;
542	}
543
544	int getInt() {
545	// no need to create BigDecimal for common integer values (1-9 digits)
546	int storeLen = storeEnd-storeBegin;
547	if (!fracOrExp && (storeLen <= 9 \|\| (minus && storeLen <= 10))) {
548	int num = 0;
549	int i = minus ? 1 : 0;
550	for(; i < storeLen; i++) {
551	num = num * 10 + (buf[storeBegin+i] - '0');
552	}
553	return minus ? -num : num;
554	} else {
555	return getBigDecimal().intValue();
556	}
557	}
558
559	long getLong() {
560	// no need to create BigDecimal for common integer values (1-18 digits)
561	int storeLen = storeEnd-storeBegin;
562	if (!fracOrExp && (storeLen <= 18 \|\| (minus && storeLen <= 19))) {
563	long num = 0;
564	int i = minus ? 1 : 0;
565	for(; i < storeLen; i++) {
566	num = num * 10 + (buf[storeBegin+i] - '0');
567	}
568	return minus ? -num : num;
569	} else {
570	return getBigDecimal().longValue();
571	}
572	}
573
574	// returns true for common integer values (1-9 digits).
575	// So there are cases it will return false even though the number is int
576	boolean isDefinitelyInt() {
577	int storeLen = storeEnd-storeBegin;
578	return !fracOrExp && (storeLen <= 9 \|\| (minus && storeLen <= 10));
579	}
580
581	// returns true for common long values (1-18 digits).
582	// So there are cases it will return false even though the number is long
583	boolean isDefinitelyLong() {
584	int storeLen = storeEnd-storeBegin;
585	return !fracOrExp && (storeLen <= 18 \|\| (minus && storeLen <= 19));
586	}
587
588	boolean isIntegral() {
589	return !fracOrExp \|\| getBigDecimal().scale() == 0;
590	}
591
592	@Override
593	public void close() throws IOException {
594	reader.close();
595	bufferPool.recycle(buf);
596	}
597
598	private JsonParsingException unexpectedChar(int ch) {
599	JsonLocation location = getLastCharLocation();
600	return new JsonParsingException(
601	JsonMessages.TOKENIZER_UNEXPECTED_CHAR(ch, location), location);
602	}
603
604	private JsonParsingException expectedChar(int unexpected, char expected) {
605	JsonLocation location = getLastCharLocation();
606	return new JsonParsingException(
607	JsonMessages.TOKENIZER_EXPECTED_CHAR(unexpected, location, expected), location);
608	}
609
610	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: