1 | /*
|
---|
2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
|
---|
3 | *
|
---|
4 | * Copyright (c) 2012-2017 Oracle and/or its affiliates. All rights reserved.
|
---|
5 | *
|
---|
6 | * The contents of this file are subject to the terms of either the GNU
|
---|
7 | * General Public License Version 2 only ("GPL") or the Common Development
|
---|
8 | * and Distribution License("CDDL") (collectively, the "License"). You
|
---|
9 | * may not use this file except in compliance with the License. You can
|
---|
10 | * obtain a copy of the License at
|
---|
11 | * https://oss.oracle.com/licenses/CDDL+GPL-1.1
|
---|
12 | * or LICENSE.txt. See the License for the specific
|
---|
13 | * language governing permissions and limitations under the License.
|
---|
14 | *
|
---|
15 | * When distributing the software, include this License Header Notice in each
|
---|
16 | * file and include the License file at LICENSE.txt.
|
---|
17 | *
|
---|
18 | * GPL Classpath Exception:
|
---|
19 | * Oracle designates this particular file as subject to the "Classpath"
|
---|
20 | * exception as provided by Oracle in the GPL Version 2 section of the License
|
---|
21 | * file that accompanied this code.
|
---|
22 | *
|
---|
23 | * Modifications:
|
---|
24 | * If applicable, add the following below the License Header, with the fields
|
---|
25 | * enclosed by brackets [] replaced by your own identifying information:
|
---|
26 | * "Portions Copyright [year] [name of copyright owner]"
|
---|
27 | *
|
---|
28 | * Contributor(s):
|
---|
29 | * If you wish your version of this file to be governed by only the CDDL or
|
---|
30 | * only the GPL Version 2, indicate your decision by adding "[Contributor]
|
---|
31 | * elects to include this software in this distribution under the [CDDL or GPL
|
---|
32 | * Version 2] license." If you don't indicate a single choice of license, a
|
---|
33 | * recipient has the option to distribute your version of this file under
|
---|
34 | * either the CDDL, the GPL Version 2 or to extend the choice of license to
|
---|
35 | * its licensees as provided above. However, if you add GPL Version 2 code
|
---|
36 | * and therefore, elected the GPL Version 2 license, then the option applies
|
---|
37 | * only if the new code is made subject to such option by the copyright
|
---|
38 | * holder.
|
---|
39 | */
|
---|
40 |
|
---|
41 | package org.glassfish.json;
|
---|
42 |
|
---|
43 | import javax.json.JsonException;
|
---|
44 | import java.io.FilterInputStream;
|
---|
45 | import java.io.IOException;
|
---|
46 | import java.io.InputStream;
|
---|
47 | import java.nio.charset.Charset;
|
---|
48 | import java.nio.charset.StandardCharsets;
|
---|
49 |
|
---|
50 | /**
|
---|
51 | * A filter stream that detects the unicode encoding for the original
|
---|
52 | * stream
|
---|
53 | *
|
---|
54 | * @author Jitendra Kotamraju
|
---|
55 | */
|
---|
56 | class UnicodeDetectingInputStream extends FilterInputStream {
|
---|
57 |
|
---|
58 | private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
|
---|
59 | private static final Charset UTF_32BE = Charset.forName("UTF-32BE");
|
---|
60 |
|
---|
61 | private static final byte FF = (byte)0xFF;
|
---|
62 | private static final byte FE = (byte)0xFE;
|
---|
63 | private static final byte EF = (byte)0xEF;
|
---|
64 | private static final byte BB = (byte)0xBB;
|
---|
65 | private static final byte BF = (byte)0xBF;
|
---|
66 | private static final byte NUL = (byte)0x00;
|
---|
67 |
|
---|
68 | private final byte[] buf = new byte[4];
|
---|
69 | private int bufLen;
|
---|
70 | private int curIndex;
|
---|
71 | private final Charset charset;
|
---|
72 |
|
---|
73 | UnicodeDetectingInputStream(InputStream is) {
|
---|
74 | super(is);
|
---|
75 | charset = detectEncoding();
|
---|
76 | }
|
---|
77 |
|
---|
78 | Charset getCharset() {
|
---|
79 | return charset;
|
---|
80 | }
|
---|
81 |
|
---|
82 | private void fillBuf() {
|
---|
83 | int b1;
|
---|
84 | int b2;
|
---|
85 | int b3;
|
---|
86 | int b4;
|
---|
87 |
|
---|
88 | try {
|
---|
89 | b1 = in.read();
|
---|
90 | if (b1 == -1) {
|
---|
91 | return;
|
---|
92 | }
|
---|
93 |
|
---|
94 | b2 = in.read();
|
---|
95 | if (b2 == -1) {
|
---|
96 | bufLen = 1;
|
---|
97 | buf[0] = (byte)b1;
|
---|
98 | return;
|
---|
99 | }
|
---|
100 |
|
---|
101 | b3 = in.read();
|
---|
102 | if (b3 == -1) {
|
---|
103 | bufLen = 2;
|
---|
104 | buf[0] = (byte)b1;
|
---|
105 | buf[1] = (byte)b2;
|
---|
106 | return;
|
---|
107 | }
|
---|
108 |
|
---|
109 | b4 = in.read();
|
---|
110 | if (b4 == -1) {
|
---|
111 | bufLen = 3;
|
---|
112 | buf[0] = (byte)b1;
|
---|
113 | buf[1] = (byte)b2;
|
---|
114 | buf[2] = (byte)b3;
|
---|
115 | return;
|
---|
116 | }
|
---|
117 | bufLen = 4;
|
---|
118 | buf[0] = (byte)b1;
|
---|
119 | buf[1] = (byte)b2;
|
---|
120 | buf[2] = (byte)b3;
|
---|
121 | buf[3] = (byte)b4;
|
---|
122 | } catch (IOException ioe) {
|
---|
123 | throw new JsonException(JsonMessages.PARSER_INPUT_ENC_DETECT_IOERR(), ioe);
|
---|
124 | }
|
---|
125 | }
|
---|
126 |
|
---|
127 | private Charset detectEncoding() {
|
---|
128 | fillBuf();
|
---|
129 | if (bufLen < 2) {
|
---|
130 | throw new JsonException(JsonMessages.PARSER_INPUT_ENC_DETECT_FAILED());
|
---|
131 | } else if (bufLen == 4) {
|
---|
132 | // Use BOM to detect encoding
|
---|
133 | if (buf[0] == NUL && buf[1] == NUL && buf[2] == FE && buf[3] == FF) {
|
---|
134 | curIndex = 4;
|
---|
135 | return UTF_32BE;
|
---|
136 | } else if (buf[0] == FF && buf[1] == FE && buf[2] == NUL && buf[3] == NUL) {
|
---|
137 | curIndex = 4;
|
---|
138 | return UTF_32LE;
|
---|
139 | } else if (buf[0] == FE && buf[1] == FF) {
|
---|
140 | curIndex = 2;
|
---|
141 | return StandardCharsets.UTF_16BE;
|
---|
142 | } else if (buf[0] == FF && buf[1] == FE) {
|
---|
143 | curIndex = 2;
|
---|
144 | return StandardCharsets.UTF_16LE;
|
---|
145 | } else if (buf[0] == EF && buf[1] == BB && buf[2] == BF) {
|
---|
146 | curIndex = 3;
|
---|
147 | return StandardCharsets.UTF_8;
|
---|
148 | }
|
---|
149 | // No BOM, just use JSON RFC's encoding algo to auto-detect
|
---|
150 | if (buf[0] == NUL && buf[1] == NUL && buf[2] == NUL) {
|
---|
151 | return UTF_32BE;
|
---|
152 | } else if (buf[0] == NUL && buf[2] == NUL) {
|
---|
153 | return StandardCharsets.UTF_16BE;
|
---|
154 | } else if (buf[1] == NUL && buf[2] == NUL && buf[3] == NUL) {
|
---|
155 | return UTF_32LE;
|
---|
156 | } else if (buf[1] == NUL && buf[3] == NUL) {
|
---|
157 | return StandardCharsets.UTF_16LE;
|
---|
158 | }
|
---|
159 | }
|
---|
160 | return StandardCharsets.UTF_8;
|
---|
161 | }
|
---|
162 |
|
---|
163 | @Override
|
---|
164 | public int read() throws IOException {
|
---|
165 | if (curIndex < bufLen) {
|
---|
166 | return buf[curIndex++];
|
---|
167 | }
|
---|
168 | return in.read();
|
---|
169 | }
|
---|
170 |
|
---|
171 | @Override
|
---|
172 | public int read(byte b[], int off, int len) throws IOException {
|
---|
173 | if (curIndex < bufLen) {
|
---|
174 | if (len == 0) {
|
---|
175 | return 0;
|
---|
176 | }
|
---|
177 | if (off < 0 || len < 0 || len > b.length -off) {
|
---|
178 | throw new IndexOutOfBoundsException();
|
---|
179 | }
|
---|
180 | int min = Math.min(bufLen-curIndex, len);
|
---|
181 | System.arraycopy(buf, curIndex, b, off, min);
|
---|
182 | curIndex += min;
|
---|
183 | return min;
|
---|
184 | }
|
---|
185 | return in.read(b, off, len);
|
---|
186 | }
|
---|
187 |
|
---|
188 | }
|
---|