1 | /*
|
---|
2 | * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
|
---|
3 | *
|
---|
4 | * Copyright (c) 2012-2013 Oracle and/or its affiliates. All rights reserved.
|
---|
5 | *
|
---|
6 | * The contents of this file are subject to the terms of either the GNU
|
---|
7 | * General Public License Version 2 only ("GPL") or the Common Development
|
---|
8 | * and Distribution License("CDDL") (collectively, the "License"). You
|
---|
9 | * may not use this file except in compliance with the License. You can
|
---|
10 | * obtain a copy of the License at
|
---|
11 | * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
|
---|
12 | * or packager/legal/LICENSE.txt. See the License for the specific
|
---|
13 | * language governing permissions and limitations under the License.
|
---|
14 | *
|
---|
15 | * When distributing the software, include this License Header Notice in each
|
---|
16 | * file and include the License file at packager/legal/LICENSE.txt.
|
---|
17 | *
|
---|
18 | * GPL Classpath Exception:
|
---|
19 | * Oracle designates this particular file as subject to the "Classpath"
|
---|
20 | * exception as provided by Oracle in the GPL Version 2 section of the License
|
---|
21 | * file that accompanied this code.
|
---|
22 | *
|
---|
23 | * Modifications:
|
---|
24 | * If applicable, add the following below the License Header, with the fields
|
---|
25 | * enclosed by brackets [] replaced by your own identifying information:
|
---|
26 | * "Portions Copyright [year] [name of copyright owner]"
|
---|
27 | *
|
---|
28 | * Contributor(s):
|
---|
29 | * If you wish your version of this file to be governed by only the CDDL or
|
---|
30 | * only the GPL Version 2, indicate your decision by adding "[Contributor]
|
---|
31 | * elects to include this software in this distribution under the [CDDL or GPL
|
---|
32 | * Version 2] license." If you don't indicate a single choice of license, a
|
---|
33 | * recipient has the option to distribute your version of this file under
|
---|
34 | * either the CDDL, the GPL Version 2 or to extend the choice of license to
|
---|
35 | * its licensees as provided above. However, if you add GPL Version 2 code
|
---|
36 | * and therefore, elected the GPL Version 2 license, then the option applies
|
---|
37 | * only if the new code is made subject to such option by the copyright
|
---|
38 | * holder.
|
---|
39 | */
|
---|
40 |
|
---|
41 | package org.glassfish.json;
|
---|
42 |
|
---|
43 | import javax.json.JsonException;
|
---|
44 | import java.io.FilterInputStream;
|
---|
45 | import java.io.IOException;
|
---|
46 | import java.io.InputStream;
|
---|
47 | import java.nio.charset.Charset;
|
---|
48 |
|
---|
49 | /**
|
---|
50 | * A filter stream that detects the unicode encoding for the original
|
---|
51 | * stream
|
---|
52 | *
|
---|
53 | * @author Jitendra Kotamraju
|
---|
54 | */
|
---|
55 | class UnicodeDetectingInputStream extends FilterInputStream {
|
---|
56 | private static final Charset UTF_8 = Charset.forName("UTF-8");
|
---|
57 | private static final Charset UTF_16BE = Charset.forName("UTF-16BE");
|
---|
58 | private static final Charset UTF_16LE = Charset.forName("UTF-16LE");
|
---|
59 | private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
|
---|
60 | private static final Charset UTF_32BE = Charset.forName("UTF-32BE");
|
---|
61 |
|
---|
62 | private static final byte FF = (byte)0xFF;
|
---|
63 | private static final byte FE = (byte)0xFE;
|
---|
64 | private static final byte EF = (byte)0xEF;
|
---|
65 | private static final byte BB = (byte)0xBB;
|
---|
66 | private static final byte BF = (byte)0xBF;
|
---|
67 | private static final byte NUL = (byte)0x00;
|
---|
68 |
|
---|
69 | private final byte[] buf = new byte[4];
|
---|
70 | private int bufLen;
|
---|
71 | private int curIndex;
|
---|
72 | private final Charset charset;
|
---|
73 |
|
---|
74 | UnicodeDetectingInputStream(InputStream is) {
|
---|
75 | super(is);
|
---|
76 | charset = detectEncoding();
|
---|
77 | }
|
---|
78 |
|
---|
79 | Charset getCharset() {
|
---|
80 | return charset;
|
---|
81 | }
|
---|
82 |
|
---|
83 | private void fillBuf() {
|
---|
84 | int b1;
|
---|
85 | int b2;
|
---|
86 | int b3;
|
---|
87 | int b4;
|
---|
88 |
|
---|
89 | try {
|
---|
90 | b1 = in.read();
|
---|
91 | if (b1 == -1) {
|
---|
92 | return;
|
---|
93 | }
|
---|
94 |
|
---|
95 | b2 = in.read();
|
---|
96 | if (b2 == -1) {
|
---|
97 | bufLen = 1;
|
---|
98 | buf[0] = (byte)b1;
|
---|
99 | return;
|
---|
100 | }
|
---|
101 |
|
---|
102 | b3 = in.read();
|
---|
103 | if (b3 == -1) {
|
---|
104 | bufLen = 2;
|
---|
105 | buf[0] = (byte)b1;
|
---|
106 | buf[1] = (byte)b2;
|
---|
107 | return;
|
---|
108 | }
|
---|
109 |
|
---|
110 | b4 = in.read();
|
---|
111 | if (b4 == -1) {
|
---|
112 | bufLen = 3;
|
---|
113 | buf[0] = (byte)b1;
|
---|
114 | buf[1] = (byte)b2;
|
---|
115 | buf[2] = (byte)b3;
|
---|
116 | return;
|
---|
117 | }
|
---|
118 | bufLen = 4;
|
---|
119 | buf[0] = (byte)b1;
|
---|
120 | buf[1] = (byte)b2;
|
---|
121 | buf[2] = (byte)b3;
|
---|
122 | buf[3] = (byte)b4;
|
---|
123 | } catch (IOException ioe) {
|
---|
124 | throw new JsonException("I/O error while auto-detecting the encoding of stream", ioe);
|
---|
125 | }
|
---|
126 | }
|
---|
127 |
|
---|
128 | private Charset detectEncoding() {
|
---|
129 | fillBuf();
|
---|
130 | if (bufLen < 2) {
|
---|
131 | throw new JsonException("Cannot auto-detect encoding, not enough chars");
|
---|
132 | } else if (bufLen == 4) {
|
---|
133 | // Use BOM to detect encoding
|
---|
134 | if (buf[0] == NUL && buf[1] == NUL && buf[2] == FE && buf[3] == FF) {
|
---|
135 | curIndex = 4;
|
---|
136 | return UTF_32BE;
|
---|
137 | } else if (buf[0] == FF && buf[1] == FE && buf[2] == NUL && buf[3] == NUL) {
|
---|
138 | curIndex = 4;
|
---|
139 | return UTF_32LE;
|
---|
140 | } else if (buf[0] == FE && buf[1] == FF) {
|
---|
141 | curIndex = 2;
|
---|
142 | return UTF_16BE;
|
---|
143 | } else if (buf[0] == FF && buf[1] == FE) {
|
---|
144 | curIndex = 2;
|
---|
145 | return UTF_16LE;
|
---|
146 | } else if (buf[0] == EF && buf[1] == BB && buf[2] == BF) {
|
---|
147 | curIndex = 3;
|
---|
148 | return UTF_8;
|
---|
149 | }
|
---|
150 | // No BOM, just use JSON RFC's encoding algo to auto-detect
|
---|
151 | if (buf[0] == NUL && buf[1] == NUL && buf[2] == NUL) {
|
---|
152 | return UTF_32BE;
|
---|
153 | } else if (buf[0] == NUL && buf[2] == NUL) {
|
---|
154 | return UTF_16BE;
|
---|
155 | } else if (buf[1] == NUL && buf[2] == NUL && buf[3] == NUL) {
|
---|
156 | return UTF_32LE;
|
---|
157 | } else if (buf[1] == NUL && buf[3] == NUL) {
|
---|
158 | return UTF_16LE;
|
---|
159 | }
|
---|
160 | }
|
---|
161 | return UTF_8;
|
---|
162 | }
|
---|
163 |
|
---|
164 | @Override
|
---|
165 | public int read() throws IOException {
|
---|
166 | if (curIndex < bufLen) {
|
---|
167 | return buf[curIndex++];
|
---|
168 | }
|
---|
169 | return in.read();
|
---|
170 | }
|
---|
171 |
|
---|
172 | @Override
|
---|
173 | public int read(byte b[], int off, int len) throws IOException {
|
---|
174 | if (curIndex < bufLen) {
|
---|
175 | if (len == 0) {
|
---|
176 | return 0;
|
---|
177 | }
|
---|
178 | if (off < 0 || len < 0 || len > b.length -off) {
|
---|
179 | throw new IndexOutOfBoundsException();
|
---|
180 | }
|
---|
181 | int min = Math.min(bufLen-curIndex, len);
|
---|
182 | System.arraycopy(buf, curIndex, b, off, min);
|
---|
183 | curIndex += min;
|
---|
184 | return min;
|
---|
185 | }
|
---|
186 | return in.read(b, off, len);
|
---|
187 | }
|
---|
188 |
|
---|
189 | }
|
---|