source: josm/trunk/src/org/glassfish/json/UnicodeDetectingInputStream.java@ 15970

Last change on this file since 15970 was 13231, checked in by Don-vip, 7 years ago

see #15682 - upgrade to JSR 374 (JSON Processing) API 1.1.2

File size: 6.2 KB
Line 
1/*
2 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
3 *
4 * Copyright (c) 2012-2017 Oracle and/or its affiliates. All rights reserved.
5 *
6 * The contents of this file are subject to the terms of either the GNU
7 * General Public License Version 2 only ("GPL") or the Common Development
8 * and Distribution License("CDDL") (collectively, the "License"). You
9 * may not use this file except in compliance with the License. You can
10 * obtain a copy of the License at
11 * https://oss.oracle.com/licenses/CDDL+GPL-1.1
12 * or LICENSE.txt. See the License for the specific
13 * language governing permissions and limitations under the License.
14 *
15 * When distributing the software, include this License Header Notice in each
16 * file and include the License file at LICENSE.txt.
17 *
18 * GPL Classpath Exception:
19 * Oracle designates this particular file as subject to the "Classpath"
20 * exception as provided by Oracle in the GPL Version 2 section of the License
21 * file that accompanied this code.
22 *
23 * Modifications:
24 * If applicable, add the following below the License Header, with the fields
25 * enclosed by brackets [] replaced by your own identifying information:
26 * "Portions Copyright [year] [name of copyright owner]"
27 *
28 * Contributor(s):
29 * If you wish your version of this file to be governed by only the CDDL or
30 * only the GPL Version 2, indicate your decision by adding "[Contributor]
31 * elects to include this software in this distribution under the [CDDL or GPL
32 * Version 2] license." If you don't indicate a single choice of license, a
33 * recipient has the option to distribute your version of this file under
34 * either the CDDL, the GPL Version 2 or to extend the choice of license to
35 * its licensees as provided above. However, if you add GPL Version 2 code
36 * and therefore, elected the GPL Version 2 license, then the option applies
37 * only if the new code is made subject to such option by the copyright
38 * holder.
39 */
40
41package org.glassfish.json;
42
43import javax.json.JsonException;
44import java.io.FilterInputStream;
45import java.io.IOException;
46import java.io.InputStream;
47import java.nio.charset.Charset;
48import java.nio.charset.StandardCharsets;
49
50/**
51 * A filter stream that detects the unicode encoding for the original
52 * stream
53 *
54 * @author Jitendra Kotamraju
55 */
56class UnicodeDetectingInputStream extends FilterInputStream {
57
58 private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
59 private static final Charset UTF_32BE = Charset.forName("UTF-32BE");
60
61 private static final byte FF = (byte)0xFF;
62 private static final byte FE = (byte)0xFE;
63 private static final byte EF = (byte)0xEF;
64 private static final byte BB = (byte)0xBB;
65 private static final byte BF = (byte)0xBF;
66 private static final byte NUL = (byte)0x00;
67
68 private final byte[] buf = new byte[4];
69 private int bufLen;
70 private int curIndex;
71 private final Charset charset;
72
73 UnicodeDetectingInputStream(InputStream is) {
74 super(is);
75 charset = detectEncoding();
76 }
77
78 Charset getCharset() {
79 return charset;
80 }
81
82 private void fillBuf() {
83 int b1;
84 int b2;
85 int b3;
86 int b4;
87
88 try {
89 b1 = in.read();
90 if (b1 == -1) {
91 return;
92 }
93
94 b2 = in.read();
95 if (b2 == -1) {
96 bufLen = 1;
97 buf[0] = (byte)b1;
98 return;
99 }
100
101 b3 = in.read();
102 if (b3 == -1) {
103 bufLen = 2;
104 buf[0] = (byte)b1;
105 buf[1] = (byte)b2;
106 return;
107 }
108
109 b4 = in.read();
110 if (b4 == -1) {
111 bufLen = 3;
112 buf[0] = (byte)b1;
113 buf[1] = (byte)b2;
114 buf[2] = (byte)b3;
115 return;
116 }
117 bufLen = 4;
118 buf[0] = (byte)b1;
119 buf[1] = (byte)b2;
120 buf[2] = (byte)b3;
121 buf[3] = (byte)b4;
122 } catch (IOException ioe) {
123 throw new JsonException(JsonMessages.PARSER_INPUT_ENC_DETECT_IOERR(), ioe);
124 }
125 }
126
127 private Charset detectEncoding() {
128 fillBuf();
129 if (bufLen < 2) {
130 throw new JsonException(JsonMessages.PARSER_INPUT_ENC_DETECT_FAILED());
131 } else if (bufLen == 4) {
132 // Use BOM to detect encoding
133 if (buf[0] == NUL && buf[1] == NUL && buf[2] == FE && buf[3] == FF) {
134 curIndex = 4;
135 return UTF_32BE;
136 } else if (buf[0] == FF && buf[1] == FE && buf[2] == NUL && buf[3] == NUL) {
137 curIndex = 4;
138 return UTF_32LE;
139 } else if (buf[0] == FE && buf[1] == FF) {
140 curIndex = 2;
141 return StandardCharsets.UTF_16BE;
142 } else if (buf[0] == FF && buf[1] == FE) {
143 curIndex = 2;
144 return StandardCharsets.UTF_16LE;
145 } else if (buf[0] == EF && buf[1] == BB && buf[2] == BF) {
146 curIndex = 3;
147 return StandardCharsets.UTF_8;
148 }
149 // No BOM, just use JSON RFC's encoding algo to auto-detect
150 if (buf[0] == NUL && buf[1] == NUL && buf[2] == NUL) {
151 return UTF_32BE;
152 } else if (buf[0] == NUL && buf[2] == NUL) {
153 return StandardCharsets.UTF_16BE;
154 } else if (buf[1] == NUL && buf[2] == NUL && buf[3] == NUL) {
155 return UTF_32LE;
156 } else if (buf[1] == NUL && buf[3] == NUL) {
157 return StandardCharsets.UTF_16LE;
158 }
159 }
160 return StandardCharsets.UTF_8;
161 }
162
163 @Override
164 public int read() throws IOException {
165 if (curIndex < bufLen) {
166 return buf[curIndex++];
167 }
168 return in.read();
169 }
170
171 @Override
172 public int read(byte b[], int off, int len) throws IOException {
173 if (curIndex < bufLen) {
174 if (len == 0) {
175 return 0;
176 }
177 if (off < 0 || len < 0 || len > b.length -off) {
178 throw new IndexOutOfBoundsException();
179 }
180 int min = Math.min(bufLen-curIndex, len);
181 System.arraycopy(buf, curIndex, b, off, min);
182 curIndex += min;
183 return min;
184 }
185 return in.read(b, off, len);
186 }
187
188}
Note: See TracBrowser for help on using the repository browser.