/*
 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS HEADER.
 *
 * Copyright (c) 2012-2013 Oracle and/or its affiliates. All rights reserved.
 *
 * The contents of this file are subject to the terms of either the GNU
 * General Public License Version 2 only ("GPL") or the Common Development
 * and Distribution License("CDDL") (collectively, the "License"). You
 * may not use this file except in compliance with the License. You can
 * obtain a copy of the License at
 * https://glassfish.dev.java.net/public/CDDL+GPL_1_1.html
 * or packager/legal/LICENSE.txt. See the License for the specific
 * language governing permissions and limitations under the License.
 *
 * When distributing the software, include this License Header Notice in each
 * file and include the License file at packager/legal/LICENSE.txt.
 *
 * GPL Classpath Exception:
 * Oracle designates this particular file as subject to the "Classpath"
 * exception as provided by Oracle in the GPL Version 2 section of the License
 * file that accompanied this code.
 *
 * Modifications:
 * If applicable, add the following below the License Header, with the fields
 * enclosed by brackets [] replaced by your own identifying information:
 * "Portions Copyright [year] [name of copyright owner]"
 *
 * Contributor(s):
 * If you wish your version of this file to be governed by only the CDDL or
 * only the GPL Version 2, indicate your decision by adding "[Contributor]
 * elects to include this software in this distribution under the [CDDL or GPL
 * Version 2] license." If you don't indicate a single choice of license, a
 * recipient has the option to distribute your version of this file under
 * either the CDDL, the GPL Version 2 or to extend the choice of license to
 * its licensees as provided above. However, if you add GPL Version 2 code
 * and therefore, elected the GPL Version 2 license, then the option applies
 * only if the new code is made subject to such option by the copyright
 * holder.
 */
| 40 |
|
---|
| 41 | package org.glassfish.json;
|
---|
| 42 |
|
---|
| 43 | import javax.json.JsonException;
|
---|
| 44 | import java.io.FilterInputStream;
|
---|
| 45 | import java.io.IOException;
|
---|
| 46 | import java.io.InputStream;
|
---|
| 47 | import java.nio.charset.Charset;
|
---|
| 48 |
|
---|
| 49 | /**
|
---|
| 50 | * A filter stream that detects the unicode encoding for the original
|
---|
| 51 | * stream
|
---|
| 52 | *
|
---|
| 53 | * @author Jitendra Kotamraju
|
---|
| 54 | */
|
---|
| 55 | class UnicodeDetectingInputStream extends FilterInputStream {
|
---|
| 56 | private static final Charset UTF_8 = Charset.forName("UTF-8");
|
---|
| 57 | private static final Charset UTF_16BE = Charset.forName("UTF-16BE");
|
---|
| 58 | private static final Charset UTF_16LE = Charset.forName("UTF-16LE");
|
---|
| 59 | private static final Charset UTF_32LE = Charset.forName("UTF-32LE");
|
---|
| 60 | private static final Charset UTF_32BE = Charset.forName("UTF-32BE");
|
---|
| 61 |
|
---|
| 62 | private static final byte FF = (byte)0xFF;
|
---|
| 63 | private static final byte FE = (byte)0xFE;
|
---|
| 64 | private static final byte EF = (byte)0xEF;
|
---|
| 65 | private static final byte BB = (byte)0xBB;
|
---|
| 66 | private static final byte BF = (byte)0xBF;
|
---|
| 67 | private static final byte NUL = (byte)0x00;
|
---|
| 68 |
|
---|
| 69 | private final byte[] buf = new byte[4];
|
---|
| 70 | private int bufLen;
|
---|
| 71 | private int curIndex;
|
---|
| 72 | private final Charset charset;
|
---|
| 73 |
|
---|
| 74 | UnicodeDetectingInputStream(InputStream is) {
|
---|
| 75 | super(is);
|
---|
| 76 | charset = detectEncoding();
|
---|
| 77 | }
|
---|
| 78 |
|
---|
| 79 | Charset getCharset() {
|
---|
| 80 | return charset;
|
---|
| 81 | }
|
---|
| 82 |
|
---|
| 83 | private void fillBuf() {
|
---|
| 84 | int b1;
|
---|
| 85 | int b2;
|
---|
| 86 | int b3;
|
---|
| 87 | int b4;
|
---|
| 88 |
|
---|
| 89 | try {
|
---|
| 90 | b1 = in.read();
|
---|
| 91 | if (b1 == -1) {
|
---|
| 92 | return;
|
---|
| 93 | }
|
---|
| 94 |
|
---|
| 95 | b2 = in.read();
|
---|
| 96 | if (b2 == -1) {
|
---|
| 97 | bufLen = 1;
|
---|
| 98 | buf[0] = (byte)b1;
|
---|
| 99 | return;
|
---|
| 100 | }
|
---|
| 101 |
|
---|
| 102 | b3 = in.read();
|
---|
| 103 | if (b3 == -1) {
|
---|
| 104 | bufLen = 2;
|
---|
| 105 | buf[0] = (byte)b1;
|
---|
| 106 | buf[1] = (byte)b2;
|
---|
| 107 | return;
|
---|
| 108 | }
|
---|
| 109 |
|
---|
| 110 | b4 = in.read();
|
---|
| 111 | if (b4 == -1) {
|
---|
| 112 | bufLen = 3;
|
---|
| 113 | buf[0] = (byte)b1;
|
---|
| 114 | buf[1] = (byte)b2;
|
---|
| 115 | buf[2] = (byte)b3;
|
---|
| 116 | return;
|
---|
| 117 | }
|
---|
| 118 | bufLen = 4;
|
---|
| 119 | buf[0] = (byte)b1;
|
---|
| 120 | buf[1] = (byte)b2;
|
---|
| 121 | buf[2] = (byte)b3;
|
---|
| 122 | buf[3] = (byte)b4;
|
---|
| 123 | } catch (IOException ioe) {
|
---|
| 124 | throw new JsonException("I/O error while auto-detecting the encoding of stream", ioe);
|
---|
| 125 | }
|
---|
| 126 | }
|
---|
| 127 |
|
---|
| 128 | private Charset detectEncoding() {
|
---|
| 129 | fillBuf();
|
---|
| 130 | if (bufLen < 2) {
|
---|
| 131 | throw new JsonException("Cannot auto-detect encoding, not enough chars");
|
---|
| 132 | } else if (bufLen == 4) {
|
---|
| 133 | // Use BOM to detect encoding
|
---|
| 134 | if (buf[0] == NUL && buf[1] == NUL && buf[2] == FE && buf[3] == FF) {
|
---|
| 135 | curIndex = 4;
|
---|
| 136 | return UTF_32BE;
|
---|
| 137 | } else if (buf[0] == FF && buf[1] == FE && buf[2] == NUL && buf[3] == NUL) {
|
---|
| 138 | curIndex = 4;
|
---|
| 139 | return UTF_32LE;
|
---|
| 140 | } else if (buf[0] == FE && buf[1] == FF) {
|
---|
| 141 | curIndex = 2;
|
---|
| 142 | return UTF_16BE;
|
---|
| 143 | } else if (buf[0] == FF && buf[1] == FE) {
|
---|
| 144 | curIndex = 2;
|
---|
| 145 | return UTF_16LE;
|
---|
| 146 | } else if (buf[0] == EF && buf[1] == BB && buf[2] == BF) {
|
---|
| 147 | curIndex = 3;
|
---|
| 148 | return UTF_8;
|
---|
| 149 | }
|
---|
| 150 | // No BOM, just use JSON RFC's encoding algo to auto-detect
|
---|
| 151 | if (buf[0] == NUL && buf[1] == NUL && buf[2] == NUL) {
|
---|
| 152 | return UTF_32BE;
|
---|
| 153 | } else if (buf[0] == NUL && buf[2] == NUL) {
|
---|
| 154 | return UTF_16BE;
|
---|
| 155 | } else if (buf[1] == NUL && buf[2] == NUL && buf[3] == NUL) {
|
---|
| 156 | return UTF_32LE;
|
---|
| 157 | } else if (buf[1] == NUL && buf[3] == NUL) {
|
---|
| 158 | return UTF_16LE;
|
---|
| 159 | }
|
---|
| 160 | }
|
---|
| 161 | return UTF_8;
|
---|
| 162 | }
|
---|
| 163 |
|
---|
| 164 | @Override
|
---|
| 165 | public int read() throws IOException {
|
---|
| 166 | if (curIndex < bufLen) {
|
---|
| 167 | return buf[curIndex++];
|
---|
| 168 | }
|
---|
| 169 | return in.read();
|
---|
| 170 | }
|
---|
| 171 |
|
---|
| 172 | @Override
|
---|
| 173 | public int read(byte b[], int off, int len) throws IOException {
|
---|
| 174 | if (curIndex < bufLen) {
|
---|
| 175 | if (len == 0) {
|
---|
| 176 | return 0;
|
---|
| 177 | }
|
---|
| 178 | if (off < 0 || len < 0 || len > b.length -off) {
|
---|
| 179 | throw new IndexOutOfBoundsException();
|
---|
| 180 | }
|
---|
| 181 | int min = Math.min(bufLen-curIndex, len);
|
---|
| 182 | System.arraycopy(buf, curIndex, b, off, min);
|
---|
| 183 | curIndex += min;
|
---|
| 184 | return min;
|
---|
| 185 | }
|
---|
| 186 | return in.read(b, off, len);
|
---|
| 187 | }
|
---|
| 188 |
|
---|
| 189 | }
|
---|