/*
 * XZInputStream
 *
 * Author: Lasse Collin
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package org.tukaani.xz;

import java.io.InputStream;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.EOFException;
import org.tukaani.xz.common.DecoderUtil;

/**
 * Decompresses a .xz file in streamed mode (no seeking).
 * <p>
 * Use this to decompress regular standalone .xz files. This reads from
 * its input stream until the end of the input or until an error occurs.
 * This supports decompressing concatenated .xz files.
 *
 * <h2>Typical use cases</h2>
 * <p>
 * Getting an input stream to decompress a .xz file:
 * <pre>
 * InputStream infile = new FileInputStream("foo.xz");
 * XZInputStream inxz = new XZInputStream(infile);
 * </pre>
 * <p>
 * It's important to keep in mind that decompressor memory usage depends
 * on the settings used to compress the file. The worst-case memory usage
 * of XZInputStream is currently 1.5&nbsp;GiB. Still, very few files will
 * require more than about 65&nbsp;MiB because that's how much decompressing
 * a file created with the highest preset level will need, and only a few
 * people use settings other than the predefined presets.
 * <p>
 * It is possible to specify a memory usage limit for
 * {@code XZInputStream}. If decompression requires more memory than
 * the specified limit, MemoryLimitException will be thrown when reading
 * from the stream. For example, the following sets the memory usage limit
 * to 100&nbsp;MiB:
 * <pre>
 * InputStream infile = new FileInputStream("foo.xz");
 * XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
 * </pre>
 *
 * <h2>When uncompressed size is known beforehand</h2>
 * <p>
 * If you are decompressing complete files and your application knows
 * exactly how much uncompressed data there should be, it is good to try
 * reading one more byte by calling {@code read()} and checking
 * that it returns {@code -1}. This way the decompressor will parse the
 * file footers and verify the integrity checks, giving the caller more
 * confidence that the uncompressed data is valid. (This advice seems to
 * apply to
 * {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
 *
 * @see SingleXZInputStream
 */
public class XZInputStream extends InputStream {
    /** Cache handed to every {@code SingleXZInputStream} this class creates. */
    private final ArrayCache arrayCache;

    /** Memory usage limit in KiB, or {@code -1} for no limit. */
    private final int memoryLimit;

    /** Underlying compressed input; set to {@code null} once closed. */
    private InputStream in;

    /**
     * Decoder of the current .xz Stream; {@code null} after the current
     * Stream has ended (until the next one is prepared) and after close.
     */
    private SingleXZInputStream xzIn;

    /** Whether the integrity checks of the compressed data are verified. */
    private final boolean verifyCheck;

    /** Set once the underlying input ended where a new Stream could begin. */
    private boolean endReached = false;

    /** Pending error that subsequent read/available calls rethrow. */
    private IOException exception = null;

    /** One-byte scratch buffer used by {@link #read()}. */
    private final byte[] tempBuf = new byte[1];

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This constructor reads and parses the XZ Stream Header (12 bytes)
     * from {@code in}. The header of the first Block is not read
     * until {@code read} is called.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     */
    public XZInputStream(InputStream in) throws IOException {
        this(in, -1);
    }

    /**
     * Creates a new XZ decompressor without a memory usage limit.
     * <p>
     * This is identical to {@code XZInputStream(InputStream)}
     * except that this takes also the {@code arrayCache} argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, ArrayCache arrayCache)
            throws IOException {
        this(in, -1, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to {@code XZInputStream(InputStream)} except
     * that this takes also the {@code memoryLimit} argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     */
    public XZInputStream(InputStream in, int memoryLimit)
            throws IOException {
        this(in, memoryLimit, true);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit.
     * <p>
     * This is identical to {@code XZInputStream(InputStream)} except
     * that this takes also the {@code memoryLimit} and
     * {@code arrayCache} arguments.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit,
                         ArrayCache arrayCache) throws IOException {
        this(in, memoryLimit, true, arrayCache);
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to {@code XZInputStream(InputStream,int)} except
     * that this takes also the {@code verifyCheck} argument.
     * <p>
     * Note that integrity check verification should almost never be disabled.
     * Possible reasons to disable integrity check verification:
     * <ul>
     *   <li>Trying to recover data from a corrupt .xz file.</li>
     *   <li>Skipping the verification work when the integrity of the file
     *       is already verified by some other means.</li>
     * </ul>
     * <p>
     * {@code verifyCheck} only affects the integrity check of
     * the actual compressed data. The CRC32 fields in the headers
     * are always verified.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @param       verifyCheck if {@code true}, the integrity checks
     *                          will be verified; this should almost never
     *                          be set to {@code false}
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.6
     */
    public XZInputStream(InputStream in, int memoryLimit,
                         boolean verifyCheck) throws IOException {
        this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
    }

    /**
     * Creates a new XZ decompressor with an optional memory usage limit
     * and ability to disable verification of integrity checks.
     * <p>
     * This is identical to {@code XZInputStream(InputStream,int,boolean)}
     * except that this takes also the {@code arrayCache} argument.
     *
     * @param       in          input stream from which XZ-compressed
     *                          data is read
     *
     * @param       memoryLimit memory usage limit in kibibytes (KiB)
     *                          or {@code -1} to impose no
     *                          memory usage limit
     *
     * @param       verifyCheck if {@code true}, the integrity checks
     *                          will be verified; this should almost never
     *                          be set to {@code false}
     *
     * @param       arrayCache  cache to be used for allocating large arrays
     *
     * @throws      XZFormatException
     *                          input is not in the XZ format
     *
     * @throws      CorruptedInputException
     *                          XZ header CRC32 doesn't match
     *
     * @throws      UnsupportedOptionsException
     *                          XZ header is valid but specifies options
     *                          not supported by this implementation
     *
     * @throws      EOFException
     *                          less than 12 bytes of input was available
     *                          from {@code in}
     *
     * @throws      IOException may be thrown by {@code in}
     *
     * @since 1.7
     */
    public XZInputStream(InputStream in, int memoryLimit,
                         boolean verifyCheck, ArrayCache arrayCache)
            throws IOException {
        this.arrayCache = arrayCache;
        this.in = in;
        this.memoryLimit = memoryLimit;
        this.verifyCheck = verifyCheck;
        this.xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck,
                                            arrayCache);
    }

    /**
     * Decompresses the next byte from this input stream.
     * <p>
     * Reading lots of data with {@code read()} from this input stream
     * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
     * if you need to read lots of data one byte at a time.
     *
     * @return      the next decompressed byte, or {@code -1}
     *              to indicate the end of the compressed stream
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by {@code in}
     */
    @Override
    public int read() throws IOException {
        // Mask to return the byte as an unsigned value in the range 0-255.
        return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
    }

    /**
     * Decompresses into an array of bytes.
     * <p>
     * If {@code len} is zero, no bytes are read and {@code 0}
     * is returned. Otherwise this will try to decompress {@code len}
     * bytes of uncompressed data. Less than {@code len} bytes may
     * be read only in the following situations:
     * <ul>
     *   <li>The end of the compressed data was reached successfully.</li>
     *   <li>An error is detected after at least one but less than
     *       {@code len} bytes have already been successfully
     *       decompressed. The next call with non-zero {@code len}
     *       will immediately throw the pending exception.</li>
     *   <li>An exception is thrown.</li>
     * </ul>
     *
     * @param       buf         target buffer for uncompressed data
     * @param       off         start offset in {@code buf}
     * @param       len         maximum number of uncompressed bytes to read
     *
     * @return      number of bytes read, or {@code -1} to indicate
     *              the end of the compressed stream
     *
     * @throws      CorruptedInputException
     * @throws      UnsupportedOptionsException
     * @throws      MemoryLimitException
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      EOFException
     *                          compressed input is truncated or corrupt
     *
     * @throws      IOException may be thrown by {@code in}
     */
    @Override
    public int read(byte[] buf, int off, int len) throws IOException {
        // "off + len < 0" catches int overflow of the sum.
        if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length) {
            throw new IndexOutOfBoundsException();
        }

        if (len == 0) {
            return 0;
        }

        if (in == null) {
            throw new XZIOException("Stream closed");
        }

        if (exception != null) {
            throw exception;
        }

        if (endReached) {
            return -1;
        }

        int size = 0;

        try {
            while (len > 0) {
                if (xzIn == null) {
                    // The previous Stream ended; look for a concatenated one.
                    prepareNextStream();
                    if (endReached) {
                        return size == 0 ? -1 : size;
                    }
                }

                int ret = xzIn.read(buf, off, len);

                if (ret > 0) {
                    size += ret;
                    off += ret;
                    len -= ret;
                } else if (ret == -1) {
                    // End of the current Stream.
                    xzIn = null;
                }
            }
        } catch (IOException e) {
            // Remember the error so that later calls fail too. If some data
            // was already decompressed, return it first and let the next
            // call throw the pending exception.
            exception = e;
            if (size == 0) {
                throw e;
            }
        }

        return size;
    }

    /**
     * Skips Stream Padding (if any) and initializes {@code xzIn} for the
     * next concatenated .xz Stream, or sets {@code endReached} if the
     * underlying input ends before a new Stream begins.
     *
     * @throws      CorruptedInputException
     *                          the data following the previous Stream
     *                          is not a valid XZ Stream Header
     *
     * @throws      EOFException
     *                          the input ends in the middle of Stream
     *                          Padding or a Stream Header
     *
     * @throws      IOException may be thrown by {@code in}
     */
    private void prepareNextStream() throws IOException {
        DataInputStream inData = new DataInputStream(in);
        byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];

        // The size of Stream Padding must be a multiple of four bytes,
        // all bytes zero.
        do {
            // First try to read one byte to see if we have reached the end
            // of the file.
            int ret = inData.read(buf, 0, 1);
            if (ret == -1) {
                endReached = true;
                return;
            }

            // Since we got one byte of input, there must be at least
            // three more available in a valid file.
            inData.readFully(buf, 1, 3);

        } while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);

        // Not all bytes are zero. In a valid Stream it indicates the
        // beginning of the next Stream. Read the rest of the Stream Header
        // and initialize the XZ decoder.
        inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);

        try {
            xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf,
                                           arrayCache);
        } catch (XZFormatException e) {
            // Since this isn't the first .xz Stream, it is more
            // logical to tell that the data is corrupt.
            CorruptedInputException corrupt = new CorruptedInputException(
                    "Garbage after a valid XZ Stream");
            // Keep the original failure attached for diagnostics.
            corrupt.initCause(e);
            throw corrupt;
        }
    }

    /**
     * Returns the number of uncompressed bytes that can be read
     * without blocking. The value is returned with an assumption
     * that the compressed input data will be valid. If the compressed
     * data is corrupt, {@code CorruptedInputException} may get
     * thrown before the number of bytes claimed to be available have
     * been read from this input stream.
     *
     * @return      the number of uncompressed bytes that can be read
     *              without blocking
     *
     * @throws      XZIOException if the stream has been closed
     *
     * @throws      IOException a pending exception from an earlier
     *                          {@code read} call is rethrown
     */
    @Override
    public int available() throws IOException {
        if (in == null) {
            throw new XZIOException("Stream closed");
        }

        if (exception != null) {
            throw exception;
        }

        // Between Streams there is no active decoder to ask.
        return xzIn == null ? 0 : xzIn.available();
    }

    /**
     * Closes the stream and calls {@code in.close()}.
     * If the stream was already closed, this does nothing.
     * <p>
     * This is equivalent to {@code close(true)}.
     *
     * @throws  IOException if thrown by {@code in.close()}
     */
    @Override
    public void close() throws IOException {
        close(true);
    }

    /**
     * Closes the stream and optionally calls {@code in.close()}.
     * If the stream was already closed, this does nothing.
     * If {@code close(false)} has been called, a further
     * call of {@code close(true)} does nothing (it doesn't call
     * {@code in.close()}).
     * <p>
     * If you don't want to close the underlying {@code InputStream},
     * there is usually no need to worry about closing this stream either;
     * it's fine to do nothing and let the garbage collector handle it.
     * However, if you are using {@link ArrayCache}, {@code close(false)}
     * can be useful to put the allocated arrays back to the cache without
     * closing the underlying {@code InputStream}.
     * <p>
     * Note that if you successfully reach the end of the stream
     * ({@code read} returns {@code -1}), the arrays are
     * automatically put back to the cache by that {@code read} call. In
     * this situation {@code close(false)} is redundant (but harmless).
     *
     * @param   closeInput  if {@code true}, {@code in.close()} is called
     *
     * @throws  IOException if thrown by {@code in.close()}
     *
     * @since 1.7
     */
    public void close(boolean closeInput) throws IOException {
        if (in != null) {
            if (xzIn != null) {
                // The underlying input is handled below, so never let the
                // inner decoder close it.
                xzIn.close(false);
                xzIn = null;
            }

            try {
                if (closeInput) {
                    in.close();
                }
            } finally {
                // Mark this stream closed even if in.close() throws.
                in = null;
            }
        }
    }
}