Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: josm/trunk/src/org/tukaani/xz/XZInputStream.java@ 15948

Last change on this file since 15948 was 13350, checked in by stoecker, 7 years ago
see #15816 - add XZ support
File size: 19.7 KB

Rev	Line
[13350]	1	/*
	2	* XZInputStream
	3	*
	4	* Author: Lasse Collin <lasse.collin@tukaani.org>
	5	*
	6	* This file has been put into the public domain.
	7	* You can do whatever you want with this file.
	8	*/
	9
	10	package org.tukaani.xz;
	11
	12	import java.io.InputStream;
	13	import java.io.DataInputStream;
	14	import java.io.IOException;
	15	import java.io.EOFException;
	16	import org.tukaani.xz.common.DecoderUtil;
	17
	18	/**
	19	* Decompresses a .xz file in streamed mode (no seeking).
	20	* <p>
	21	* Use this to decompress regular standalone .xz files. This reads from
	22	* its input stream until the end of the input or until an error occurs.
	23	* This supports decompressing concatenated .xz files.
	24	*
	25	* <h4>Typical use cases</h4>
	26	* <p>
	27	* Getting an input stream to decompress a .xz file:
	28	* <p><blockquote><pre>
	29	* InputStream infile = new FileInputStream("foo.xz");
	30	* XZInputStream inxz = new XZInputStream(infile);
	31	* </pre></blockquote>
	32	* <p>
	33	* It's important to keep in mind that decompressor memory usage depends
	34	* on the settings used to compress the file. The worst-case memory usage
	35	* of XZInputStream is currently 1.5 GiB. Still, very few files will
	36	* require more than about 65 MiB because that's how much decompressing
	37	* a file created with the highest preset level will need, and only a few
	38	* people use settings other than the predefined presets.
	39	* <p>
	40	* It is possible to specify a memory usage limit for
	41	* <code>XZInputStream</code>. If decompression requires more memory than
	42	* the specified limit, MemoryLimitException will be thrown when reading
	43	* from the stream. For example, the following sets the memory usage limit
	44	* to 100 MiB:
	45	* <p><blockquote><pre>
	46	* InputStream infile = new FileInputStream("foo.xz");
	47	* XZInputStream inxz = new XZInputStream(infile, 100 * 1024);
	48	* </pre></blockquote>
	49	*
	50	* <h4>When uncompressed size is known beforehand</h4>
	51	* <p>
	52	* If you are decompressing complete files and your application knows
	53	* exactly how much uncompressed data there should be, it is good to try
	54	* reading one more byte by calling <code>read()</code> and checking
	55	* that it returns <code>-1</code>. This way the decompressor will parse the
	56	* file footers and verify the integrity checks, giving the caller more
	57	* confidence that the uncompressed data is valid. (This advice seems to
	58	* apply to
	59	* {@link java.util.zip.GZIPInputStream java.util.zip.GZIPInputStream} too.)
	60	*
	61	* @see SingleXZInputStream
	62	*/
	63	public class XZInputStream extends InputStream {
	64	private final ArrayCache arrayCache;
	65
	66	private final int memoryLimit;
	67	private InputStream in;
	68	private SingleXZInputStream xzIn;
	69	private final boolean verifyCheck;
	70	private boolean endReached = false;
	71	private IOException exception = null;
	72
	73	private final byte[] tempBuf = new byte[1];
	74
	/**
	 * Creates an XZ decompressor that imposes no memory usage limit.
	 * <p>
	 * The XZ Stream Header (12 bytes) is read from <code>in</code> and
	 * parsed by this constructor. The header of the first Block is read
	 * only once <code>read</code> gets called.
	 *
	 * @param       in          source of the XZ-compressed data
	 *
	 * @throws      XZFormatException
	 *                          the input is not in the XZ format
	 *
	 * @throws      CorruptedInputException
	 *                          the CRC32 of the XZ header doesn't match
	 *
	 * @throws      UnsupportedOptionsException
	 *                          the XZ header is valid but asks for options
	 *                          this implementation doesn't support
	 *
	 * @throws      EOFException
	 *                          <code>in</code> provided fewer than
	 *                          12 bytes of input
	 *
	 * @throws      IOException may be thrown by <code>in</code>
	 */
	public XZInputStream(InputStream in) throws IOException {
		// -1 means "no memory usage limit".
		this(in, -1);
	}
	104
	/**
	 * Creates an XZ decompressor that imposes no memory usage limit and
	 * allocates its large arrays through the given cache.
	 * <p>
	 * Apart from the extra <code>arrayCache</code> argument this behaves
	 * exactly like <code>XZInputStream(InputStream)</code>.
	 *
	 * @param       in          source of the XZ-compressed data
	 *
	 * @param       arrayCache  cache to be used for allocating large arrays
	 *
	 * @throws      XZFormatException
	 *                          the input is not in the XZ format
	 *
	 * @throws      CorruptedInputException
	 *                          the CRC32 of the XZ header doesn't match
	 *
	 * @throws      UnsupportedOptionsException
	 *                          the XZ header is valid but asks for options
	 *                          this implementation doesn't support
	 *
	 * @throws      EOFException
	 *                          <code>in</code> provided fewer than
	 *                          12 bytes of input
	 *
	 * @throws      IOException may be thrown by <code>in</code>
	 *
	 * @since 1.7
	 */
	public XZInputStream(InputStream in, ArrayCache arrayCache)
			throws IOException {
		// -1 means "no memory usage limit".
		this(in, -1, arrayCache);
	}
	138
	/**
	 * Creates an XZ decompressor whose memory usage may be capped.
	 * <p>
	 * Apart from the extra <code>memoryLimit</code> argument this behaves
	 * exactly like <code>XZInputStream(InputStream)</code>.
	 *
	 * @param       in          source of the XZ-compressed data
	 *
	 * @param       memoryLimit memory usage limit in kibibytes (KiB),
	 *                          or <code>-1</code> for no limit
	 *
	 * @throws      XZFormatException
	 *                          the input is not in the XZ format
	 *
	 * @throws      CorruptedInputException
	 *                          the CRC32 of the XZ header doesn't match
	 *
	 * @throws      UnsupportedOptionsException
	 *                          the XZ header is valid but asks for options
	 *                          this implementation doesn't support
	 *
	 * @throws      EOFException
	 *                          <code>in</code> provided fewer than
	 *                          12 bytes of input
	 *
	 * @throws      IOException may be thrown by <code>in</code>
	 */
	public XZInputStream(InputStream in, int memoryLimit) throws IOException {
		// Integrity checks are verified unless the caller opts out.
		this(in, memoryLimit, true);
	}
	171
	/**
	 * Creates an XZ decompressor whose memory usage may be capped and
	 * which allocates its large arrays through the given cache.
	 * <p>
	 * Apart from the extra <code>memoryLimit</code> and
	 * <code>arrayCache</code> arguments this behaves exactly like
	 * <code>XZInputStream(InputStream)</code>.
	 *
	 * @param       in          source of the XZ-compressed data
	 *
	 * @param       memoryLimit memory usage limit in kibibytes (KiB),
	 *                          or <code>-1</code> for no limit
	 *
	 * @param       arrayCache  cache to be used for allocating large arrays
	 *
	 * @throws      XZFormatException
	 *                          the input is not in the XZ format
	 *
	 * @throws      CorruptedInputException
	 *                          the CRC32 of the XZ header doesn't match
	 *
	 * @throws      UnsupportedOptionsException
	 *                          the XZ header is valid but asks for options
	 *                          this implementation doesn't support
	 *
	 * @throws      EOFException
	 *                          <code>in</code> provided fewer than
	 *                          12 bytes of input
	 *
	 * @throws      IOException may be thrown by <code>in</code>
	 *
	 * @since 1.7
	 */
	public XZInputStream(InputStream in, int memoryLimit,
			ArrayCache arrayCache) throws IOException {
		// Integrity checks are verified unless the caller opts out.
		this(in, memoryLimit, true, arrayCache);
	}
	210
	/**
	 * Creates an XZ decompressor whose memory usage may be capped and
	 * whose integrity check verification may be switched off.
	 * <p>
	 * Apart from the extra <code>verifyCheck</code> argument this behaves
	 * exactly like <code>XZInputStream(InputStream,int)</code>.
	 * <p>
	 * Verification of integrity checks should almost never be disabled.
	 * Reasons one might still do so:
	 * <ul>
	 *   <li>Attempting to recover data from a corrupt .xz file.</li>
	 *   <li>Making decompression faster. This matters mostly with SHA-256
	 *       or with files that have compressed extremely well. Disabling
	 *       the check for performance reasons is not recommended unless
	 *       the file integrity is verified externally in some other
	 *       way.</li>
	 * </ul>
	 * <p>
	 * Only the integrity check of the actual compressed data is affected
	 * by <code>verifyCheck</code>. The CRC32 fields in the headers are
	 * always verified.
	 *
	 * @param       in          source of the XZ-compressed data
	 *
	 * @param       memoryLimit memory usage limit in kibibytes (KiB),
	 *                          or <code>-1</code> for no limit
	 *
	 * @param       verifyCheck <code>true</code> to verify the integrity
	 *                          checks; passing <code>false</code> should
	 *                          almost never be done
	 *
	 * @throws      XZFormatException
	 *                          the input is not in the XZ format
	 *
	 * @throws      CorruptedInputException
	 *                          the CRC32 of the XZ header doesn't match
	 *
	 * @throws      UnsupportedOptionsException
	 *                          the XZ header is valid but asks for options
	 *                          this implementation doesn't support
	 *
	 * @throws      EOFException
	 *                          <code>in</code> provided fewer than
	 *                          12 bytes of input
	 *
	 * @throws      IOException may be thrown by <code>in</code>
	 *
	 * @since 1.6
	 */
	public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck)
			throws IOException {
		// Fall back to the default array cache.
		this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
	}
	266
	/**
	 * Creates an XZ decompressor whose memory usage may be capped, whose
	 * integrity check verification may be switched off, and which
	 * allocates its large arrays through the given cache.
	 * <p>
	 * Apart from the extra <code>arrayCache</code> argument this behaves
	 * exactly like <code>XZInputStream(InputStream,int,boolean)</code>.
	 *
	 * @param       in          source of the XZ-compressed data
	 *
	 * @param       memoryLimit memory usage limit in kibibytes (KiB),
	 *                          or <code>-1</code> for no limit
	 *
	 * @param       verifyCheck <code>true</code> to verify the integrity
	 *                          checks; passing <code>false</code> should
	 *                          almost never be done
	 *
	 * @param       arrayCache  cache to be used for allocating large arrays
	 *
	 * @throws      XZFormatException
	 *                          the input is not in the XZ format
	 *
	 * @throws      CorruptedInputException
	 *                          the CRC32 of the XZ header doesn't match
	 *
	 * @throws      UnsupportedOptionsException
	 *                          the XZ header is valid but asks for options
	 *                          this implementation doesn't support
	 *
	 * @throws      EOFException
	 *                          <code>in</code> provided fewer than
	 *                          12 bytes of input
	 *
	 * @throws      IOException may be thrown by <code>in</code>
	 *
	 * @since 1.7
	 */
	public XZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
			ArrayCache arrayCache) throws IOException {
		// Remember the settings; they are needed again for every
		// concatenated Stream that follows the first one.
		this.in = in;
		this.memoryLimit = memoryLimit;
		this.verifyCheck = verifyCheck;
		this.arrayCache = arrayCache;

		// Set up the decoder of the first Stream right away.
		this.xzIn = new SingleXZInputStream(
				in, memoryLimit, verifyCheck, arrayCache);
	}
	314
	315	/**
	316	* Decompresses the next byte from this input stream.
	317	* <p>
	318	* Reading lots of data with <code>read()</code> from this input stream
	319	* may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
	320	* if you need to read lots of data one byte at a time.
	321	*
	322	* @return the next decompressed byte, or <code>-1</code>
	323	* to indicate the end of the compressed stream
	324	*
	325	* @throws CorruptedInputException
	326	* @throws UnsupportedOptionsException
	327	* @throws MemoryLimitException
	328	*
	329	* @throws XZIOException if the stream has been closed
	330	*
	331	* @throws EOFException
	332	* compressed input is truncated or corrupt
	333	*
	334	* @throws IOException may be thrown by <code>in</code>
	335	*/
	336	public int read() throws IOException {
	337	return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
	338	}
	339
	340	/**
	341	* Decompresses into an array of bytes.
	342	* <p>
	343	* If <code>len</code> is zero, no bytes are read and <code>0</code>
	344	* is returned. Otherwise this will try to decompress <code>len</code>
	345	* bytes of uncompressed data. Less than <code>len</code> bytes may
	346	* be read only in the following situations:
	347	* <ul>
	348	* <li>The end of the compressed data was reached successfully.</li>
	349	* <li>An error is detected after at least one but less <code>len</code>
	350	* bytes have already been successfully decompressed.
	351	* The next call with non-zero <code>len</code> will immediately
	352	* throw the pending exception.</li>
	353	* <li>An exception is thrown.</li>
	354	* </ul>
	355	*
	356	* @param buf target buffer for uncompressed data
	357	* @param off start offset in <code>buf</code>
	358	* @param len maximum number of uncompressed bytes to read
	359	*
	360	* @return number of bytes read, or <code>-1</code> to indicate
	361	* the end of the compressed stream
	362	*
	363	* @throws CorruptedInputException
	364	* @throws UnsupportedOptionsException
	365	* @throws MemoryLimitException
	366	*
	367	* @throws XZIOException if the stream has been closed
	368	*
	369	* @throws EOFException
	370	* compressed input is truncated or corrupt
	371	*
	372	* @throws IOException may be thrown by <code>in</code>
	373	*/
	374	public int read(byte[] buf, int off, int len) throws IOException {
	375	if (off < 0 \|\| len < 0 \|\| off + len < 0 \|\| off + len > buf.length)
	376	throw new IndexOutOfBoundsException();
	377
	378	if (len == 0)
	379	return 0;
	380
	381	if (in == null)
	382	throw new XZIOException("Stream closed");
	383
	384	if (exception != null)
	385	throw exception;
	386
	387	if (endReached)
	388	return -1;
	389
	390	int size = 0;
	391
	392	try {
	393	while (len > 0) {
	394	if (xzIn == null) {
	395	prepareNextStream();
	396	if (endReached)
	397	return size == 0 ? -1 : size;
	398	}
	399
	400	int ret = xzIn.read(buf, off, len);
	401
	402	if (ret > 0) {
	403	size += ret;
	404	off += ret;
	405	len -= ret;
	406	} else if (ret == -1) {
	407	xzIn = null;
	408	}
	409	}
	410	} catch (IOException e) {
	411	exception = e;
	412	if (size == 0)
	413	throw e;
	414	}
	415
	416	return size;
	417	}
	418
	419	private void prepareNextStream() throws IOException {
	420	DataInputStream inData = new DataInputStream(in);
	421	byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
	422
	423	// The size of Stream Padding must be a multiple of four bytes,
	424	// all bytes zero.
	425	do {
	426	// First try to read one byte to see if we have reached the end
	427	// of the file.
	428	int ret = inData.read(buf, 0, 1);
	429	if (ret == -1) {
	430	endReached = true;
	431	return;
	432	}
	433
	434	// Since we got one byte of input, there must be at least
	435	// three more available in a valid file.
	436	inData.readFully(buf, 1, 3);
	437
	438	} while (buf[0] == 0 && buf[1] == 0 && buf[2] == 0 && buf[3] == 0);
	439
	440	// Not all bytes are zero. In a valid Stream it indicates the
	441	// beginning of the next Stream. Read the rest of the Stream Header
	442	// and initialize the XZ decoder.
	443	inData.readFully(buf, 4, DecoderUtil.STREAM_HEADER_SIZE - 4);
	444
	445	try {
	446	xzIn = new SingleXZInputStream(in, memoryLimit, verifyCheck, buf,
	447	arrayCache);
	448	} catch (XZFormatException e) {
	449	// Since this isn't the first .xz Stream, it is more
	450	// logical to tell that the data is corrupt.
	451	throw new CorruptedInputException(
	452	"Garbage after a valid XZ Stream");
	453	}
	454	}
	455
	456	/**
	457	* Returns the number of uncompressed bytes that can be read
	458	* without blocking. The value is returned with an assumption
	459	* that the compressed input data will be valid. If the compressed
	460	* data is corrupt, <code>CorruptedInputException</code> may get
	461	* thrown before the number of bytes claimed to be available have
	462	* been read from this input stream.
	463	*
	464	* @return the number of uncompressed bytes that can be read
	465	* without blocking
	466	*/
	467	public int available() throws IOException {
	468	if (in == null)
	469	throw new XZIOException("Stream closed");
	470
	471	if (exception != null)
	472	throw exception;
	473
	474	return xzIn == null ? 0 : xzIn.available();
	475	}
	476
	477	/**
	478	* Closes the stream and calls <code>in.close()</code>.
	479	* If the stream was already closed, this does nothing.
	480	* <p>
	481	* This is equivalent to <code>close(true)</code>.
	482	*
	483	* @throws IOException if thrown by <code>in.close()</code>
	484	*/
	485	public void close() throws IOException {
	486	close(true);
	487	}
	488
	489	/**
	490	* Closes the stream and optionally calls <code>in.close()</code>.
	491	* If the stream was already closed, this does nothing.
	492	* If <code>close(false)</code> has been called, a further
	493	* call of <code>close(true)</code> does nothing (it doesn't call
	494	* <code>in.close()</code>).
	495	* <p>
	496	* If you don't want to close the underlying <code>InputStream</code>,
	497	* there is usually no need to worry about closing this stream either;
	498	* it's fine to do nothing and let the garbage collector handle it.
	499	* However, if you are using {@link ArrayCache}, <code>close(false)</code>
	500	* can be useful to put the allocated arrays back to the cache without
	501	* closing the underlying <code>InputStream</code>.
	502	* <p>
	503	* Note that if you successfully reach the end of the stream
	504	* (<code>read</code> returns <code>-1</code>), the arrays are
	505	* automatically put back to the cache by that <code>read</code> call. In
	506	* this situation <code>close(false)</code> is redundant (but harmless).
	507	*
	508	* @throws IOException if thrown by <code>in.close()</code>
	509	*
	510	* @since 1.7
	511	*/
	512	public void close(boolean closeInput) throws IOException {
	513	if (in != null) {
	514	if (xzIn != null) {
	515	xzIn.close(false);
	516	xzIn = null;
	517	}
	518
	519	try {
	520	if (closeInput)
	521	in.close();
	522	} finally {
	523	in = null;
	524	}
	525	}
	526	}
	527	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: