Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: josm/trunk/src/org/tukaani/xz/SingleXZInputStream.java@ 13766

Last change on this file since 13766 was 13350, checked in by stoecker, 7 years ago
see #15816 - add XZ support
File size: 20.3 KB

Line
1	/*
2	* SingleXZInputStream
3	*
4	* Author: Lasse Collin <lasse.collin@tukaani.org>
5	*
6	* This file has been put into the public domain.
7	* You can do whatever you want with this file.
8	*/
9
10	package org.tukaani.xz;
11
12	import java.io.InputStream;
13	import java.io.DataInputStream;
14	import java.io.IOException;
15	import java.io.EOFException;
16	import org.tukaani.xz.common.DecoderUtil;
17	import org.tukaani.xz.common.StreamFlags;
18	import org.tukaani.xz.index.IndexHash;
19	import org.tukaani.xz.check.Check;
20
21	/**
22	* Decompresses exactly one XZ Stream in streamed mode (no seeking).
23	* The decompression stops after the first XZ Stream has been decompressed,
24	* and the read position in the input stream is left at the first byte
25	* after the end of the XZ Stream. This can be useful when XZ data has
26	* been stored inside some other file format or protocol.
27	* <p>
28	* Unless you know what you are doing, don't use this class to decompress
29	* standalone .xz files. For that purpose, use <code>XZInputStream</code>.
30	*
31	* <h4>When uncompressed size is known beforehand</h4>
32	* <p>
33	* If you are decompressing complete XZ streams and your application knows
34	* exactly how much uncompressed data there should be, it is good to try
35	* reading one more byte by calling <code>read()</code> and checking
36	* that it returns <code>-1</code>. This way the decompressor will parse the
37	* file footers and verify the integrity checks, giving the caller more
38	* confidence that the uncompressed data is valid.
39	*
40	* @see XZInputStream
41	*/
42	public class SingleXZInputStream extends InputStream {
43	private InputStream in;
44	private final ArrayCache arrayCache;
45	private final int memoryLimit;
46	private final StreamFlags streamHeaderFlags;
47	private final Check check;
48	private final boolean verifyCheck;
49	private BlockInputStream blockDecoder = null;
50	private final IndexHash indexHash = new IndexHash();
51	private boolean endReached = false;
52	private IOException exception = null;
53
54	private final byte[] tempBuf = new byte[1];
55
56	/**
57	* Reads the Stream Header into a buffer.
58	* This is a helper function for the constructors.
59	*/
60	private static byte[] readStreamHeader(InputStream in) throws IOException {
61	byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
62	new DataInputStream(in).readFully(streamHeader);
63	return streamHeader;
64	}
65
66	/**
67	* Creates a new XZ decompressor that decompresses exactly one
68	* XZ Stream from <code>in</code> without a memory usage limit.
69	* <p>
70	* This constructor reads and parses the XZ Stream Header (12 bytes)
71	* from <code>in</code>. The header of the first Block is not read
72	* until <code>read</code> is called.
73	*
74	* @param in input stream from which XZ-compressed
75	* data is read
76	*
77	* @throws XZFormatException
78	* input is not in the XZ format
79	*
80	* @throws CorruptedInputException
81	* XZ header CRC32 doesn't match
82	*
83	* @throws UnsupportedOptionsException
84	* XZ header is valid but specifies options
85	* not supported by this implementation
86	*
87	* @throws EOFException
88	* less than 12 bytes of input was available
89	* from <code>in</code>
90	*
91	* @throws IOException may be thrown by <code>in</code>
92	*/
93	public SingleXZInputStream(InputStream in) throws IOException {
94	this(in, -1);
95	}
96
97	/**
98	* Creates a new XZ decompressor that decompresses exactly one
99	* XZ Stream from <code>in</code> without a memory usage limit.
100	* <p>
101	* This is identical to <code>SingleXZInputStream(InputStream)</code>
102	* except that this also takes the <code>arrayCache</code> argument.
103	*
104	* @param in input stream from which XZ-compressed
105	* data is read
106	*
107	* @param arrayCache cache to be used for allocating large arrays
108	*
109	* @throws XZFormatException
110	* input is not in the XZ format
111	*
112	* @throws CorruptedInputException
113	* XZ header CRC32 doesn't match
114	*
115	* @throws UnsupportedOptionsException
116	* XZ header is valid but specifies options
117	* not supported by this implementation
118	*
119	* @throws EOFException
120	* less than 12 bytes of input was available
121	* from <code>in</code>
122	*
123	* @throws IOException may be thrown by <code>in</code>
124	*
125	* @since 1.7
126	*/
127	public SingleXZInputStream(InputStream in, ArrayCache arrayCache)
128	throws IOException {
129	this(in, -1, arrayCache);
130	}
131
132	/**
133	* Creates a new XZ decompressor that decompresses exactly one
134	* XZ Stream from <code>in</code> with an optional memory usage limit.
135	* <p>
136	* This is identical to <code>SingleXZInputStream(InputStream)</code>
137	* except that this also takes the <code>memoryLimit</code> argument.
138	*
139	* @param in input stream from which XZ-compressed
140	* data is read
141	*
142	* @param memoryLimit memory usage limit in kibibytes (KiB)
143	* or <code>-1</code> to impose no
144	* memory usage limit
145	*
146	* @throws XZFormatException
147	* input is not in the XZ format
148	*
149	* @throws CorruptedInputException
150	* XZ header CRC32 doesn't match
151	*
152	* @throws UnsupportedOptionsException
153	* XZ header is valid but specifies options
154	* not supported by this implementation
155	*
156	* @throws EOFException
157	* less than 12 bytes of input was available
158	* from <code>in</code>
159	*
160	* @throws IOException may be thrown by <code>in</code>
161	*/
162	public SingleXZInputStream(InputStream in, int memoryLimit)
163	throws IOException {
164	this(in, memoryLimit, true);
165	}
166
167	/**
168	* Creates a new XZ decompressor that decompresses exactly one
169	* XZ Stream from <code>in</code> with an optional memory usage limit.
170	* <p>
171	* This is identical to <code>SingleXZInputStream(InputStream)</code>
172	* except that this also takes the <code>memoryLimit</code> and
173	* <code>arrayCache</code> arguments.
174	*
175	* @param in input stream from which XZ-compressed
176	* data is read
177	*
178	* @param memoryLimit memory usage limit in kibibytes (KiB)
179	* or <code>-1</code> to impose no
180	* memory usage limit
181	*
182	* @param arrayCache cache to be used for allocating large arrays
183	*
184	* @throws XZFormatException
185	* input is not in the XZ format
186	*
187	* @throws CorruptedInputException
188	* XZ header CRC32 doesn't match
189	*
190	* @throws UnsupportedOptionsException
191	* XZ header is valid but specifies options
192	* not supported by this implementation
193	*
194	* @throws EOFException
195	* less than 12 bytes of input was available
196	* from <code>in</code>
197	*
198	* @throws IOException may be thrown by <code>in</code>
199	*
200	* @since 1.7
201	*/
202	public SingleXZInputStream(InputStream in, int memoryLimit,
203	ArrayCache arrayCache) throws IOException {
204	this(in, memoryLimit, true, arrayCache);
205	}
206
207	/**
208	* Creates a new XZ decompressor that decompresses exactly one
209	* XZ Stream from <code>in</code> with an optional memory usage limit
210	* and ability to disable verification of integrity checks.
211	* <p>
212	* This is identical to <code>SingleXZInputStream(InputStream,int)</code>
213	* except that this also takes the <code>verifyCheck</code> argument.
214	* <p>
215	* Note that integrity check verification should almost never be disabled.
216	* Possible reasons to disable integrity check verification:
217	* <ul>
218	* <li>Trying to recover data from a corrupt .xz file.</li>
219	* <li>Speeding up decompression. This matters mostly with SHA-256
220	* or with files that have compressed extremely well. It's recommended
221	* that integrity checking isn't disabled for performance reasons
222	* unless the file integrity is verified externally in some other
223	* way.</li>
224	* </ul>
225	* <p>
226	* <code>verifyCheck</code> only affects the integrity check of
227	* the actual compressed data. The CRC32 fields in the headers
228	* are always verified.
229	*
230	* @param in input stream from which XZ-compressed
231	* data is read
232	*
233	* @param memoryLimit memory usage limit in kibibytes (KiB)
234	* or <code>-1</code> to impose no
235	* memory usage limit
236	*
237	* @param verifyCheck if <code>true</code>, the integrity checks
238	* will be verified; this should almost never
239	* be set to <code>false</code>
240	*
241	* @throws XZFormatException
242	* input is not in the XZ format
243	*
244	* @throws CorruptedInputException
245	* XZ header CRC32 doesn't match
246	*
247	* @throws UnsupportedOptionsException
248	* XZ header is valid but specifies options
249	* not supported by this implementation
250	*
251	* @throws EOFException
252	* less than 12 bytes of input was available
253	* from <code>in</code>
254	*
255	* @throws IOException may be thrown by <code>in</code>
256	*
257	* @since 1.6
258	*/
259	public SingleXZInputStream(InputStream in, int memoryLimit,
260	boolean verifyCheck) throws IOException {
261	this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
262	}
263
264	/**
265	* Creates a new XZ decompressor that decompresses exactly one
266	* XZ Stream from <code>in</code> with an optional memory usage limit
267	* and ability to disable verification of integrity checks.
268	* <p>
269	* This is identical to
270	* <code>SingleXZInputStream(InputStream,int,boolean)</code>
271	* except that this also takes the <code>arrayCache</code> argument.
272	*
273	* @param in input stream from which XZ-compressed
274	* data is read
275	*
276	* @param memoryLimit memory usage limit in kibibytes (KiB)
277	* or <code>-1</code> to impose no
278	* memory usage limit
279	*
280	* @param verifyCheck if <code>true</code>, the integrity checks
281	* will be verified; this should almost never
282	* be set to <code>false</code>
283	*
284	* @param arrayCache cache to be used for allocating large arrays
285	*
286	* @throws XZFormatException
287	* input is not in the XZ format
288	*
289	* @throws CorruptedInputException
290	* XZ header CRC32 doesn't match
291	*
292	* @throws UnsupportedOptionsException
293	* XZ header is valid but specifies options
294	* not supported by this implementation
295	*
296	* @throws EOFException
297	* less than 12 bytes of input was available
298	* from <code>in</code>
299	*
300	* @throws IOException may be thrown by <code>in</code>
301	*
302	* @since 1.7
303	*/
304	public SingleXZInputStream(InputStream in, int memoryLimit,
305	boolean verifyCheck, ArrayCache arrayCache)
306	throws IOException {
307	this(in, memoryLimit, verifyCheck, readStreamHeader(in), arrayCache);
308	}
309
310	SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
311	byte[] streamHeader, ArrayCache arrayCache)
312	throws IOException {
313	this.arrayCache = arrayCache;
314	this.in = in;
315	this.memoryLimit = memoryLimit;
316	this.verifyCheck = verifyCheck;
317	streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
318	check = Check.getInstance(streamHeaderFlags.checkType);
319	}
320
321	/**
322	* Gets the ID of the integrity check used in this XZ Stream.
323	*
324	* @return the Check ID specified in the XZ Stream Header
325	*/
326	public int getCheckType() {
327	return streamHeaderFlags.checkType;
328	}
329
330	/**
331	* Gets the name of the integrity check used in this XZ Stream.
332	*
333	* @return the name of the check specified in the XZ Stream Header
334	*/
335	public String getCheckName() {
336	return check.getName();
337	}
338
339	/**
340	* Decompresses the next byte from this input stream.
341	* <p>
342	* Reading lots of data with <code>read()</code> from this input stream
343	* may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
344	* if you need to read lots of data one byte at a time.
345	*
346	* @return the next decompressed byte, or <code>-1</code>
347	* to indicate the end of the compressed stream
348	*
349	* @throws CorruptedInputException
350	* @throws UnsupportedOptionsException
351	* @throws MemoryLimitException
352	*
353	* @throws XZIOException if the stream has been closed
354	*
355	* @throws EOFException
356	* compressed input is truncated or corrupt
357	*
358	* @throws IOException may be thrown by <code>in</code>
359	*/
360	public int read() throws IOException {
361	return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
362	}
363
364	/**
365	* Decompresses into an array of bytes.
366	* <p>
367	* If <code>len</code> is zero, no bytes are read and <code>0</code>
368	* is returned. Otherwise this will try to decompress <code>len</code>
369	* bytes of uncompressed data. Less than <code>len</code> bytes may
370	* be read only in the following situations:
371	* <ul>
372	* <li>The end of the compressed data was reached successfully.</li>
373	* <li>An error is detected after at least one but less <code>len</code>
374	* bytes have already been successfully decompressed.
375	* The next call with non-zero <code>len</code> will immediately
376	* throw the pending exception.</li>
377	* <li>An exception is thrown.</li>
378	* </ul>
379	*
380	* @param buf target buffer for uncompressed data
381	* @param off start offset in <code>buf</code>
382	* @param len maximum number of uncompressed bytes to read
383	*
384	* @return number of bytes read, or <code>-1</code> to indicate
385	* the end of the compressed stream
386	*
387	* @throws CorruptedInputException
388	* @throws UnsupportedOptionsException
389	* @throws MemoryLimitException
390	*
391	* @throws XZIOException if the stream has been closed
392	*
393	* @throws EOFException
394	* compressed input is truncated or corrupt
395	*
396	* @throws IOException may be thrown by <code>in</code>
397	*/
398	public int read(byte[] buf, int off, int len) throws IOException {
399	if (off < 0 \|\| len < 0 \|\| off + len < 0 \|\| off + len > buf.length)
400	throw new IndexOutOfBoundsException();
401
402	if (len == 0)
403	return 0;
404
405	if (in == null)
406	throw new XZIOException("Stream closed");
407
408	if (exception != null)
409	throw exception;
410
411	if (endReached)
412	return -1;
413
414	int size = 0;
415
416	try {
417	while (len > 0) {
418	if (blockDecoder == null) {
419	try {
420	blockDecoder = new BlockInputStream(
421	in, check, verifyCheck, memoryLimit, -1, -1,
422	arrayCache);
423	} catch (IndexIndicatorException e) {
424	indexHash.validate(in);
425	validateStreamFooter();
426	endReached = true;
427	return size > 0 ? size : -1;
428	}
429	}
430
431	int ret = blockDecoder.read(buf, off, len);
432
433	if (ret > 0) {
434	size += ret;
435	off += ret;
436	len -= ret;
437	} else if (ret == -1) {
438	indexHash.add(blockDecoder.getUnpaddedSize(),
439	blockDecoder.getUncompressedSize());
440	blockDecoder = null;
441	}
442	}
443	} catch (IOException e) {
444	exception = e;
445	if (size == 0)
446	throw e;
447	}
448
449	return size;
450	}
451
452	private void validateStreamFooter() throws IOException {
453	byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
454	new DataInputStream(in).readFully(buf);
455	StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);
456
457	if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
458	streamFooterFlags)
459	\|\| indexHash.getIndexSize() != streamFooterFlags.backwardSize)
460	throw new CorruptedInputException(
461	"XZ Stream Footer does not match Stream Header");
462	}
463
464	/**
465	* Returns the number of uncompressed bytes that can be read
466	* without blocking. The value is returned with an assumption
467	* that the compressed input data will be valid. If the compressed
468	* data is corrupt, <code>CorruptedInputException</code> may get
469	* thrown before the number of bytes claimed to be available have
470	* been read from this input stream.
471	*
472	* @return the number of uncompressed bytes that can be read
473	* without blocking
474	*/
475	public int available() throws IOException {
476	if (in == null)
477	throw new XZIOException("Stream closed");
478
479	if (exception != null)
480	throw exception;
481
482	return blockDecoder == null ? 0 : blockDecoder.available();
483	}
484
485	/**
486	* Closes the stream and calls <code>in.close()</code>.
487	* If the stream was already closed, this does nothing.
488	* <p>
489	* This is equivalent to <code>close(true)</code>.
490	*
491	* @throws IOException if thrown by <code>in.close()</code>
492	*/
493	public void close() throws IOException {
494	close(true);
495	}
496
497	/**
498	* Closes the stream and optionally calls <code>in.close()</code>.
499	* If the stream was already closed, this does nothing.
500	* If <code>close(false)</code> has been called, a further
501	* call of <code>close(true)</code> does nothing (it doesn't call
502	* <code>in.close()</code>).
503	* <p>
504	* If you don't want to close the underlying <code>InputStream</code>,
505	* there is usually no need to worry about closing this stream either;
506	* it's fine to do nothing and let the garbage collector handle it.
507	* However, if you are using {@link ArrayCache}, <code>close(false)</code>
508	* can be useful to put the allocated arrays back to the cache without
509	* closing the underlying <code>InputStream</code>.
510	* <p>
511	* Note that if you successfully reach the end of the stream
512	* (<code>read</code> returns <code>-1</code>), the arrays are
513	* automatically put back to the cache by that <code>read</code> call. In
514	* this situation <code>close(false)</code> is redundant (but harmless).
515	*
516	* @throws IOException if thrown by <code>in.close()</code>
517	*
518	* @since 1.7
519	*/
520	public void close(boolean closeInput) throws IOException {
521	if (in != null) {
522	if (blockDecoder != null) {
523	blockDecoder.close();
524	blockDecoder = null;
525	}
526
527	try {
528	if (closeInput)
529	in.close();
530	} finally {
531	in = null;
532	}
533	}
534	}
535	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: