source: josm/trunk/src/org/tukaani/xz/SingleXZInputStream.java@ 15731

Last change on this file since 15731 was 13350, checked in by stoecker, 7 years ago

see #15816 - add XZ support

File size: 20.3 KB
Line 
1/*
2 * SingleXZInputStream
3 *
4 * Author: Lasse Collin <lasse.collin@tukaani.org>
5 *
6 * This file has been put into the public domain.
7 * You can do whatever you want with this file.
8 */
9
10package org.tukaani.xz;
11
12import java.io.InputStream;
13import java.io.DataInputStream;
14import java.io.IOException;
15import java.io.EOFException;
16import org.tukaani.xz.common.DecoderUtil;
17import org.tukaani.xz.common.StreamFlags;
18import org.tukaani.xz.index.IndexHash;
19import org.tukaani.xz.check.Check;
20
/**
 * Decompresses exactly one XZ Stream in streamed mode (no seeking).
 * The decompression stops after the first XZ Stream has been decompressed,
 * and the read position in the input stream is left at the first byte
 * after the end of the XZ Stream. This can be useful when XZ data has
 * been stored inside some other file format or protocol.
 * <p>
 * Unless you know what you are doing, don't use this class to decompress
 * standalone .xz files. For that purpose, use <code>XZInputStream</code>.
 *
 * <h4>When uncompressed size is known beforehand</h4>
 * <p>
 * If you are decompressing complete XZ streams and your application knows
 * exactly how much uncompressed data there should be, it is good to try
 * reading one more byte by calling <code>read()</code> and checking
 * that it returns <code>-1</code>. This way the decompressor will parse the
 * file footers and verify the integrity checks, giving the caller more
 * confidence that the uncompressed data is valid.
 *
 * @see XZInputStream
 */
42public class SingleXZInputStream extends InputStream {
43 private InputStream in;
44 private final ArrayCache arrayCache;
45 private final int memoryLimit;
46 private final StreamFlags streamHeaderFlags;
47 private final Check check;
48 private final boolean verifyCheck;
49 private BlockInputStream blockDecoder = null;
50 private final IndexHash indexHash = new IndexHash();
51 private boolean endReached = false;
52 private IOException exception = null;
53
54 private final byte[] tempBuf = new byte[1];
55
56 /**
57 * Reads the Stream Header into a buffer.
58 * This is a helper function for the constructors.
59 */
60 private static byte[] readStreamHeader(InputStream in) throws IOException {
61 byte[] streamHeader = new byte[DecoderUtil.STREAM_HEADER_SIZE];
62 new DataInputStream(in).readFully(streamHeader);
63 return streamHeader;
64 }
65
66 /**
67 * Creates a new XZ decompressor that decompresses exactly one
68 * XZ Stream from <code>in</code> without a memory usage limit.
69 * <p>
70 * This constructor reads and parses the XZ Stream Header (12 bytes)
71 * from <code>in</code>. The header of the first Block is not read
72 * until <code>read</code> is called.
73 *
74 * @param in input stream from which XZ-compressed
75 * data is read
76 *
77 * @throws XZFormatException
78 * input is not in the XZ format
79 *
80 * @throws CorruptedInputException
81 * XZ header CRC32 doesn't match
82 *
83 * @throws UnsupportedOptionsException
84 * XZ header is valid but specifies options
85 * not supported by this implementation
86 *
87 * @throws EOFException
88 * less than 12 bytes of input was available
89 * from <code>in</code>
90 *
91 * @throws IOException may be thrown by <code>in</code>
92 */
93 public SingleXZInputStream(InputStream in) throws IOException {
94 this(in, -1);
95 }
96
97 /**
98 * Creates a new XZ decompressor that decompresses exactly one
99 * XZ Stream from <code>in</code> without a memory usage limit.
100 * <p>
101 * This is identical to <code>SingleXZInputStream(InputStream)</code>
102 * except that this also takes the <code>arrayCache</code> argument.
103 *
104 * @param in input stream from which XZ-compressed
105 * data is read
106 *
107 * @param arrayCache cache to be used for allocating large arrays
108 *
109 * @throws XZFormatException
110 * input is not in the XZ format
111 *
112 * @throws CorruptedInputException
113 * XZ header CRC32 doesn't match
114 *
115 * @throws UnsupportedOptionsException
116 * XZ header is valid but specifies options
117 * not supported by this implementation
118 *
119 * @throws EOFException
120 * less than 12 bytes of input was available
121 * from <code>in</code>
122 *
123 * @throws IOException may be thrown by <code>in</code>
124 *
125 * @since 1.7
126 */
127 public SingleXZInputStream(InputStream in, ArrayCache arrayCache)
128 throws IOException {
129 this(in, -1, arrayCache);
130 }
131
132 /**
133 * Creates a new XZ decompressor that decompresses exactly one
134 * XZ Stream from <code>in</code> with an optional memory usage limit.
135 * <p>
136 * This is identical to <code>SingleXZInputStream(InputStream)</code>
137 * except that this also takes the <code>memoryLimit</code> argument.
138 *
139 * @param in input stream from which XZ-compressed
140 * data is read
141 *
142 * @param memoryLimit memory usage limit in kibibytes (KiB)
143 * or <code>-1</code> to impose no
144 * memory usage limit
145 *
146 * @throws XZFormatException
147 * input is not in the XZ format
148 *
149 * @throws CorruptedInputException
150 * XZ header CRC32 doesn't match
151 *
152 * @throws UnsupportedOptionsException
153 * XZ header is valid but specifies options
154 * not supported by this implementation
155 *
156 * @throws EOFException
157 * less than 12 bytes of input was available
158 * from <code>in</code>
159 *
160 * @throws IOException may be thrown by <code>in</code>
161 */
162 public SingleXZInputStream(InputStream in, int memoryLimit)
163 throws IOException {
164 this(in, memoryLimit, true);
165 }
166
167 /**
168 * Creates a new XZ decompressor that decompresses exactly one
169 * XZ Stream from <code>in</code> with an optional memory usage limit.
170 * <p>
171 * This is identical to <code>SingleXZInputStream(InputStream)</code>
172 * except that this also takes the <code>memoryLimit</code> and
173 * <code>arrayCache</code> arguments.
174 *
175 * @param in input stream from which XZ-compressed
176 * data is read
177 *
178 * @param memoryLimit memory usage limit in kibibytes (KiB)
179 * or <code>-1</code> to impose no
180 * memory usage limit
181 *
182 * @param arrayCache cache to be used for allocating large arrays
183 *
184 * @throws XZFormatException
185 * input is not in the XZ format
186 *
187 * @throws CorruptedInputException
188 * XZ header CRC32 doesn't match
189 *
190 * @throws UnsupportedOptionsException
191 * XZ header is valid but specifies options
192 * not supported by this implementation
193 *
194 * @throws EOFException
195 * less than 12 bytes of input was available
196 * from <code>in</code>
197 *
198 * @throws IOException may be thrown by <code>in</code>
199 *
200 * @since 1.7
201 */
202 public SingleXZInputStream(InputStream in, int memoryLimit,
203 ArrayCache arrayCache) throws IOException {
204 this(in, memoryLimit, true, arrayCache);
205 }
206
207 /**
208 * Creates a new XZ decompressor that decompresses exactly one
209 * XZ Stream from <code>in</code> with an optional memory usage limit
210 * and ability to disable verification of integrity checks.
211 * <p>
212 * This is identical to <code>SingleXZInputStream(InputStream,int)</code>
213 * except that this also takes the <code>verifyCheck</code> argument.
214 * <p>
215 * Note that integrity check verification should almost never be disabled.
216 * Possible reasons to disable integrity check verification:
217 * <ul>
218 * <li>Trying to recover data from a corrupt .xz file.</li>
219 * <li>Speeding up decompression. This matters mostly with SHA-256
220 * or with files that have compressed extremely well. It's recommended
221 * that integrity checking isn't disabled for performance reasons
222 * unless the file integrity is verified externally in some other
223 * way.</li>
224 * </ul>
225 * <p>
226 * <code>verifyCheck</code> only affects the integrity check of
227 * the actual compressed data. The CRC32 fields in the headers
228 * are always verified.
229 *
230 * @param in input stream from which XZ-compressed
231 * data is read
232 *
233 * @param memoryLimit memory usage limit in kibibytes (KiB)
234 * or <code>-1</code> to impose no
235 * memory usage limit
236 *
237 * @param verifyCheck if <code>true</code>, the integrity checks
238 * will be verified; this should almost never
239 * be set to <code>false</code>
240 *
241 * @throws XZFormatException
242 * input is not in the XZ format
243 *
244 * @throws CorruptedInputException
245 * XZ header CRC32 doesn't match
246 *
247 * @throws UnsupportedOptionsException
248 * XZ header is valid but specifies options
249 * not supported by this implementation
250 *
251 * @throws EOFException
252 * less than 12 bytes of input was available
253 * from <code>in</code>
254 *
255 * @throws IOException may be thrown by <code>in</code>
256 *
257 * @since 1.6
258 */
259 public SingleXZInputStream(InputStream in, int memoryLimit,
260 boolean verifyCheck) throws IOException {
261 this(in, memoryLimit, verifyCheck, ArrayCache.getDefaultCache());
262 }
263
264 /**
265 * Creates a new XZ decompressor that decompresses exactly one
266 * XZ Stream from <code>in</code> with an optional memory usage limit
267 * and ability to disable verification of integrity checks.
268 * <p>
269 * This is identical to
270 * <code>SingleXZInputStream(InputStream,int,boolean)</code>
271 * except that this also takes the <code>arrayCache</code> argument.
272 *
273 * @param in input stream from which XZ-compressed
274 * data is read
275 *
276 * @param memoryLimit memory usage limit in kibibytes (KiB)
277 * or <code>-1</code> to impose no
278 * memory usage limit
279 *
280 * @param verifyCheck if <code>true</code>, the integrity checks
281 * will be verified; this should almost never
282 * be set to <code>false</code>
283 *
284 * @param arrayCache cache to be used for allocating large arrays
285 *
286 * @throws XZFormatException
287 * input is not in the XZ format
288 *
289 * @throws CorruptedInputException
290 * XZ header CRC32 doesn't match
291 *
292 * @throws UnsupportedOptionsException
293 * XZ header is valid but specifies options
294 * not supported by this implementation
295 *
296 * @throws EOFException
297 * less than 12 bytes of input was available
298 * from <code>in</code>
299 *
300 * @throws IOException may be thrown by <code>in</code>
301 *
302 * @since 1.7
303 */
304 public SingleXZInputStream(InputStream in, int memoryLimit,
305 boolean verifyCheck, ArrayCache arrayCache)
306 throws IOException {
307 this(in, memoryLimit, verifyCheck, readStreamHeader(in), arrayCache);
308 }
309
310 SingleXZInputStream(InputStream in, int memoryLimit, boolean verifyCheck,
311 byte[] streamHeader, ArrayCache arrayCache)
312 throws IOException {
313 this.arrayCache = arrayCache;
314 this.in = in;
315 this.memoryLimit = memoryLimit;
316 this.verifyCheck = verifyCheck;
317 streamHeaderFlags = DecoderUtil.decodeStreamHeader(streamHeader);
318 check = Check.getInstance(streamHeaderFlags.checkType);
319 }
320
321 /**
322 * Gets the ID of the integrity check used in this XZ Stream.
323 *
324 * @return the Check ID specified in the XZ Stream Header
325 */
326 public int getCheckType() {
327 return streamHeaderFlags.checkType;
328 }
329
330 /**
331 * Gets the name of the integrity check used in this XZ Stream.
332 *
333 * @return the name of the check specified in the XZ Stream Header
334 */
335 public String getCheckName() {
336 return check.getName();
337 }
338
339 /**
340 * Decompresses the next byte from this input stream.
341 * <p>
342 * Reading lots of data with <code>read()</code> from this input stream
343 * may be inefficient. Wrap it in {@link java.io.BufferedInputStream}
344 * if you need to read lots of data one byte at a time.
345 *
346 * @return the next decompressed byte, or <code>-1</code>
347 * to indicate the end of the compressed stream
348 *
349 * @throws CorruptedInputException
350 * @throws UnsupportedOptionsException
351 * @throws MemoryLimitException
352 *
353 * @throws XZIOException if the stream has been closed
354 *
355 * @throws EOFException
356 * compressed input is truncated or corrupt
357 *
358 * @throws IOException may be thrown by <code>in</code>
359 */
360 public int read() throws IOException {
361 return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
362 }
363
364 /**
365 * Decompresses into an array of bytes.
366 * <p>
367 * If <code>len</code> is zero, no bytes are read and <code>0</code>
368 * is returned. Otherwise this will try to decompress <code>len</code>
369 * bytes of uncompressed data. Less than <code>len</code> bytes may
370 * be read only in the following situations:
371 * <ul>
372 * <li>The end of the compressed data was reached successfully.</li>
373 * <li>An error is detected after at least one but less <code>len</code>
374 * bytes have already been successfully decompressed.
375 * The next call with non-zero <code>len</code> will immediately
376 * throw the pending exception.</li>
377 * <li>An exception is thrown.</li>
378 * </ul>
379 *
380 * @param buf target buffer for uncompressed data
381 * @param off start offset in <code>buf</code>
382 * @param len maximum number of uncompressed bytes to read
383 *
384 * @return number of bytes read, or <code>-1</code> to indicate
385 * the end of the compressed stream
386 *
387 * @throws CorruptedInputException
388 * @throws UnsupportedOptionsException
389 * @throws MemoryLimitException
390 *
391 * @throws XZIOException if the stream has been closed
392 *
393 * @throws EOFException
394 * compressed input is truncated or corrupt
395 *
396 * @throws IOException may be thrown by <code>in</code>
397 */
398 public int read(byte[] buf, int off, int len) throws IOException {
399 if (off < 0 || len < 0 || off + len < 0 || off + len > buf.length)
400 throw new IndexOutOfBoundsException();
401
402 if (len == 0)
403 return 0;
404
405 if (in == null)
406 throw new XZIOException("Stream closed");
407
408 if (exception != null)
409 throw exception;
410
411 if (endReached)
412 return -1;
413
414 int size = 0;
415
416 try {
417 while (len > 0) {
418 if (blockDecoder == null) {
419 try {
420 blockDecoder = new BlockInputStream(
421 in, check, verifyCheck, memoryLimit, -1, -1,
422 arrayCache);
423 } catch (IndexIndicatorException e) {
424 indexHash.validate(in);
425 validateStreamFooter();
426 endReached = true;
427 return size > 0 ? size : -1;
428 }
429 }
430
431 int ret = blockDecoder.read(buf, off, len);
432
433 if (ret > 0) {
434 size += ret;
435 off += ret;
436 len -= ret;
437 } else if (ret == -1) {
438 indexHash.add(blockDecoder.getUnpaddedSize(),
439 blockDecoder.getUncompressedSize());
440 blockDecoder = null;
441 }
442 }
443 } catch (IOException e) {
444 exception = e;
445 if (size == 0)
446 throw e;
447 }
448
449 return size;
450 }
451
452 private void validateStreamFooter() throws IOException {
453 byte[] buf = new byte[DecoderUtil.STREAM_HEADER_SIZE];
454 new DataInputStream(in).readFully(buf);
455 StreamFlags streamFooterFlags = DecoderUtil.decodeStreamFooter(buf);
456
457 if (!DecoderUtil.areStreamFlagsEqual(streamHeaderFlags,
458 streamFooterFlags)
459 || indexHash.getIndexSize() != streamFooterFlags.backwardSize)
460 throw new CorruptedInputException(
461 "XZ Stream Footer does not match Stream Header");
462 }
463
464 /**
465 * Returns the number of uncompressed bytes that can be read
466 * without blocking. The value is returned with an assumption
467 * that the compressed input data will be valid. If the compressed
468 * data is corrupt, <code>CorruptedInputException</code> may get
469 * thrown before the number of bytes claimed to be available have
470 * been read from this input stream.
471 *
472 * @return the number of uncompressed bytes that can be read
473 * without blocking
474 */
475 public int available() throws IOException {
476 if (in == null)
477 throw new XZIOException("Stream closed");
478
479 if (exception != null)
480 throw exception;
481
482 return blockDecoder == null ? 0 : blockDecoder.available();
483 }
484
485 /**
486 * Closes the stream and calls <code>in.close()</code>.
487 * If the stream was already closed, this does nothing.
488 * <p>
489 * This is equivalent to <code>close(true)</code>.
490 *
491 * @throws IOException if thrown by <code>in.close()</code>
492 */
493 public void close() throws IOException {
494 close(true);
495 }
496
497 /**
498 * Closes the stream and optionally calls <code>in.close()</code>.
499 * If the stream was already closed, this does nothing.
500 * If <code>close(false)</code> has been called, a further
501 * call of <code>close(true)</code> does nothing (it doesn't call
502 * <code>in.close()</code>).
503 * <p>
504 * If you don't want to close the underlying <code>InputStream</code>,
505 * there is usually no need to worry about closing this stream either;
506 * it's fine to do nothing and let the garbage collector handle it.
507 * However, if you are using {@link ArrayCache}, <code>close(false)</code>
508 * can be useful to put the allocated arrays back to the cache without
509 * closing the underlying <code>InputStream</code>.
510 * <p>
511 * Note that if you successfully reach the end of the stream
512 * (<code>read</code> returns <code>-1</code>), the arrays are
513 * automatically put back to the cache by that <code>read</code> call. In
514 * this situation <code>close(false)</code> is redundant (but harmless).
515 *
516 * @throws IOException if thrown by <code>in.close()</code>
517 *
518 * @since 1.7
519 */
520 public void close(boolean closeInput) throws IOException {
521 if (in != null) {
522 if (blockDecoder != null) {
523 blockDecoder.close();
524 blockDecoder = null;
525 }
526
527 try {
528 if (closeInput)
529 in.close();
530 } finally {
531 in = null;
532 }
533 }
534 }
535}
Note: See TracBrowser for help on using the repository browser.