Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: josm/trunk/src/org/tukaani/xz/LZMA2InputStream.java@ 14982

Last change on this file since 14982 was 13350, checked in by stoecker, 7 years ago
see #15816 - add XZ support
File size: 13.5 KB

Line
1	/*
2	* LZMA2InputStream
3	*
4	* Authors: Lasse Collin <lasse.collin@tukaani.org>
5	* Igor Pavlov <http://7-zip.org/>
6	*
7	* This file has been put into the public domain.
8	* You can do whatever you want with this file.
9	*/
10
11	package org.tukaani.xz;
12
13	import java.io.InputStream;
14	import java.io.DataInputStream;
15	import java.io.IOException;
16	import org.tukaani.xz.lz.LZDecoder;
17	import org.tukaani.xz.rangecoder.RangeDecoderFromBuffer;
18	import org.tukaani.xz.lzma.LZMADecoder;
19
20	/**
21	* Decompresses a raw LZMA2 stream (no XZ headers).
22	*/
23	public class LZMA2InputStream extends InputStream {
24	/**
25	* Smallest valid LZMA2 dictionary size.
26	* <p>
27	* Very tiny dictionaries would be a performance problem, so
28	* the minimum is 4 KiB.
29	*/
30	public static final int DICT_SIZE_MIN = 4096;
31
32	/**
33	* Largest dictionary size supported by this implementation.
34	* <p>
35	* The LZMA2 algorithm allows dictionaries up to one byte less than 4 GiB.
36	* This implementation supports only 16 bytes less than 2 GiB for raw
37	* LZMA2 streams, and for .xz files the maximum is 1.5 GiB. This
38	* limitation is due to Java using signed 32-bit integers for array
39	* indexing. The limitation shouldn't matter much in practice since such
40	* huge dictionaries are not normally used.
41	*/
42	public static final int DICT_SIZE_MAX = Integer.MAX_VALUE & ~15;
43
44	private static final int COMPRESSED_SIZE_MAX = 1 << 16;
45
46	private final ArrayCache arrayCache;
47	private DataInputStream in;
48
49	private LZDecoder lz;
50	private RangeDecoderFromBuffer rc;
51	private LZMADecoder lzma;
52
53	private int uncompressedSize = 0;
54	private boolean isLZMAChunk = false;
55
56	private boolean needDictReset = true;
57	private boolean needProps = true;
58	private boolean endReached = false;
59
60	private IOException exception = null;
61
62	private final byte[] tempBuf = new byte[1];
63
64	/**
65	* Gets approximate decompressor memory requirements as kibibytes for
66	* the given dictionary size.
67	*
68	* @param dictSize LZMA2 dictionary size as bytes, must be
69	* in the range [<code>DICT_SIZE_MIN</code>,
70	* <code>DICT_SIZE_MAX</code>]
71	*
72	* @return approximate memory requirements as kibibytes (KiB)
73	*/
74	public static int getMemoryUsage(int dictSize) {
75	// The base state is around 30-40 KiB (probabilities etc.),
76	// range decoder needs COMPRESSED_SIZE_MAX bytes for buffering,
77	// and LZ decoder needs a dictionary buffer.
78	return 40 + COMPRESSED_SIZE_MAX / 1024 + getDictSize(dictSize) / 1024;
79	}
80
81	private static int getDictSize(int dictSize) {
82	if (dictSize < DICT_SIZE_MIN \|\| dictSize > DICT_SIZE_MAX)
83	throw new IllegalArgumentException(
84	"Unsupported dictionary size " + dictSize);
85
86	// Round dictionary size upward to a multiple of 16. This way LZMA
87	// can use LZDecoder.getPos() for calculating LZMA's posMask.
88	// Note that this check is needed only for raw LZMA2 streams; it is
89	// redundant with .xz.
90	return (dictSize + 15) & ~15;
91	}
92
/**
 * Creates a decompressor for raw LZMA2 data read from <code>in</code>,
 * without a preset dictionary.
 * <p>
 * A raw LZMA2 stream does not store its dictionary size, so the caller
 * has to supply the size that was used when compressing.
 * <p>
 * A dictionary size that is too small will prevent decompressing the
 * stream. A dictionary size that is too big is a waste of memory, but
 * decompression will still work.
 * <p>
 * The dictionary never needs to be bigger than the uncompressed size
 * of the data, even if a bigger dictionary was used when compressing.
 * Knowing the uncompressed size may therefore allow saving some memory.
 *
 * @param       in          input stream from which LZMA2-compressed
 *                          data is read
 *
 * @param       dictSize    LZMA2 dictionary size as bytes, must be
 *                          in the range [<code>DICT_SIZE_MIN</code>,
 *                          <code>DICT_SIZE_MAX</code>]
 */
public LZMA2InputStream(InputStream in, int dictSize) {
    // Delegate with no preset dictionary.
    this(in, dictSize, null);
}
119
/**
 * Creates a decompressor for raw LZMA2 data whose dictionary may be
 * initialized from a preset dictionary.
 * <p>
 * Apart from the preset dictionary this behaves like
 * <code>LZMA2InputStream(InputStream, int)</code>. If the data was
 * compressed with a preset dictionary, the same preset dictionary
 * must be given here.
 * <p>
 * Large arrays are allocated through the cache returned by
 * <code>ArrayCache.getDefaultCache()</code>.
 *
 * @param       in          input stream from which LZMA2-compressed
 *                          data is read
 *
 * @param       dictSize    LZMA2 dictionary size as bytes, must be
 *                          in the range [<code>DICT_SIZE_MIN</code>,
 *                          <code>DICT_SIZE_MAX</code>]
 *
 * @param       presetDict  preset dictionary or <code>null</code>
 *                          to use no preset dictionary
 */
public LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict) {
    // Delegate using the default array cache.
    this(in, dictSize, presetDict, ArrayCache.getDefaultCache());
}
141
142	/**
143	* Creates a new LZMA2 decompressor using a preset dictionary
144	* and array cache.
145	* <p>
146	* This is like <code>LZMA2InputStream(InputStream, int, byte[])</code>
147	* except that this also takes the <code>arrayCache</code> argument.
148	*
149	* @param in input stream from which LZMA2-compressed
150	* data is read
151	*
152	* @param dictSize LZMA2 dictionary size as bytes, must be
153	* in the range [<code>DICT_SIZE_MIN</code>,
154	* <code>DICT_SIZE_MAX</code>]
155	*
156	* @param presetDict preset dictionary or <code>null</code>
157	* to use no preset dictionary
158	*
159	* @param arrayCache cache to be used for allocating large arrays
160	*
161	* @since 1.7
162	*/
163	LZMA2InputStream(InputStream in, int dictSize, byte[] presetDict,
164	ArrayCache arrayCache) {
165	// Check for null because otherwise null isn't detect
166	// in this constructor.
167	if (in == null)
168	throw new NullPointerException();
169
170	this.arrayCache = arrayCache;
171	this.in = new DataInputStream(in);
172	this.rc = new RangeDecoderFromBuffer(COMPRESSED_SIZE_MAX, arrayCache);
173	this.lz = new LZDecoder(getDictSize(dictSize), presetDict, arrayCache);
174
175	if (presetDict != null && presetDict.length > 0)
176	needDictReset = false;
177	}
178
179	/**
180	* Decompresses the next byte from this input stream.
181	* <p>
182	* Reading lots of data with <code>read()</code> from this input stream
183	* may be inefficient. Wrap it in <code>java.io.BufferedInputStream</code>
184	* if you need to read lots of data one byte at a time.
185	*
186	* @return the next decompressed byte, or <code>-1</code>
187	* to indicate the end of the compressed stream
188	*
189	* @throws CorruptedInputException
190	*
191	* @throws XZIOException if the stream has been closed
192	*
193	* @throws EOFException
194	* compressed input is truncated or corrupt
195	*
196	* @throws IOException may be thrown by <code>in</code>
197	*/
198	public int read() throws IOException {
199	return read(tempBuf, 0, 1) == -1 ? -1 : (tempBuf[0] & 0xFF);
200	}
201
202	/**
203	* Decompresses into an array of bytes.
204	* <p>
205	* If <code>len</code> is zero, no bytes are read and <code>0</code>
206	* is returned. Otherwise this will block until <code>len</code>
207	* bytes have been decompressed, the end of the LZMA2 stream is reached,
208	* or an exception is thrown.
209	*
210	* @param buf target buffer for uncompressed data
211	* @param off start offset in <code>buf</code>
212	* @param len maximum number of uncompressed bytes to read
213	*
214	* @return number of bytes read, or <code>-1</code> to indicate
215	* the end of the compressed stream
216	*
217	* @throws CorruptedInputException
218	*
219	* @throws XZIOException if the stream has been closed
220	*
221	* @throws EOFException
222	* compressed input is truncated or corrupt
223	*
224	* @throws IOException may be thrown by <code>in</code>
225	*/
226	public int read(byte[] buf, int off, int len) throws IOException {
227	if (off < 0 \|\| len < 0 \|\| off + len < 0 \|\| off + len > buf.length)
228	throw new IndexOutOfBoundsException();
229
230	if (len == 0)
231	return 0;
232
233	if (in == null)
234	throw new XZIOException("Stream closed");
235
236	if (exception != null)
237	throw exception;
238
239	if (endReached)
240	return -1;
241
242	try {
243	int size = 0;
244
245	while (len > 0) {
246	if (uncompressedSize == 0) {
247	decodeChunkHeader();
248	if (endReached)
249	return size == 0 ? -1 : size;
250	}
251
252	int copySizeMax = Math.min(uncompressedSize, len);
253
254	if (!isLZMAChunk) {
255	lz.copyUncompressed(in, copySizeMax);
256	} else {
257	lz.setLimit(copySizeMax);
258	lzma.decode();
259	}
260
261	int copiedSize = lz.flush(buf, off);
262	off += copiedSize;
263	len -= copiedSize;
264	size += copiedSize;
265	uncompressedSize -= copiedSize;
266
267	if (uncompressedSize == 0)
268	if (!rc.isFinished() \|\| lz.hasPending())
269	throw new CorruptedInputException();
270	}
271
272	return size;
273
274	} catch (IOException e) {
275	exception = e;
276	throw e;
277	}
278	}
279
280	private void decodeChunkHeader() throws IOException {
281	int control = in.readUnsignedByte();
282
283	if (control == 0x00) {
284	endReached = true;
285	putArraysToCache();
286	return;
287	}
288
289	if (control >= 0xE0 \|\| control == 0x01) {
290	needProps = true;
291	needDictReset = false;
292	lz.reset();
293	} else if (needDictReset) {
294	throw new CorruptedInputException();
295	}
296
297	if (control >= 0x80) {
298	isLZMAChunk = true;
299
300	uncompressedSize = (control & 0x1F) << 16;
301	uncompressedSize += in.readUnsignedShort() + 1;
302
303	int compressedSize = in.readUnsignedShort() + 1;
304
305	if (control >= 0xC0) {
306	needProps = false;
307	decodeProps();
308
309	} else if (needProps) {
310	throw new CorruptedInputException();
311
312	} else if (control >= 0xA0) {
313	lzma.reset();
314	}
315
316	rc.prepareInputBuffer(in, compressedSize);
317
318	} else if (control > 0x02) {
319	throw new CorruptedInputException();
320
321	} else {
322	isLZMAChunk = false;
323	uncompressedSize = in.readUnsignedShort() + 1;
324	}
325	}
326
327	private void decodeProps() throws IOException {
328	int props = in.readUnsignedByte();
329
330	if (props > (4 * 5 + 4) * 9 + 8)
331	throw new CorruptedInputException();
332
333	int pb = props / (9 * 5);
334	props -= pb * 9 * 5;
335	int lp = props / 9;
336	int lc = props - lp * 9;
337
338	if (lc + lp > 4)
339	throw new CorruptedInputException();
340
341	lzma = new LZMADecoder(lz, rc, lc, lp, pb);
342	}
343
344	/**
345	* Returns the number of uncompressed bytes that can be read
346	* without blocking. The value is returned with an assumption
347	* that the compressed input data will be valid. If the compressed
348	* data is corrupt, <code>CorruptedInputException</code> may get
349	* thrown before the number of bytes claimed to be available have
350	* been read from this input stream.
351	* <p>
352	* In LZMA2InputStream, the return value will be non-zero when the
353	* decompressor is in the middle of an LZMA2 chunk. The return value
354	* will then be the number of uncompressed bytes remaining from that
355	* chunk. The return value can also be non-zero in the middle of
356	* an uncompressed chunk, but then the return value depends also on
357	* the <code>available()</code> method of the underlying InputStream.
358	*
359	* @return the number of uncompressed bytes that can be read
360	* without blocking
361	*/
362	public int available() throws IOException {
363	if (in == null)
364	throw new XZIOException("Stream closed");
365
366	if (exception != null)
367	throw exception;
368
369	return isLZMAChunk ? uncompressedSize
370	: Math.min(uncompressedSize, in.available());
371	}
372
373	private void putArraysToCache() {
374	if (lz != null) {
375	lz.putArraysToCache(arrayCache);
376	lz = null;
377
378	rc.putArraysToCache(arrayCache);
379	rc = null;
380	}
381	}
382
383	/**
384	* Closes the stream and calls <code>in.close()</code>.
385	* If the stream was already closed, this does nothing.
386	*
387	* @throws IOException if thrown by <code>in.close()</code>
388	*/
389	public void close() throws IOException {
390	if (in != null) {
391	putArraysToCache();
392
393	try {
394	in.close();
395	} finally {
396	in = null;
397	}
398	}
399	}
400	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: