source: josm/trunk/src/org/tukaani/xz/LZMA2Options.java@ 14039

Last change on this file since 14039 was 13350, checked in by stoecker, 7 years ago

see #15816 - add XZ support

File size: 19.3 KB
Line 
/*
 * LZMA2Options
 *
 * Author: Lasse Collin <lasse.collin@tukaani.org>
 *
 * This file has been put into the public domain.
 * You can do whatever you want with this file.
 */

package org.tukaani.xz;

import java.io.InputStream;
import java.io.IOException;
import org.tukaani.xz.lz.LZEncoder;
import org.tukaani.xz.lzma.LZMAEncoder;

17/**
18 * LZMA2 compression options.
19 * <p>
20 * While this allows setting the LZMA2 compression options in detail,
21 * often you only need <code>LZMA2Options()</code> or
22 * <code>LZMA2Options(int)</code>.
23 */
24public class LZMA2Options extends FilterOptions {
25 /**
26 * Minimum valid compression preset level is 0.
27 */
28 public static final int PRESET_MIN = 0;
29
30 /**
31 * Maximum valid compression preset level is 9.
32 */
33 public static final int PRESET_MAX = 9;
34
35 /**
36 * Default compression preset level is 6.
37 */
38 public static final int PRESET_DEFAULT = 6;
39
40 /**
41 * Minimum dictionary size is 4 KiB.
42 */
43 public static final int DICT_SIZE_MIN = 4096;
44
45 /**
46 * Maximum dictionary size for compression is 768 MiB.
47 * <p>
48 * The decompressor supports bigger dictionaries, up to almost 2 GiB.
49 * With HC4 the encoder would support dictionaries bigger than 768 MiB.
50 * The 768 MiB limit comes from the current implementation of BT4 where
51 * we would otherwise hit the limits of signed ints in array indexing.
52 * <p>
53 * If you really need bigger dictionary for decompression,
54 * use {@link LZMA2InputStream} directly.
55 */
56 public static final int DICT_SIZE_MAX = 768 << 20;
57
58 /**
59 * The default dictionary size is 8 MiB.
60 */
61 public static final int DICT_SIZE_DEFAULT = 8 << 20;
62
63 /**
64 * Maximum value for lc + lp is 4.
65 */
66 public static final int LC_LP_MAX = 4;
67
68 /**
69 * The default number of literal context bits is 3.
70 */
71 public static final int LC_DEFAULT = 3;
72
73 /**
74 * The default number of literal position bits is 0.
75 */
76 public static final int LP_DEFAULT = 0;
77
78 /**
79 * Maximum value for pb is 4.
80 */
81 public static final int PB_MAX = 4;
82
83 /**
84 * The default number of position bits is 2.
85 */
86 public static final int PB_DEFAULT = 2;
87
88 /**
89 * Compression mode: uncompressed.
90 * The data is wrapped into a LZMA2 stream without compression.
91 */
92 public static final int MODE_UNCOMPRESSED = 0;
93
94 /**
95 * Compression mode: fast.
96 * This is usually combined with a hash chain match finder.
97 */
98 public static final int MODE_FAST = LZMAEncoder.MODE_FAST;
99
100 /**
101 * Compression mode: normal.
102 * This is usually combined with a binary tree match finder.
103 */
104 public static final int MODE_NORMAL = LZMAEncoder.MODE_NORMAL;
105
106 /**
107 * Minimum value for <code>niceLen</code> is 8.
108 */
109 public static final int NICE_LEN_MIN = 8;
110
111 /**
112 * Maximum value for <code>niceLen</code> is 273.
113 */
114 public static final int NICE_LEN_MAX = 273;
115
116 /**
117 * Match finder: Hash Chain 2-3-4
118 */
119 public static final int MF_HC4 = LZEncoder.MF_HC4;
120
121 /**
122 * Match finder: Binary tree 2-3-4
123 */
124 public static final int MF_BT4 = LZEncoder.MF_BT4;
125
126 private static final int[] presetToDictSize = {
127 1 << 18, 1 << 20, 1 << 21, 1 << 22, 1 << 22,
128 1 << 23, 1 << 23, 1 << 24, 1 << 25, 1 << 26 };
129
130 private static final int[] presetToDepthLimit = { 4, 8, 24, 48 };
131
132 private int dictSize;
133 private byte[] presetDict = null;
134 private int lc;
135 private int lp;
136 private int pb;
137 private int mode;
138 private int niceLen;
139 private int mf;
140 private int depthLimit;
141
142 /**
143 * Creates new LZMA2 options and sets them to the default values.
144 * This is equivalent to <code>LZMA2Options(PRESET_DEFAULT)</code>.
145 */
146 public LZMA2Options() {
147 try {
148 setPreset(PRESET_DEFAULT);
149 } catch (UnsupportedOptionsException e) {
150 assert false;
151 throw new RuntimeException();
152 }
153 }
154
155 /**
156 * Creates new LZMA2 options and sets them to the given preset.
157 *
158 * @throws UnsupportedOptionsException
159 * <code>preset</code> is not supported
160 */
161 public LZMA2Options(int preset) throws UnsupportedOptionsException {
162 setPreset(preset);
163 }
164
165 /**
166 * Creates new LZMA2 options and sets them to the given custom values.
167 *
168 * @throws UnsupportedOptionsException
169 * unsupported options were specified
170 */
171 public LZMA2Options(int dictSize, int lc, int lp, int pb, int mode,
172 int niceLen, int mf, int depthLimit)
173 throws UnsupportedOptionsException {
174 setDictSize(dictSize);
175 setLcLp(lc, lp);
176 setPb(pb);
177 setMode(mode);
178 setNiceLen(niceLen);
179 setMatchFinder(mf);
180 setDepthLimit(depthLimit);
181 }
182
183 /**
184 * Sets the compression options to the given preset.
185 * <p>
186 * The presets 0-3 are fast presets with medium compression.
187 * The presets 4-6 are fairly slow presets with high compression.
188 * The default preset (<code>PRESET_DEFAULT</code>) is 6.
189 * <p>
190 * The presets 7-9 are like the preset 6 but use bigger dictionaries
191 * and have higher compressor and decompressor memory requirements.
192 * Unless the uncompressed size of the file exceeds 8&nbsp;MiB,
193 * 16&nbsp;MiB, or 32&nbsp;MiB, it is waste of memory to use the
194 * presets 7, 8, or 9, respectively.
195 *
196 * @throws UnsupportedOptionsException
197 * <code>preset</code> is not supported
198 */
199 public void setPreset(int preset) throws UnsupportedOptionsException {
200 if (preset < 0 || preset > 9)
201 throw new UnsupportedOptionsException(
202 "Unsupported preset: " + preset);
203
204 lc = LC_DEFAULT;
205 lp = LP_DEFAULT;
206 pb = PB_DEFAULT;
207 dictSize = presetToDictSize[preset];
208
209 if (preset <= 3) {
210 mode = MODE_FAST;
211 mf = MF_HC4;
212 niceLen = preset <= 1 ? 128 : NICE_LEN_MAX;
213 depthLimit = presetToDepthLimit[preset];
214 } else {
215 mode = MODE_NORMAL;
216 mf = MF_BT4;
217 niceLen = (preset == 4) ? 16 : (preset == 5) ? 32 : 64;
218 depthLimit = 0;
219 }
220 }
221
222 /**
223 * Sets the dictionary size in bytes.
224 * <p>
225 * The dictionary (or history buffer) holds the most recently seen
226 * uncompressed data. Bigger dictionary usually means better compression.
227 * However, using a dictioanary bigger than the size of the uncompressed
228 * data is waste of memory.
229 * <p>
230 * Any value in the range [DICT_SIZE_MIN, DICT_SIZE_MAX] is valid,
231 * but sizes of 2^n and 2^n&nbsp;+&nbsp;2^(n-1) bytes are somewhat
232 * recommended.
233 *
234 * @throws UnsupportedOptionsException
235 * <code>dictSize</code> is not supported
236 */
237 public void setDictSize(int dictSize) throws UnsupportedOptionsException {
238 if (dictSize < DICT_SIZE_MIN)
239 throw new UnsupportedOptionsException(
240 "LZMA2 dictionary size must be at least 4 KiB: "
241 + dictSize + " B");
242
243 if (dictSize > DICT_SIZE_MAX)
244 throw new UnsupportedOptionsException(
245 "LZMA2 dictionary size must not exceed "
246 + (DICT_SIZE_MAX >> 20) + " MiB: " + dictSize + " B");
247
248 this.dictSize = dictSize;
249 }
250
251 /**
252 * Gets the dictionary size in bytes.
253 */
254 public int getDictSize() {
255 return dictSize;
256 }
257
258 /**
259 * Sets a preset dictionary. Use null to disable the use of
260 * a preset dictionary. By default there is no preset dictionary.
261 * <p>
262 * <b>The .xz format doesn't support a preset dictionary for now.
263 * Do not set a preset dictionary unless you use raw LZMA2.</b>
264 * <p>
265 * Preset dictionary can be useful when compressing many similar,
266 * relatively small chunks of data independently from each other.
267 * A preset dictionary should contain typical strings that occur in
268 * the files being compressed. The most probable strings should be
269 * near the end of the preset dictionary. The preset dictionary used
270 * for compression is also needed for decompression.
271 */
272 public void setPresetDict(byte[] presetDict) {
273 this.presetDict = presetDict;
274 }
275
276 /**
277 * Gets the preset dictionary.
278 */
279 public byte[] getPresetDict() {
280 return presetDict;
281 }
282
283 /**
284 * Sets the number of literal context bits and literal position bits.
285 * <p>
286 * The sum of <code>lc</code> and <code>lp</code> is limited to 4.
287 * Trying to exceed it will throw an exception. This function lets
288 * you change both at the same time.
289 *
290 * @throws UnsupportedOptionsException
291 * <code>lc</code> and <code>lp</code>
292 * are invalid
293 */
294 public void setLcLp(int lc, int lp) throws UnsupportedOptionsException {
295 if (lc < 0 || lp < 0 || lc > LC_LP_MAX || lp > LC_LP_MAX
296 || lc + lp > LC_LP_MAX)
297 throw new UnsupportedOptionsException(
298 "lc + lp must not exceed " + LC_LP_MAX + ": "
299 + lc + " + " + lp);
300
301 this.lc = lc;
302 this.lp = lp;
303 }
304
305 /**
306 * Sets the number of literal context bits.
307 * <p>
308 * All bytes that cannot be encoded as matches are encoded as literals.
309 * That is, literals are simply 8-bit bytes that are encoded one at
310 * a time.
311 * <p>
312 * The literal coding makes an assumption that the highest <code>lc</code>
313 * bits of the previous uncompressed byte correlate with the next byte.
314 * For example, in typical English text, an upper-case letter is often
315 * followed by a lower-case letter, and a lower-case letter is usually
316 * followed by another lower-case letter. In the US-ASCII character set,
317 * the highest three bits are 010 for upper-case letters and 011 for
318 * lower-case letters. When <code>lc</code> is at least 3, the literal
319 * coding can take advantage of this property in the uncompressed data.
320 * <p>
321 * The default value (3) is usually good. If you want maximum compression,
322 * try <code>setLc(4)</code>. Sometimes it helps a little, and sometimes it
323 * makes compression worse. If it makes it worse, test for example
324 * <code>setLc(2)</code> too.
325 *
326 * @throws UnsupportedOptionsException
327 * <code>lc</code> is invalid, or the sum
328 * of <code>lc</code> and <code>lp</code>
329 * exceed LC_LP_MAX
330 */
331 public void setLc(int lc) throws UnsupportedOptionsException {
332 setLcLp(lc, lp);
333 }
334
335 /**
336 * Sets the number of literal position bits.
337 * <p>
338 * This affets what kind of alignment in the uncompressed data is
339 * assumed when encoding literals. See {@link #setPb(int) setPb} for
340 * more information about alignment.
341 *
342 * @throws UnsupportedOptionsException
343 * <code>lp</code> is invalid, or the sum
344 * of <code>lc</code> and <code>lp</code>
345 * exceed LC_LP_MAX
346 */
347 public void setLp(int lp) throws UnsupportedOptionsException {
348 setLcLp(lc, lp);
349 }
350
351 /**
352 * Gets the number of literal context bits.
353 */
354 public int getLc() {
355 return lc;
356 }
357
358 /**
359 * Gets the number of literal position bits.
360 */
361 public int getLp() {
362 return lp;
363 }
364
365 /**
366 * Sets the number of position bits.
367 * <p>
368 * This affects what kind of alignment in the uncompressed data is
369 * assumed in general. The default (2) means four-byte alignment
370 * (2^<code>pb</code> = 2^2 = 4), which is often a good choice when
371 * there's no better guess.
372 * <p>
373 * When the alignment is known, setting the number of position bits
374 * accordingly may reduce the file size a little. For example with text
375 * files having one-byte alignment (US-ASCII, ISO-8859-*, UTF-8), using
376 * <code>setPb(0)</code> can improve compression slightly. For UTF-16
377 * text, <code>setPb(1)</code> is a good choice. If the alignment is
378 * an odd number like 3 bytes, <code>setPb(0)</code> might be the best
379 * choice.
380 * <p>
381 * Even though the assumed alignment can be adjusted with
382 * <code>setPb</code> and <code>setLp</code>, LZMA2 still slightly favors
383 * 16-byte alignment. It might be worth taking into account when designing
384 * file formats that are likely to be often compressed with LZMA2.
385 *
386 * @throws UnsupportedOptionsException
387 * <code>pb</code> is invalid
388 */
389 public void setPb(int pb) throws UnsupportedOptionsException {
390 if (pb < 0 || pb > PB_MAX)
391 throw new UnsupportedOptionsException(
392 "pb must not exceed " + PB_MAX + ": " + pb);
393
394 this.pb = pb;
395 }
396
397 /**
398 * Gets the number of position bits.
399 */
400 public int getPb() {
401 return pb;
402 }
403
404 /**
405 * Sets the compression mode.
406 * <p>
407 * This specifies the method to analyze the data produced by
408 * a match finder. The default is <code>MODE_FAST</code> for presets
409 * 0-3 and <code>MODE_NORMAL</code> for presets 4-9.
410 * <p>
411 * Usually <code>MODE_FAST</code> is used with Hash Chain match finders
412 * and <code>MODE_NORMAL</code> with Binary Tree match finders. This is
413 * also what the presets do.
414 * <p>
415 * The special mode <code>MODE_UNCOMPRESSED</code> doesn't try to
416 * compress the data at all (and doesn't use a match finder) and will
417 * simply wrap it in uncompressed LZMA2 chunks.
418 *
419 * @throws UnsupportedOptionsException
420 * <code>mode</code> is not supported
421 */
422 public void setMode(int mode) throws UnsupportedOptionsException {
423 if (mode < MODE_UNCOMPRESSED || mode > MODE_NORMAL)
424 throw new UnsupportedOptionsException(
425 "Unsupported compression mode: " + mode);
426
427 this.mode = mode;
428 }
429
430 /**
431 * Gets the compression mode.
432 */
433 public int getMode() {
434 return mode;
435 }
436
437 /**
438 * Sets the nice length of matches.
439 * Once a match of at least <code>niceLen</code> bytes is found,
440 * the algorithm stops looking for better matches. Higher values tend
441 * to give better compression at the expense of speed. The default
442 * depends on the preset.
443 *
444 * @throws UnsupportedOptionsException
445 * <code>niceLen</code> is invalid
446 */
447 public void setNiceLen(int niceLen) throws UnsupportedOptionsException {
448 if (niceLen < NICE_LEN_MIN)
449 throw new UnsupportedOptionsException(
450 "Minimum nice length of matches is "
451 + NICE_LEN_MIN + " bytes: " + niceLen);
452
453 if (niceLen > NICE_LEN_MAX)
454 throw new UnsupportedOptionsException(
455 "Maximum nice length of matches is " + NICE_LEN_MAX
456 + ": " + niceLen);
457
458 this.niceLen = niceLen;
459 }
460
461 /**
462 * Gets the nice length of matches.
463 */
464 public int getNiceLen() {
465 return niceLen;
466 }
467
468 /**
469 * Sets the match finder type.
470 * <p>
471 * Match finder has a major effect on compression speed, memory usage,
472 * and compression ratio. Usually Hash Chain match finders are faster
473 * than Binary Tree match finders. The default depends on the preset:
474 * 0-3 use <code>MF_HC4</code> and 4-9 use <code>MF_BT4</code>.
475 *
476 * @throws UnsupportedOptionsException
477 * <code>mf</code> is not supported
478 */
479 public void setMatchFinder(int mf) throws UnsupportedOptionsException {
480 if (mf != MF_HC4 && mf != MF_BT4)
481 throw new UnsupportedOptionsException(
482 "Unsupported match finder: " + mf);
483
484 this.mf = mf;
485 }
486
487 /**
488 * Gets the match finder type.
489 */
490 public int getMatchFinder() {
491 return mf;
492 }
493
494 /**
495 * Sets the match finder search depth limit.
496 * <p>
497 * The default is a special value of <code>0</code> which indicates that
498 * the depth limit should be automatically calculated by the selected
499 * match finder from the nice length of matches.
500 * <p>
501 * Reasonable depth limit for Hash Chain match finders is 4-100 and
502 * 16-1000 for Binary Tree match finders. Using very high values can
503 * make the compressor extremely slow with some files. Avoid settings
504 * higher than 1000 unless you are prepared to interrupt the compression
505 * in case it is taking far too long.
506 *
507 * @throws UnsupportedOptionsException
508 * <code>depthLimit</code> is invalid
509 */
510 public void setDepthLimit(int depthLimit)
511 throws UnsupportedOptionsException {
512 if (depthLimit < 0)
513 throw new UnsupportedOptionsException(
514 "Depth limit cannot be negative: " + depthLimit);
515
516 this.depthLimit = depthLimit;
517 }
518
519 /**
520 * Gets the match finder search depth limit.
521 */
522 public int getDepthLimit() {
523 return depthLimit;
524 }
525
526 public int getEncoderMemoryUsage() {
527 return (mode == MODE_UNCOMPRESSED)
528 ? UncompressedLZMA2OutputStream.getMemoryUsage()
529 : LZMA2OutputStream.getMemoryUsage(this);
530 }
531
532 public FinishableOutputStream getOutputStream(FinishableOutputStream out,
533 ArrayCache arrayCache) {
534 if (mode == MODE_UNCOMPRESSED)
535 return new UncompressedLZMA2OutputStream(out, arrayCache);
536
537 return new LZMA2OutputStream(out, this, arrayCache);
538 }
539
540 /**
541 * Gets how much memory the LZMA2 decoder will need to decompress the data
542 * that was encoded with these options and stored in a .xz file.
543 * <p>
544 * The returned value may bigger than the value returned by a direct call
545 * to {@link LZMA2InputStream#getMemoryUsage(int)} if the dictionary size
546 * is not 2^n or 2^n&nbsp;+&nbsp;2^(n-1) bytes. This is because the .xz
547 * headers store the dictionary size in such a format and other values
548 * are rounded up to the next such value. Such rounding is harmess except
549 * it might waste some memory if an unsual dictionary size is used.
550 * <p>
551 * If you use raw LZMA2 streams and unusual dictioanary size, call
552 * {@link LZMA2InputStream#getMemoryUsage} directly to get raw decoder
553 * memory requirements.
554 */
555 public int getDecoderMemoryUsage() {
556 // Round the dictionary size up to the next 2^n or 2^n + 2^(n-1).
557 int d = dictSize - 1;
558 d |= d >>> 2;
559 d |= d >>> 3;
560 d |= d >>> 4;
561 d |= d >>> 8;
562 d |= d >>> 16;
563 return LZMA2InputStream.getMemoryUsage(d + 1);
564 }
565
566 public InputStream getInputStream(InputStream in, ArrayCache arrayCache)
567 throws IOException {
568 return new LZMA2InputStream(in, dictSize, presetDict, arrayCache);
569 }
570
571 FilterEncoder getFilterEncoder() {
572 return new LZMA2Encoder(this);
573 }
574
575 public Object clone() {
576 try {
577 return super.clone();
578 } catch (CloneNotSupportedException e) {
579 assert false;
580 throw new RuntimeException();
581 }
582 }
583}
Note: See TracBrowser for help on using the repository browser.