1 | /*
|
---|
2 | * LZMAEncoder
|
---|
3 | *
|
---|
4 | * Authors: Lasse Collin <lasse.collin@tukaani.org>
|
---|
5 | * Igor Pavlov <http://7-zip.org/>
|
---|
6 | *
|
---|
7 | * This file has been put into the public domain.
|
---|
8 | * You can do whatever you want with this file.
|
---|
9 | */
|
---|
10 |
|
---|
11 | package org.tukaani.xz.lzma;
|
---|
12 |
|
---|
13 | import java.io.IOException;
|
---|
14 | import org.tukaani.xz.ArrayCache;
|
---|
15 | import org.tukaani.xz.lz.LZEncoder;
|
---|
16 | import org.tukaani.xz.lz.Matches;
|
---|
17 | import org.tukaani.xz.rangecoder.RangeEncoder;
|
---|
18 |
|
---|
19 | public abstract class LZMAEncoder extends LZMACoder {
|
---|
20 | public static final int MODE_FAST = 1;
|
---|
21 | public static final int MODE_NORMAL = 2;
|
---|
22 |
|
---|
23 | /**
|
---|
24 | * LZMA2 chunk is considered full when its uncompressed size exceeds
|
---|
25 | * <code>LZMA2_UNCOMPRESSED_LIMIT</code>.
|
---|
26 | * <p>
|
---|
27 | * A compressed LZMA2 chunk can hold 2 MiB of uncompressed data.
|
---|
28 | * A single LZMA symbol may indicate up to MATCH_LEN_MAX bytes
|
---|
29 | * of data, so the LZMA2 chunk is considered full when there is
|
---|
30 | * less space than MATCH_LEN_MAX bytes.
|
---|
31 | */
|
---|
32 | private static final int LZMA2_UNCOMPRESSED_LIMIT
|
---|
33 | = (2 << 20) - MATCH_LEN_MAX;
|
---|
34 |
|
---|
35 | /**
|
---|
36 | * LZMA2 chunk is considered full when its compressed size exceeds
|
---|
37 | * <code>LZMA2_COMPRESSED_LIMIT</code>.
|
---|
38 | * <p>
|
---|
39 | * The maximum compressed size of a LZMA2 chunk is 64 KiB.
|
---|
40 | * A single LZMA symbol might use 20 bytes of space even though
|
---|
41 | * it usually takes just one byte or so. Two more bytes are needed
|
---|
42 | * for LZMA2 uncompressed chunks (see LZMA2OutputStream.writeChunk).
|
---|
43 | * Leave a little safety margin and use 26 bytes.
|
---|
44 | */
|
---|
45 | private static final int LZMA2_COMPRESSED_LIMIT = (64 << 10) - 26;
|
---|
46 |
|
---|
47 | private static final int DIST_PRICE_UPDATE_INTERVAL = FULL_DISTANCES;
|
---|
48 | private static final int ALIGN_PRICE_UPDATE_INTERVAL = ALIGN_SIZE;
|
---|
49 |
|
---|
50 | private final RangeEncoder rc;
|
---|
51 | final LZEncoder lz;
|
---|
52 | final LiteralEncoder literalEncoder;
|
---|
53 | final LengthEncoder matchLenEncoder;
|
---|
54 | final LengthEncoder repLenEncoder;
|
---|
55 | final int niceLen;
|
---|
56 |
|
---|
57 | private int distPriceCount = 0;
|
---|
58 | private int alignPriceCount = 0;
|
---|
59 |
|
---|
60 | private final int distSlotPricesSize;
|
---|
61 | private final int[][] distSlotPrices;
|
---|
62 | private final int[][] fullDistPrices
|
---|
63 | = new int[DIST_STATES][FULL_DISTANCES];
|
---|
64 | private final int[] alignPrices = new int[ALIGN_SIZE];
|
---|
65 |
|
---|
66 | int back = 0;
|
---|
67 | int readAhead = -1;
|
---|
68 | private int uncompressedSize = 0;
|
---|
69 |
|
---|
70 | public static int getMemoryUsage(int mode, int dictSize,
|
---|
71 | int extraSizeBefore, int mf) {
|
---|
72 | int m = 80;
|
---|
73 |
|
---|
74 | switch (mode) {
|
---|
75 | case MODE_FAST:
|
---|
76 | m += LZMAEncoderFast.getMemoryUsage(
|
---|
77 | dictSize, extraSizeBefore, mf);
|
---|
78 | break;
|
---|
79 |
|
---|
80 | case MODE_NORMAL:
|
---|
81 | m += LZMAEncoderNormal.getMemoryUsage(
|
---|
82 | dictSize, extraSizeBefore, mf);
|
---|
83 | break;
|
---|
84 |
|
---|
85 | default:
|
---|
86 | throw new IllegalArgumentException();
|
---|
87 | }
|
---|
88 |
|
---|
89 | return m;
|
---|
90 | }
|
---|
91 |
|
---|
92 | public static LZMAEncoder getInstance(
|
---|
93 | RangeEncoder rc, int lc, int lp, int pb, int mode,
|
---|
94 | int dictSize, int extraSizeBefore,
|
---|
95 | int niceLen, int mf, int depthLimit,
|
---|
96 | ArrayCache arrayCache) {
|
---|
97 | switch (mode) {
|
---|
98 | case MODE_FAST:
|
---|
99 | return new LZMAEncoderFast(rc, lc, lp, pb,
|
---|
100 | dictSize, extraSizeBefore,
|
---|
101 | niceLen, mf, depthLimit,
|
---|
102 | arrayCache);
|
---|
103 |
|
---|
104 | case MODE_NORMAL:
|
---|
105 | return new LZMAEncoderNormal(rc, lc, lp, pb,
|
---|
106 | dictSize, extraSizeBefore,
|
---|
107 | niceLen, mf, depthLimit,
|
---|
108 | arrayCache);
|
---|
109 | }
|
---|
110 |
|
---|
111 | throw new IllegalArgumentException();
|
---|
112 | }
|
---|
113 |
|
---|
114 | public void putArraysToCache(ArrayCache arrayCache) {
|
---|
115 | lz.putArraysToCache(arrayCache);
|
---|
116 | }
|
---|
117 |
|
---|
118 | /**
|
---|
119 | * Gets an integer [0, 63] matching the highest two bits of an integer.
|
---|
120 | * This is like bit scan reverse (BSR) on x86 except that this also
|
---|
121 | * cares about the second highest bit.
|
---|
122 | */
|
---|
123 | public static int getDistSlot(int dist) {
|
---|
124 | if (dist <= DIST_MODEL_START && dist >= 0)
|
---|
125 | return dist;
|
---|
126 |
|
---|
127 | int n = dist;
|
---|
128 | int i = 31;
|
---|
129 |
|
---|
130 | if ((n & 0xFFFF0000) == 0) {
|
---|
131 | n <<= 16;
|
---|
132 | i = 15;
|
---|
133 | }
|
---|
134 |
|
---|
135 | if ((n & 0xFF000000) == 0) {
|
---|
136 | n <<= 8;
|
---|
137 | i -= 8;
|
---|
138 | }
|
---|
139 |
|
---|
140 | if ((n & 0xF0000000) == 0) {
|
---|
141 | n <<= 4;
|
---|
142 | i -= 4;
|
---|
143 | }
|
---|
144 |
|
---|
145 | if ((n & 0xC0000000) == 0) {
|
---|
146 | n <<= 2;
|
---|
147 | i -= 2;
|
---|
148 | }
|
---|
149 |
|
---|
150 | if ((n & 0x80000000) == 0)
|
---|
151 | --i;
|
---|
152 |
|
---|
153 | return (i << 1) + ((dist >>> (i - 1)) & 1);
|
---|
154 | }
|
---|
155 |
|
---|
156 | /**
|
---|
157 | * Gets the next LZMA symbol.
|
---|
158 | * <p>
|
---|
159 | * There are three types of symbols: literal (a single byte),
|
---|
160 | * repeated match, and normal match. The symbol is indicated
|
---|
161 | * by the return value and by the variable <code>back</code>.
|
---|
162 | * <p>
|
---|
163 | * Literal: <code>back == -1</code> and return value is <code>1</code>.
|
---|
164 | * The literal itself needs to be read from <code>lz</code> separately.
|
---|
165 | * <p>
|
---|
166 | * Repeated match: <code>back</code> is in the range [0, 3] and
|
---|
167 | * the return value is the length of the repeated match.
|
---|
168 | * <p>
|
---|
169 | * Normal match: <code>back - REPS<code> (<code>back - 4</code>)
|
---|
170 | * is the distance of the match and the return value is the length
|
---|
171 | * of the match.
|
---|
172 | */
|
---|
173 | abstract int getNextSymbol();
|
---|
174 |
|
---|
175 | LZMAEncoder(RangeEncoder rc, LZEncoder lz,
|
---|
176 | int lc, int lp, int pb, int dictSize, int niceLen) {
|
---|
177 | super(pb);
|
---|
178 | this.rc = rc;
|
---|
179 | this.lz = lz;
|
---|
180 | this.niceLen = niceLen;
|
---|
181 |
|
---|
182 | literalEncoder = new LiteralEncoder(lc, lp);
|
---|
183 | matchLenEncoder = new LengthEncoder(pb, niceLen);
|
---|
184 | repLenEncoder = new LengthEncoder(pb, niceLen);
|
---|
185 |
|
---|
186 | distSlotPricesSize = getDistSlot(dictSize - 1) + 1;
|
---|
187 | distSlotPrices = new int[DIST_STATES][distSlotPricesSize];
|
---|
188 |
|
---|
189 | reset();
|
---|
190 | }
|
---|
191 |
|
---|
192 | public LZEncoder getLZEncoder() {
|
---|
193 | return lz;
|
---|
194 | }
|
---|
195 |
|
---|
196 | public void reset() {
|
---|
197 | super.reset();
|
---|
198 | literalEncoder.reset();
|
---|
199 | matchLenEncoder.reset();
|
---|
200 | repLenEncoder.reset();
|
---|
201 | distPriceCount = 0;
|
---|
202 | alignPriceCount = 0;
|
---|
203 |
|
---|
204 | uncompressedSize += readAhead + 1;
|
---|
205 | readAhead = -1;
|
---|
206 | }
|
---|
207 |
|
---|
208 | public int getUncompressedSize() {
|
---|
209 | return uncompressedSize;
|
---|
210 | }
|
---|
211 |
|
---|
212 | public void resetUncompressedSize() {
|
---|
213 | uncompressedSize = 0;
|
---|
214 | }
|
---|
215 |
|
---|
216 | /**
|
---|
217 | * Compress for LZMA1.
|
---|
218 | */
|
---|
219 | public void encodeForLZMA1() throws IOException {
|
---|
220 | if (!lz.isStarted() && !encodeInit())
|
---|
221 | return;
|
---|
222 |
|
---|
223 | while (encodeSymbol()) {}
|
---|
224 | }
|
---|
225 |
|
---|
226 | public void encodeLZMA1EndMarker() throws IOException {
|
---|
227 | // End of stream marker is encoded as a match with the maximum
|
---|
228 | // possible distance. The length is ignored by the decoder,
|
---|
229 | // but the minimum length has been used by the LZMA SDK.
|
---|
230 | //
|
---|
231 | // Distance is a 32-bit unsigned integer in LZMA.
|
---|
232 | // With Java's signed int, UINT32_MAX becomes -1.
|
---|
233 | int posState = (lz.getPos() - readAhead) & posMask;
|
---|
234 | rc.encodeBit(isMatch[state.get()], posState, 1);
|
---|
235 | rc.encodeBit(isRep, state.get(), 0);
|
---|
236 | encodeMatch(-1, MATCH_LEN_MIN, posState);
|
---|
237 | }
|
---|
238 |
|
---|
239 | /**
|
---|
240 | * Compresses for LZMA2.
|
---|
241 | *
|
---|
242 | * @return true if the LZMA2 chunk became full, false otherwise
|
---|
243 | */
|
---|
244 | public boolean encodeForLZMA2() {
|
---|
245 | // LZMA2 uses RangeEncoderToBuffer so IOExceptions aren't possible.
|
---|
246 | try {
|
---|
247 | if (!lz.isStarted() && !encodeInit())
|
---|
248 | return false;
|
---|
249 |
|
---|
250 | while (uncompressedSize <= LZMA2_UNCOMPRESSED_LIMIT
|
---|
251 | && rc.getPendingSize() <= LZMA2_COMPRESSED_LIMIT)
|
---|
252 | if (!encodeSymbol())
|
---|
253 | return false;
|
---|
254 | } catch (IOException e) {
|
---|
255 | throw new Error();
|
---|
256 | }
|
---|
257 |
|
---|
258 | return true;
|
---|
259 | }
|
---|
260 |
|
---|
261 | private boolean encodeInit() throws IOException {
|
---|
262 | assert readAhead == -1;
|
---|
263 | if (!lz.hasEnoughData(0))
|
---|
264 | return false;
|
---|
265 |
|
---|
266 | // The first symbol must be a literal unless using
|
---|
267 | // a preset dictionary. This code isn't run if using
|
---|
268 | // a preset dictionary.
|
---|
269 | skip(1);
|
---|
270 | rc.encodeBit(isMatch[state.get()], 0, 0);
|
---|
271 | literalEncoder.encodeInit();
|
---|
272 |
|
---|
273 | --readAhead;
|
---|
274 | assert readAhead == -1;
|
---|
275 |
|
---|
276 | ++uncompressedSize;
|
---|
277 | assert uncompressedSize == 1;
|
---|
278 |
|
---|
279 | return true;
|
---|
280 | }
|
---|
281 |
|
---|
282 | private boolean encodeSymbol() throws IOException {
|
---|
283 | if (!lz.hasEnoughData(readAhead + 1))
|
---|
284 | return false;
|
---|
285 |
|
---|
286 | int len = getNextSymbol();
|
---|
287 |
|
---|
288 | assert readAhead >= 0;
|
---|
289 | int posState = (lz.getPos() - readAhead) & posMask;
|
---|
290 |
|
---|
291 | if (back == -1) {
|
---|
292 | // Literal i.e. eight-bit byte
|
---|
293 | assert len == 1;
|
---|
294 | rc.encodeBit(isMatch[state.get()], posState, 0);
|
---|
295 | literalEncoder.encode();
|
---|
296 | } else {
|
---|
297 | // Some type of match
|
---|
298 | rc.encodeBit(isMatch[state.get()], posState, 1);
|
---|
299 | if (back < REPS) {
|
---|
300 | // Repeated match i.e. the same distance
|
---|
301 | // has been used earlier.
|
---|
302 | assert lz.getMatchLen(-readAhead, reps[back], len) == len;
|
---|
303 | rc.encodeBit(isRep, state.get(), 1);
|
---|
304 | encodeRepMatch(back, len, posState);
|
---|
305 | } else {
|
---|
306 | // Normal match
|
---|
307 | assert lz.getMatchLen(-readAhead, back - REPS, len) == len;
|
---|
308 | rc.encodeBit(isRep, state.get(), 0);
|
---|
309 | encodeMatch(back - REPS, len, posState);
|
---|
310 | }
|
---|
311 | }
|
---|
312 |
|
---|
313 | readAhead -= len;
|
---|
314 | uncompressedSize += len;
|
---|
315 |
|
---|
316 | return true;
|
---|
317 | }
|
---|
318 |
|
---|
319 | private void encodeMatch(int dist, int len, int posState)
|
---|
320 | throws IOException {
|
---|
321 | state.updateMatch();
|
---|
322 | matchLenEncoder.encode(len, posState);
|
---|
323 |
|
---|
324 | int distSlot = getDistSlot(dist);
|
---|
325 | rc.encodeBitTree(distSlots[getDistState(len)], distSlot);
|
---|
326 |
|
---|
327 | if (distSlot >= DIST_MODEL_START) {
|
---|
328 | int footerBits = (distSlot >>> 1) - 1;
|
---|
329 | int base = (2 | (distSlot & 1)) << footerBits;
|
---|
330 | int distReduced = dist - base;
|
---|
331 |
|
---|
332 | if (distSlot < DIST_MODEL_END) {
|
---|
333 | rc.encodeReverseBitTree(
|
---|
334 | distSpecial[distSlot - DIST_MODEL_START],
|
---|
335 | distReduced);
|
---|
336 | } else {
|
---|
337 | rc.encodeDirectBits(distReduced >>> ALIGN_BITS,
|
---|
338 | footerBits - ALIGN_BITS);
|
---|
339 | rc.encodeReverseBitTree(distAlign, distReduced & ALIGN_MASK);
|
---|
340 | --alignPriceCount;
|
---|
341 | }
|
---|
342 | }
|
---|
343 |
|
---|
344 | reps[3] = reps[2];
|
---|
345 | reps[2] = reps[1];
|
---|
346 | reps[1] = reps[0];
|
---|
347 | reps[0] = dist;
|
---|
348 |
|
---|
349 | --distPriceCount;
|
---|
350 | }
|
---|
351 |
|
---|
352 | private void encodeRepMatch(int rep, int len, int posState)
|
---|
353 | throws IOException {
|
---|
354 | if (rep == 0) {
|
---|
355 | rc.encodeBit(isRep0, state.get(), 0);
|
---|
356 | rc.encodeBit(isRep0Long[state.get()], posState, len == 1 ? 0 : 1);
|
---|
357 | } else {
|
---|
358 | int dist = reps[rep];
|
---|
359 | rc.encodeBit(isRep0, state.get(), 1);
|
---|
360 |
|
---|
361 | if (rep == 1) {
|
---|
362 | rc.encodeBit(isRep1, state.get(), 0);
|
---|
363 | } else {
|
---|
364 | rc.encodeBit(isRep1, state.get(), 1);
|
---|
365 | rc.encodeBit(isRep2, state.get(), rep - 2);
|
---|
366 |
|
---|
367 | if (rep == 3)
|
---|
368 | reps[3] = reps[2];
|
---|
369 |
|
---|
370 | reps[2] = reps[1];
|
---|
371 | }
|
---|
372 |
|
---|
373 | reps[1] = reps[0];
|
---|
374 | reps[0] = dist;
|
---|
375 | }
|
---|
376 |
|
---|
377 | if (len == 1) {
|
---|
378 | state.updateShortRep();
|
---|
379 | } else {
|
---|
380 | repLenEncoder.encode(len, posState);
|
---|
381 | state.updateLongRep();
|
---|
382 | }
|
---|
383 | }
|
---|
384 |
|
---|
385 | Matches getMatches() {
|
---|
386 | ++readAhead;
|
---|
387 | Matches matches = lz.getMatches();
|
---|
388 | assert lz.verifyMatches(matches);
|
---|
389 | return matches;
|
---|
390 | }
|
---|
391 |
|
---|
392 | void skip(int len) {
|
---|
393 | readAhead += len;
|
---|
394 | lz.skip(len);
|
---|
395 | }
|
---|
396 |
|
---|
397 | int getAnyMatchPrice(State state, int posState) {
|
---|
398 | return RangeEncoder.getBitPrice(isMatch[state.get()][posState], 1);
|
---|
399 | }
|
---|
400 |
|
---|
401 | int getNormalMatchPrice(int anyMatchPrice, State state) {
|
---|
402 | return anyMatchPrice
|
---|
403 | + RangeEncoder.getBitPrice(isRep[state.get()], 0);
|
---|
404 | }
|
---|
405 |
|
---|
406 | int getAnyRepPrice(int anyMatchPrice, State state) {
|
---|
407 | return anyMatchPrice
|
---|
408 | + RangeEncoder.getBitPrice(isRep[state.get()], 1);
|
---|
409 | }
|
---|
410 |
|
---|
411 | int getShortRepPrice(int anyRepPrice, State state, int posState) {
|
---|
412 | return anyRepPrice
|
---|
413 | + RangeEncoder.getBitPrice(isRep0[state.get()], 0)
|
---|
414 | + RangeEncoder.getBitPrice(isRep0Long[state.get()][posState],
|
---|
415 | 0);
|
---|
416 | }
|
---|
417 |
|
---|
418 | int getLongRepPrice(int anyRepPrice, int rep, State state, int posState) {
|
---|
419 | int price = anyRepPrice;
|
---|
420 |
|
---|
421 | if (rep == 0) {
|
---|
422 | price += RangeEncoder.getBitPrice(isRep0[state.get()], 0)
|
---|
423 | + RangeEncoder.getBitPrice(
|
---|
424 | isRep0Long[state.get()][posState], 1);
|
---|
425 | } else {
|
---|
426 | price += RangeEncoder.getBitPrice(isRep0[state.get()], 1);
|
---|
427 |
|
---|
428 | if (rep == 1)
|
---|
429 | price += RangeEncoder.getBitPrice(isRep1[state.get()], 0);
|
---|
430 | else
|
---|
431 | price += RangeEncoder.getBitPrice(isRep1[state.get()], 1)
|
---|
432 | + RangeEncoder.getBitPrice(isRep2[state.get()],
|
---|
433 | rep - 2);
|
---|
434 | }
|
---|
435 |
|
---|
436 | return price;
|
---|
437 | }
|
---|
438 |
|
---|
439 | int getLongRepAndLenPrice(int rep, int len, State state, int posState) {
|
---|
440 | int anyMatchPrice = getAnyMatchPrice(state, posState);
|
---|
441 | int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
|
---|
442 | int longRepPrice = getLongRepPrice(anyRepPrice, rep, state, posState);
|
---|
443 | return longRepPrice + repLenEncoder.getPrice(len, posState);
|
---|
444 | }
|
---|
445 |
|
---|
446 | int getMatchAndLenPrice(int normalMatchPrice,
|
---|
447 | int dist, int len, int posState) {
|
---|
448 | int price = normalMatchPrice
|
---|
449 | + matchLenEncoder.getPrice(len, posState);
|
---|
450 | int distState = getDistState(len);
|
---|
451 |
|
---|
452 | if (dist < FULL_DISTANCES) {
|
---|
453 | price += fullDistPrices[distState][dist];
|
---|
454 | } else {
|
---|
455 | // Note that distSlotPrices includes also
|
---|
456 | // the price of direct bits.
|
---|
457 | int distSlot = getDistSlot(dist);
|
---|
458 | price += distSlotPrices[distState][distSlot]
|
---|
459 | + alignPrices[dist & ALIGN_MASK];
|
---|
460 | }
|
---|
461 |
|
---|
462 | return price;
|
---|
463 | }
|
---|
464 |
|
---|
465 | private void updateDistPrices() {
|
---|
466 | distPriceCount = DIST_PRICE_UPDATE_INTERVAL;
|
---|
467 |
|
---|
468 | for (int distState = 0; distState < DIST_STATES; ++distState) {
|
---|
469 | for (int distSlot = 0; distSlot < distSlotPricesSize; ++distSlot)
|
---|
470 | distSlotPrices[distState][distSlot]
|
---|
471 | = RangeEncoder.getBitTreePrice(
|
---|
472 | distSlots[distState], distSlot);
|
---|
473 |
|
---|
474 | for (int distSlot = DIST_MODEL_END; distSlot < distSlotPricesSize;
|
---|
475 | ++distSlot) {
|
---|
476 | int count = (distSlot >>> 1) - 1 - ALIGN_BITS;
|
---|
477 | distSlotPrices[distState][distSlot]
|
---|
478 | += RangeEncoder.getDirectBitsPrice(count);
|
---|
479 | }
|
---|
480 |
|
---|
481 | for (int dist = 0; dist < DIST_MODEL_START; ++dist)
|
---|
482 | fullDistPrices[distState][dist]
|
---|
483 | = distSlotPrices[distState][dist];
|
---|
484 | }
|
---|
485 |
|
---|
486 | int dist = DIST_MODEL_START;
|
---|
487 | for (int distSlot = DIST_MODEL_START; distSlot < DIST_MODEL_END;
|
---|
488 | ++distSlot) {
|
---|
489 | int footerBits = (distSlot >>> 1) - 1;
|
---|
490 | int base = (2 | (distSlot & 1)) << footerBits;
|
---|
491 |
|
---|
492 | int limit = distSpecial[distSlot - DIST_MODEL_START].length;
|
---|
493 | for (int i = 0; i < limit; ++i) {
|
---|
494 | int distReduced = dist - base;
|
---|
495 | int price = RangeEncoder.getReverseBitTreePrice(
|
---|
496 | distSpecial[distSlot - DIST_MODEL_START],
|
---|
497 | distReduced);
|
---|
498 |
|
---|
499 | for (int distState = 0; distState < DIST_STATES; ++distState)
|
---|
500 | fullDistPrices[distState][dist]
|
---|
501 | = distSlotPrices[distState][distSlot] + price;
|
---|
502 |
|
---|
503 | ++dist;
|
---|
504 | }
|
---|
505 | }
|
---|
506 |
|
---|
507 | assert dist == FULL_DISTANCES;
|
---|
508 | }
|
---|
509 |
|
---|
510 | private void updateAlignPrices() {
|
---|
511 | alignPriceCount = ALIGN_PRICE_UPDATE_INTERVAL;
|
---|
512 |
|
---|
513 | for (int i = 0; i < ALIGN_SIZE; ++i)
|
---|
514 | alignPrices[i] = RangeEncoder.getReverseBitTreePrice(distAlign,
|
---|
515 | i);
|
---|
516 | }
|
---|
517 |
|
---|
518 | /**
|
---|
519 | * Updates the lookup tables used for calculating match distance
|
---|
520 | * and length prices. The updating is skipped for performance reasons
|
---|
521 | * if the tables haven't changed much since the previous update.
|
---|
522 | */
|
---|
523 | void updatePrices() {
|
---|
524 | if (distPriceCount <= 0)
|
---|
525 | updateDistPrices();
|
---|
526 |
|
---|
527 | if (alignPriceCount <= 0)
|
---|
528 | updateAlignPrices();
|
---|
529 |
|
---|
530 | matchLenEncoder.updatePrices();
|
---|
531 | repLenEncoder.updatePrices();
|
---|
532 | }
|
---|
533 |
|
---|
534 |
|
---|
535 | class LiteralEncoder extends LiteralCoder {
|
---|
536 | private final LiteralSubencoder[] subencoders;
|
---|
537 |
|
---|
538 | LiteralEncoder(int lc, int lp) {
|
---|
539 | super(lc, lp);
|
---|
540 |
|
---|
541 | subencoders = new LiteralSubencoder[1 << (lc + lp)];
|
---|
542 | for (int i = 0; i < subencoders.length; ++i)
|
---|
543 | subencoders[i] = new LiteralSubencoder();
|
---|
544 | }
|
---|
545 |
|
---|
546 | void reset() {
|
---|
547 | for (int i = 0; i < subencoders.length; ++i)
|
---|
548 | subencoders[i].reset();
|
---|
549 | }
|
---|
550 |
|
---|
551 | void encodeInit() throws IOException {
|
---|
552 | // When encoding the first byte of the stream, there is
|
---|
553 | // no previous byte in the dictionary so the encode function
|
---|
554 | // wouldn't work.
|
---|
555 | assert readAhead >= 0;
|
---|
556 | subencoders[0].encode();
|
---|
557 | }
|
---|
558 |
|
---|
559 | void encode() throws IOException {
|
---|
560 | assert readAhead >= 0;
|
---|
561 | int i = getSubcoderIndex(lz.getByte(1 + readAhead),
|
---|
562 | lz.getPos() - readAhead);
|
---|
563 | subencoders[i].encode();
|
---|
564 | }
|
---|
565 |
|
---|
566 | int getPrice(int curByte, int matchByte,
|
---|
567 | int prevByte, int pos, State state) {
|
---|
568 | int price = RangeEncoder.getBitPrice(
|
---|
569 | isMatch[state.get()][pos & posMask], 0);
|
---|
570 |
|
---|
571 | int i = getSubcoderIndex(prevByte, pos);
|
---|
572 | price += state.isLiteral()
|
---|
573 | ? subencoders[i].getNormalPrice(curByte)
|
---|
574 | : subencoders[i].getMatchedPrice(curByte, matchByte);
|
---|
575 |
|
---|
576 | return price;
|
---|
577 | }
|
---|
578 |
|
---|
579 | private class LiteralSubencoder extends LiteralSubcoder {
|
---|
580 | void encode() throws IOException {
|
---|
581 | int symbol = lz.getByte(readAhead) | 0x100;
|
---|
582 |
|
---|
583 | if (state.isLiteral()) {
|
---|
584 | int subencoderIndex;
|
---|
585 | int bit;
|
---|
586 |
|
---|
587 | do {
|
---|
588 | subencoderIndex = symbol >>> 8;
|
---|
589 | bit = (symbol >>> 7) & 1;
|
---|
590 | rc.encodeBit(probs, subencoderIndex, bit);
|
---|
591 | symbol <<= 1;
|
---|
592 | } while (symbol < 0x10000);
|
---|
593 |
|
---|
594 | } else {
|
---|
595 | int matchByte = lz.getByte(reps[0] + 1 + readAhead);
|
---|
596 | int offset = 0x100;
|
---|
597 | int subencoderIndex;
|
---|
598 | int matchBit;
|
---|
599 | int bit;
|
---|
600 |
|
---|
601 | do {
|
---|
602 | matchByte <<= 1;
|
---|
603 | matchBit = matchByte & offset;
|
---|
604 | subencoderIndex = offset + matchBit + (symbol >>> 8);
|
---|
605 | bit = (symbol >>> 7) & 1;
|
---|
606 | rc.encodeBit(probs, subencoderIndex, bit);
|
---|
607 | symbol <<= 1;
|
---|
608 | offset &= ~(matchByte ^ symbol);
|
---|
609 | } while (symbol < 0x10000);
|
---|
610 | }
|
---|
611 |
|
---|
612 | state.updateLiteral();
|
---|
613 | }
|
---|
614 |
|
---|
615 | int getNormalPrice(int symbol) {
|
---|
616 | int price = 0;
|
---|
617 | int subencoderIndex;
|
---|
618 | int bit;
|
---|
619 |
|
---|
620 | symbol |= 0x100;
|
---|
621 |
|
---|
622 | do {
|
---|
623 | subencoderIndex = symbol >>> 8;
|
---|
624 | bit = (symbol >>> 7) & 1;
|
---|
625 | price += RangeEncoder.getBitPrice(probs[subencoderIndex],
|
---|
626 | bit);
|
---|
627 | symbol <<= 1;
|
---|
628 | } while (symbol < (0x100 << 8));
|
---|
629 |
|
---|
630 | return price;
|
---|
631 | }
|
---|
632 |
|
---|
633 | int getMatchedPrice(int symbol, int matchByte) {
|
---|
634 | int price = 0;
|
---|
635 | int offset = 0x100;
|
---|
636 | int subencoderIndex;
|
---|
637 | int matchBit;
|
---|
638 | int bit;
|
---|
639 |
|
---|
640 | symbol |= 0x100;
|
---|
641 |
|
---|
642 | do {
|
---|
643 | matchByte <<= 1;
|
---|
644 | matchBit = matchByte & offset;
|
---|
645 | subencoderIndex = offset + matchBit + (symbol >>> 8);
|
---|
646 | bit = (symbol >>> 7) & 1;
|
---|
647 | price += RangeEncoder.getBitPrice(probs[subencoderIndex],
|
---|
648 | bit);
|
---|
649 | symbol <<= 1;
|
---|
650 | offset &= ~(matchByte ^ symbol);
|
---|
651 | } while (symbol < (0x100 << 8));
|
---|
652 |
|
---|
653 | return price;
|
---|
654 | }
|
---|
655 | }
|
---|
656 | }
|
---|
657 |
|
---|
658 |
|
---|
659 | class LengthEncoder extends LengthCoder {
|
---|
660 | /**
|
---|
661 | * The prices are updated after at least
|
---|
662 | * <code>PRICE_UPDATE_INTERVAL</code> many lengths
|
---|
663 | * have been encoded with the same posState.
|
---|
664 | */
|
---|
665 | private static final int PRICE_UPDATE_INTERVAL = 32; // FIXME?
|
---|
666 |
|
---|
667 | private final int[] counters;
|
---|
668 | private final int[][] prices;
|
---|
669 |
|
---|
670 | LengthEncoder(int pb, int niceLen) {
|
---|
671 | int posStates = 1 << pb;
|
---|
672 | counters = new int[posStates];
|
---|
673 |
|
---|
674 | // Always allocate at least LOW_SYMBOLS + MID_SYMBOLS because
|
---|
675 | // it makes updatePrices slightly simpler. The prices aren't
|
---|
676 | // usually needed anyway if niceLen < 18.
|
---|
677 | int lenSymbols = Math.max(niceLen - MATCH_LEN_MIN + 1,
|
---|
678 | LOW_SYMBOLS + MID_SYMBOLS);
|
---|
679 | prices = new int[posStates][lenSymbols];
|
---|
680 | }
|
---|
681 |
|
---|
682 | void reset() {
|
---|
683 | super.reset();
|
---|
684 |
|
---|
685 | // Reset counters to zero to force price update before
|
---|
686 | // the prices are needed.
|
---|
687 | for (int i = 0; i < counters.length; ++i)
|
---|
688 | counters[i] = 0;
|
---|
689 | }
|
---|
690 |
|
---|
691 | void encode(int len, int posState) throws IOException {
|
---|
692 | len -= MATCH_LEN_MIN;
|
---|
693 |
|
---|
694 | if (len < LOW_SYMBOLS) {
|
---|
695 | rc.encodeBit(choice, 0, 0);
|
---|
696 | rc.encodeBitTree(low[posState], len);
|
---|
697 | } else {
|
---|
698 | rc.encodeBit(choice, 0, 1);
|
---|
699 | len -= LOW_SYMBOLS;
|
---|
700 |
|
---|
701 | if (len < MID_SYMBOLS) {
|
---|
702 | rc.encodeBit(choice, 1, 0);
|
---|
703 | rc.encodeBitTree(mid[posState], len);
|
---|
704 | } else {
|
---|
705 | rc.encodeBit(choice, 1, 1);
|
---|
706 | rc.encodeBitTree(high, len - MID_SYMBOLS);
|
---|
707 | }
|
---|
708 | }
|
---|
709 |
|
---|
710 | --counters[posState];
|
---|
711 | }
|
---|
712 |
|
---|
713 | int getPrice(int len, int posState) {
|
---|
714 | return prices[posState][len - MATCH_LEN_MIN];
|
---|
715 | }
|
---|
716 |
|
---|
717 | void updatePrices() {
|
---|
718 | for (int posState = 0; posState < counters.length; ++posState) {
|
---|
719 | if (counters[posState] <= 0) {
|
---|
720 | counters[posState] = PRICE_UPDATE_INTERVAL;
|
---|
721 | updatePrices(posState);
|
---|
722 | }
|
---|
723 | }
|
---|
724 | }
|
---|
725 |
|
---|
726 | private void updatePrices(int posState) {
|
---|
727 | int choice0Price = RangeEncoder.getBitPrice(choice[0], 0);
|
---|
728 |
|
---|
729 | int i = 0;
|
---|
730 | for (; i < LOW_SYMBOLS; ++i)
|
---|
731 | prices[posState][i] = choice0Price
|
---|
732 | + RangeEncoder.getBitTreePrice(low[posState], i);
|
---|
733 |
|
---|
734 | choice0Price = RangeEncoder.getBitPrice(choice[0], 1);
|
---|
735 | int choice1Price = RangeEncoder.getBitPrice(choice[1], 0);
|
---|
736 |
|
---|
737 | for (; i < LOW_SYMBOLS + MID_SYMBOLS; ++i)
|
---|
738 | prices[posState][i] = choice0Price + choice1Price
|
---|
739 | + RangeEncoder.getBitTreePrice(mid[posState],
|
---|
740 | i - LOW_SYMBOLS);
|
---|
741 |
|
---|
742 | choice1Price = RangeEncoder.getBitPrice(choice[1], 1);
|
---|
743 |
|
---|
744 | for (; i < prices[posState].length; ++i)
|
---|
745 | prices[posState][i] = choice0Price + choice1Price
|
---|
746 | + RangeEncoder.getBitTreePrice(high, i - LOW_SYMBOLS
|
---|
747 | - MID_SYMBOLS);
|
---|
748 | }
|
---|
749 | }
|
---|
750 | }
|
---|