source: josm/trunk/src/org/tukaani/xz/lzma/LZMAEncoderNormal.java@ 13573

Last change on this file since 13573 was 13350, checked in by stoecker, 7 years ago

see #15816 - add XZ support

File size: 20.1 KB
Line 
1/*
2 * LZMAEncoderNormal
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11package org.tukaani.xz.lzma;
12
13import org.tukaani.xz.ArrayCache;
14import org.tukaani.xz.lz.LZEncoder;
15import org.tukaani.xz.lz.Matches;
16import org.tukaani.xz.rangecoder.RangeEncoder;
17
18final class LZMAEncoderNormal extends LZMAEncoder {
19 private static final int OPTS = 4096;
20
21 private static final int EXTRA_SIZE_BEFORE = OPTS;
22 private static final int EXTRA_SIZE_AFTER = OPTS;
23
24 private final Optimum[] opts = new Optimum[OPTS];
25 private int optCur = 0;
26 private int optEnd = 0;
27
28 private Matches matches;
29
30 // These are fields solely to avoid allocating the objects again and
31 // again on each function call.
32 private final int[] repLens = new int[REPS];
33 private final State nextState = new State();
34
35 static int getMemoryUsage(int dictSize, int extraSizeBefore, int mf) {
36 return LZEncoder.getMemoryUsage(dictSize,
37 Math.max(extraSizeBefore, EXTRA_SIZE_BEFORE),
38 EXTRA_SIZE_AFTER, MATCH_LEN_MAX, mf)
39 + OPTS * 64 / 1024;
40 }
41
42 LZMAEncoderNormal(RangeEncoder rc, int lc, int lp, int pb,
43 int dictSize, int extraSizeBefore,
44 int niceLen, int mf, int depthLimit,
45 ArrayCache arrayCache) {
46 super(rc, LZEncoder.getInstance(dictSize,
47 Math.max(extraSizeBefore,
48 EXTRA_SIZE_BEFORE),
49 EXTRA_SIZE_AFTER,
50 niceLen, MATCH_LEN_MAX,
51 mf, depthLimit, arrayCache),
52 lc, lp, pb, dictSize, niceLen);
53
54 for (int i = 0; i < OPTS; ++i)
55 opts[i] = new Optimum();
56 }
57
58 public void reset() {
59 optCur = 0;
60 optEnd = 0;
61 super.reset();
62 }
63
64 /**
65 * Converts the opts array from backward indexes to forward indexes.
66 * Then it will be simple to get the next symbol from the array
67 * in later calls to <code>getNextSymbol()</code>.
68 */
69 private int convertOpts() {
70 optEnd = optCur;
71
72 int optPrev = opts[optCur].optPrev;
73
74 do {
75 Optimum opt = opts[optCur];
76
77 if (opt.prev1IsLiteral) {
78 opts[optPrev].optPrev = optCur;
79 opts[optPrev].backPrev = -1;
80 optCur = optPrev--;
81
82 if (opt.hasPrev2) {
83 opts[optPrev].optPrev = optPrev + 1;
84 opts[optPrev].backPrev = opt.backPrev2;
85 optCur = optPrev;
86 optPrev = opt.optPrev2;
87 }
88 }
89
90 int temp = opts[optPrev].optPrev;
91 opts[optPrev].optPrev = optCur;
92 optCur = optPrev;
93 optPrev = temp;
94 } while (optCur > 0);
95
96 optCur = opts[0].optPrev;
97 back = opts[optCur].backPrev;
98 return optCur;
99 }
100
101 int getNextSymbol() {
102 // If there are pending symbols from an earlier call to this
103 // function, return those symbols first.
104 if (optCur < optEnd) {
105 int len = opts[optCur].optPrev - optCur;
106 optCur = opts[optCur].optPrev;
107 back = opts[optCur].backPrev;
108 return len;
109 }
110
111 assert optCur == optEnd;
112 optCur = 0;
113 optEnd = 0;
114 back = -1;
115
116 if (readAhead == -1)
117 matches = getMatches();
118
119 // Get the number of bytes available in the dictionary, but
120 // not more than the maximum match length. If there aren't
121 // enough bytes remaining to encode a match at all, return
122 // immediately to encode this byte as a literal.
123 int avail = Math.min(lz.getAvail(), MATCH_LEN_MAX);
124 if (avail < MATCH_LEN_MIN)
125 return 1;
126
127 // Get the lengths of repeated matches.
128 int repBest = 0;
129 for (int rep = 0; rep < REPS; ++rep) {
130 repLens[rep] = lz.getMatchLen(reps[rep], avail);
131
132 if (repLens[rep] < MATCH_LEN_MIN) {
133 repLens[rep] = 0;
134 continue;
135 }
136
137 if (repLens[rep] > repLens[repBest])
138 repBest = rep;
139 }
140
141 // Return if the best repeated match is at least niceLen bytes long.
142 if (repLens[repBest] >= niceLen) {
143 back = repBest;
144 skip(repLens[repBest] - 1);
145 return repLens[repBest];
146 }
147
148 // Initialize mainLen and mainDist to the longest match found
149 // by the match finder.
150 int mainLen = 0;
151 int mainDist = 0;
152 if (matches.count > 0) {
153 mainLen = matches.len[matches.count - 1];
154 mainDist = matches.dist[matches.count - 1];
155
156 // Return if it is at least niceLen bytes long.
157 if (mainLen >= niceLen) {
158 back = mainDist + REPS;
159 skip(mainLen - 1);
160 return mainLen;
161 }
162 }
163
164 int curByte = lz.getByte(0);
165 int matchByte = lz.getByte(reps[0] + 1);
166
167 // If the match finder found no matches and this byte cannot be
168 // encoded as a repeated match (short or long), we must be return
169 // to have the byte encoded as a literal.
170 if (mainLen < MATCH_LEN_MIN && curByte != matchByte
171 && repLens[repBest] < MATCH_LEN_MIN)
172 return 1;
173
174
175 int pos = lz.getPos();
176 int posState = pos & posMask;
177
178 // Calculate the price of encoding the current byte as a literal.
179 {
180 int prevByte = lz.getByte(1);
181 int literalPrice = literalEncoder.getPrice(curByte, matchByte,
182 prevByte, pos, state);
183 opts[1].set1(literalPrice, 0, -1);
184 }
185
186 int anyMatchPrice = getAnyMatchPrice(state, posState);
187 int anyRepPrice = getAnyRepPrice(anyMatchPrice, state);
188
189 // If it is possible to encode this byte as a short rep, see if
190 // it is cheaper than encoding it as a literal.
191 if (matchByte == curByte) {
192 int shortRepPrice = getShortRepPrice(anyRepPrice,
193 state, posState);
194 if (shortRepPrice < opts[1].price)
195 opts[1].set1(shortRepPrice, 0, 0);
196 }
197
198 // Return if there is neither normal nor long repeated match. Use
199 // a short match instead of a literal if is is possible and cheaper.
200 optEnd = Math.max(mainLen, repLens[repBest]);
201 if (optEnd < MATCH_LEN_MIN) {
202 assert optEnd == 0 : optEnd;
203 back = opts[1].backPrev;
204 return 1;
205 }
206
207
208 // Update the lookup tables for distances and lengths before using
209 // those price calculation functions. (The price function above
210 // don't need these tables.)
211 updatePrices();
212
213 // Initialize the state and reps of this position in opts[].
214 // updateOptStateAndReps() will need these to get the new
215 // state and reps for the next byte.
216 opts[0].state.set(state);
217 System.arraycopy(reps, 0, opts[0].reps, 0, REPS);
218
219 // Initialize the prices for latter opts that will be used below.
220 for (int i = optEnd; i >= MATCH_LEN_MIN; --i)
221 opts[i].reset();
222
223 // Calculate the prices of repeated matches of all lengths.
224 for (int rep = 0; rep < REPS; ++rep) {
225 int repLen = repLens[rep];
226 if (repLen < MATCH_LEN_MIN)
227 continue;
228
229 int longRepPrice = getLongRepPrice(anyRepPrice, rep,
230 state, posState);
231 do {
232 int price = longRepPrice + repLenEncoder.getPrice(repLen,
233 posState);
234 if (price < opts[repLen].price)
235 opts[repLen].set1(price, 0, rep);
236 } while (--repLen >= MATCH_LEN_MIN);
237 }
238
239 // Calculate the prices of normal matches that are longer than rep0.
240 {
241 int len = Math.max(repLens[0] + 1, MATCH_LEN_MIN);
242 if (len <= mainLen) {
243 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
244 state);
245
246 // Set i to the index of the shortest match that is
247 // at least len bytes long.
248 int i = 0;
249 while (len > matches.len[i])
250 ++i;
251
252 while (true) {
253 int dist = matches.dist[i];
254 int price = getMatchAndLenPrice(normalMatchPrice,
255 dist, len, posState);
256 if (price < opts[len].price)
257 opts[len].set1(price, 0, dist + REPS);
258
259 if (len == matches.len[i])
260 if (++i == matches.count)
261 break;
262
263 ++len;
264 }
265 }
266 }
267
268
269 avail = Math.min(lz.getAvail(), OPTS - 1);
270
271 // Get matches for later bytes and optimize the use of LZMA symbols
272 // by calculating the prices and picking the cheapest symbol
273 // combinations.
274 while (++optCur < optEnd) {
275 matches = getMatches();
276 if (matches.count > 0
277 && matches.len[matches.count - 1] >= niceLen)
278 break;
279
280 --avail;
281 ++pos;
282 posState = pos & posMask;
283
284 updateOptStateAndReps();
285 anyMatchPrice = opts[optCur].price
286 + getAnyMatchPrice(opts[optCur].state, posState);
287 anyRepPrice = getAnyRepPrice(anyMatchPrice, opts[optCur].state);
288
289 calc1BytePrices(pos, posState, avail, anyRepPrice);
290
291 if (avail >= MATCH_LEN_MIN) {
292 int startLen = calcLongRepPrices(pos, posState,
293 avail, anyRepPrice);
294 if (matches.count > 0)
295 calcNormalMatchPrices(pos, posState, avail,
296 anyMatchPrice, startLen);
297 }
298 }
299
300 return convertOpts();
301 }
302
303 /**
304 * Updates the state and reps for the current byte in the opts array.
305 */
306 private void updateOptStateAndReps() {
307 int optPrev = opts[optCur].optPrev;
308 assert optPrev < optCur;
309
310 if (opts[optCur].prev1IsLiteral) {
311 --optPrev;
312
313 if (opts[optCur].hasPrev2) {
314 opts[optCur].state.set(opts[opts[optCur].optPrev2].state);
315 if (opts[optCur].backPrev2 < REPS)
316 opts[optCur].state.updateLongRep();
317 else
318 opts[optCur].state.updateMatch();
319 } else {
320 opts[optCur].state.set(opts[optPrev].state);
321 }
322
323 opts[optCur].state.updateLiteral();
324 } else {
325 opts[optCur].state.set(opts[optPrev].state);
326 }
327
328 if (optPrev == optCur - 1) {
329 // Must be either a short rep or a literal.
330 assert opts[optCur].backPrev == 0 || opts[optCur].backPrev == -1;
331
332 if (opts[optCur].backPrev == 0)
333 opts[optCur].state.updateShortRep();
334 else
335 opts[optCur].state.updateLiteral();
336
337 System.arraycopy(opts[optPrev].reps, 0,
338 opts[optCur].reps, 0, REPS);
339 } else {
340 int back;
341 if (opts[optCur].prev1IsLiteral && opts[optCur].hasPrev2) {
342 optPrev = opts[optCur].optPrev2;
343 back = opts[optCur].backPrev2;
344 opts[optCur].state.updateLongRep();
345 } else {
346 back = opts[optCur].backPrev;
347 if (back < REPS)
348 opts[optCur].state.updateLongRep();
349 else
350 opts[optCur].state.updateMatch();
351 }
352
353 if (back < REPS) {
354 opts[optCur].reps[0] = opts[optPrev].reps[back];
355
356 int rep;
357 for (rep = 1; rep <= back; ++rep)
358 opts[optCur].reps[rep] = opts[optPrev].reps[rep - 1];
359
360 for (; rep < REPS; ++rep)
361 opts[optCur].reps[rep] = opts[optPrev].reps[rep];
362 } else {
363 opts[optCur].reps[0] = back - REPS;
364 System.arraycopy(opts[optPrev].reps, 0,
365 opts[optCur].reps, 1, REPS - 1);
366 }
367 }
368 }
369
370 /**
371 * Calculates prices of a literal, a short rep, and literal + rep0.
372 */
373 private void calc1BytePrices(int pos, int posState,
374 int avail, int anyRepPrice) {
375 // This will be set to true if using a literal or a short rep.
376 boolean nextIsByte = false;
377
378 int curByte = lz.getByte(0);
379 int matchByte = lz.getByte(opts[optCur].reps[0] + 1);
380
381 // Try a literal.
382 int literalPrice = opts[optCur].price
383 + literalEncoder.getPrice(curByte, matchByte, lz.getByte(1),
384 pos, opts[optCur].state);
385 if (literalPrice < opts[optCur + 1].price) {
386 opts[optCur + 1].set1(literalPrice, optCur, -1);
387 nextIsByte = true;
388 }
389
390 // Try a short rep.
391 if (matchByte == curByte && (opts[optCur + 1].optPrev == optCur
392 || opts[optCur + 1].backPrev != 0)) {
393 int shortRepPrice = getShortRepPrice(anyRepPrice,
394 opts[optCur].state,
395 posState);
396 if (shortRepPrice <= opts[optCur + 1].price) {
397 opts[optCur + 1].set1(shortRepPrice, optCur, 0);
398 nextIsByte = true;
399 }
400 }
401
402 // If neither a literal nor a short rep was the cheapest choice,
403 // try literal + long rep0.
404 if (!nextIsByte && matchByte != curByte && avail > MATCH_LEN_MIN) {
405 int lenLimit = Math.min(niceLen, avail - 1);
406 int len = lz.getMatchLen(1, opts[optCur].reps[0], lenLimit);
407
408 if (len >= MATCH_LEN_MIN) {
409 nextState.set(opts[optCur].state);
410 nextState.updateLiteral();
411 int nextPosState = (pos + 1) & posMask;
412 int price = literalPrice
413 + getLongRepAndLenPrice(0, len,
414 nextState, nextPosState);
415
416 int i = optCur + 1 + len;
417 while (optEnd < i)
418 opts[++optEnd].reset();
419
420 if (price < opts[i].price)
421 opts[i].set2(price, optCur, 0);
422 }
423 }
424 }
425
426 /**
427 * Calculates prices of long rep and long rep + literal + rep0.
428 */
429 private int calcLongRepPrices(int pos, int posState,
430 int avail, int anyRepPrice) {
431 int startLen = MATCH_LEN_MIN;
432 int lenLimit = Math.min(avail, niceLen);
433
434 for (int rep = 0; rep < REPS; ++rep) {
435 int len = lz.getMatchLen(opts[optCur].reps[rep], lenLimit);
436 if (len < MATCH_LEN_MIN)
437 continue;
438
439 while (optEnd < optCur + len)
440 opts[++optEnd].reset();
441
442 int longRepPrice = getLongRepPrice(anyRepPrice, rep,
443 opts[optCur].state, posState);
444
445 for (int i = len; i >= MATCH_LEN_MIN; --i) {
446 int price = longRepPrice
447 + repLenEncoder.getPrice(i, posState);
448 if (price < opts[optCur + i].price)
449 opts[optCur + i].set1(price, optCur, rep);
450 }
451
452 if (rep == 0)
453 startLen = len + 1;
454
455 int len2Limit = Math.min(niceLen, avail - len - 1);
456 int len2 = lz.getMatchLen(len + 1, opts[optCur].reps[rep],
457 len2Limit);
458
459 if (len2 >= MATCH_LEN_MIN) {
460 // Rep
461 int price = longRepPrice
462 + repLenEncoder.getPrice(len, posState);
463 nextState.set(opts[optCur].state);
464 nextState.updateLongRep();
465
466 // Literal
467 int curByte = lz.getByte(len, 0);
468 int matchByte = lz.getByte(0); // lz.getByte(len, len)
469 int prevByte = lz.getByte(len, 1);
470 price += literalEncoder.getPrice(curByte, matchByte, prevByte,
471 pos + len, nextState);
472 nextState.updateLiteral();
473
474 // Rep0
475 int nextPosState = (pos + len + 1) & posMask;
476 price += getLongRepAndLenPrice(0, len2,
477 nextState, nextPosState);
478
479 int i = optCur + len + 1 + len2;
480 while (optEnd < i)
481 opts[++optEnd].reset();
482
483 if (price < opts[i].price)
484 opts[i].set3(price, optCur, rep, len, 0);
485 }
486 }
487
488 return startLen;
489 }
490
491 /**
492 * Calculates prices of a normal match and normal match + literal + rep0.
493 */
494 private void calcNormalMatchPrices(int pos, int posState, int avail,
495 int anyMatchPrice, int startLen) {
496 // If the longest match is so long that it would not fit into
497 // the opts array, shorten the matches.
498 if (matches.len[matches.count - 1] > avail) {
499 matches.count = 0;
500 while (matches.len[matches.count] < avail)
501 ++matches.count;
502
503 matches.len[matches.count++] = avail;
504 }
505
506 if (matches.len[matches.count - 1] < startLen)
507 return;
508
509 while (optEnd < optCur + matches.len[matches.count - 1])
510 opts[++optEnd].reset();
511
512 int normalMatchPrice = getNormalMatchPrice(anyMatchPrice,
513 opts[optCur].state);
514
515 int match = 0;
516 while (startLen > matches.len[match])
517 ++match;
518
519 for (int len = startLen; ; ++len) {
520 int dist = matches.dist[match];
521
522 // Calculate the price of a match of len bytes from the nearest
523 // possible distance.
524 int matchAndLenPrice = getMatchAndLenPrice(normalMatchPrice,
525 dist, len, posState);
526 if (matchAndLenPrice < opts[optCur + len].price)
527 opts[optCur + len].set1(matchAndLenPrice,
528 optCur, dist + REPS);
529
530 if (len != matches.len[match])
531 continue;
532
533 // Try match + literal + rep0. First get the length of the rep0.
534 int len2Limit = Math.min(niceLen, avail - len - 1);
535 int len2 = lz.getMatchLen(len + 1, dist, len2Limit);
536
537 if (len2 >= MATCH_LEN_MIN) {
538 nextState.set(opts[optCur].state);
539 nextState.updateMatch();
540
541 // Literal
542 int curByte = lz.getByte(len, 0);
543 int matchByte = lz.getByte(0); // lz.getByte(len, len)
544 int prevByte = lz.getByte(len, 1);
545 int price = matchAndLenPrice
546 + literalEncoder.getPrice(curByte, matchByte,
547 prevByte, pos + len,
548 nextState);
549 nextState.updateLiteral();
550
551 // Rep0
552 int nextPosState = (pos + len + 1) & posMask;
553 price += getLongRepAndLenPrice(0, len2,
554 nextState, nextPosState);
555
556 int i = optCur + len + 1 + len2;
557 while (optEnd < i)
558 opts[++optEnd].reset();
559
560 if (price < opts[i].price)
561 opts[i].set3(price, optCur, dist + REPS, len, 0);
562 }
563
564 if (++match == matches.count)
565 break;
566 }
567 }
568}
Note: See TracBrowser for help on using the repository browser.