source: josm/trunk/src/org/tukaani/xz/lz/LZEncoder.java@ 13573

Last change on this file since 13573 was 13350, checked in by stoecker, 7 years ago

see #15816 - add XZ support

File size: 13.9 KB
Line 
1/*
2 * LZEncoder
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11package org.tukaani.xz.lz;
12
13import java.io.OutputStream;
14import java.io.IOException;
15import org.tukaani.xz.ArrayCache;
16
17public abstract class LZEncoder {
18 public static final int MF_HC4 = 0x04;
19 public static final int MF_BT4 = 0x14;
20
21 /**
22 * Number of bytes to keep available before the current byte
23 * when moving the LZ window.
24 */
25 private final int keepSizeBefore;
26
27 /**
28 * Number of bytes that must be available, the current byte included,
29 * to make hasEnoughData return true. Flushing and finishing are
30 * naturally exceptions to this since there cannot be any data after
31 * the end of the uncompressed input.
32 */
33 private final int keepSizeAfter;
34
35 final int matchLenMax;
36 final int niceLen;
37
38 final byte[] buf;
39 final int bufSize; // To avoid buf.length with an array-cached buf.
40
41 int readPos = -1;
42 private int readLimit = -1;
43 private boolean finishing = false;
44 private int writePos = 0;
45 private int pendingSize = 0;
46
47 static void normalize(int[] positions, int positionsCount,
48 int normalizationOffset) {
49 for (int i = 0; i < positionsCount; ++i) {
50 if (positions[i] <= normalizationOffset)
51 positions[i] = 0;
52 else
53 positions[i] -= normalizationOffset;
54 }
55 }
56
57 /**
58 * Gets the size of the LZ window buffer that needs to be allocated.
59 */
60 private static int getBufSize(
61 int dictSize, int extraSizeBefore, int extraSizeAfter,
62 int matchLenMax) {
63 int keepSizeBefore = extraSizeBefore + dictSize;
64 int keepSizeAfter = extraSizeAfter + matchLenMax;
65 int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20);
66 return keepSizeBefore + keepSizeAfter + reserveSize;
67 }
68
69 /**
70 * Gets approximate memory usage of the LZEncoder base structure and
71 * the match finder as kibibytes.
72 */
73 public static int getMemoryUsage(
74 int dictSize, int extraSizeBefore, int extraSizeAfter,
75 int matchLenMax, int mf) {
76 // Buffer size + a little extra
77 int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
78 matchLenMax) / 1024 + 10;
79
80 switch (mf) {
81 case MF_HC4:
82 m += HC4.getMemoryUsage(dictSize);
83 break;
84
85 case MF_BT4:
86 m += BT4.getMemoryUsage(dictSize);
87 break;
88
89 default:
90 throw new IllegalArgumentException();
91 }
92
93 return m;
94 }
95
96 /**
97 * Creates a new LZEncoder.
98 * <p>
99 * @param dictSize dictionary size
100 *
101 * @param extraSizeBefore
102 * number of bytes to keep available in the
103 * history in addition to dictSize
104 *
105 * @param extraSizeAfter
106 * number of bytes that must be available
107 * after current position + matchLenMax
108 *
109 * @param niceLen if a match of at least <code>niceLen</code>
110 * bytes is found, be happy with it and don't
111 * stop looking for longer matches
112 *
113 * @param matchLenMax don't test for matches longer than
114 * <code>matchLenMax</code> bytes
115 *
116 * @param mf match finder ID
117 *
118 * @param depthLimit match finder search depth limit
119 */
120 public static LZEncoder getInstance(
121 int dictSize, int extraSizeBefore, int extraSizeAfter,
122 int niceLen, int matchLenMax, int mf, int depthLimit,
123 ArrayCache arrayCache) {
124 switch (mf) {
125 case MF_HC4:
126 return new HC4(dictSize, extraSizeBefore, extraSizeAfter,
127 niceLen, matchLenMax, depthLimit, arrayCache);
128
129 case MF_BT4:
130 return new BT4(dictSize, extraSizeBefore, extraSizeAfter,
131 niceLen, matchLenMax, depthLimit, arrayCache);
132 }
133
134 throw new IllegalArgumentException();
135 }
136
137 /**
138 * Creates a new LZEncoder. See <code>getInstance</code>.
139 */
140 LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter,
141 int niceLen, int matchLenMax, ArrayCache arrayCache) {
142 bufSize = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
143 matchLenMax);
144 buf = arrayCache.getByteArray(bufSize, false);
145
146 keepSizeBefore = extraSizeBefore + dictSize;
147 keepSizeAfter = extraSizeAfter + matchLenMax;
148
149 this.matchLenMax = matchLenMax;
150 this.niceLen = niceLen;
151 }
152
153 public void putArraysToCache(ArrayCache arrayCache) {
154 arrayCache.putArray(buf);
155 }
156
157 /**
158 * Sets a preset dictionary. If a preset dictionary is wanted, this
159 * function must be called immediately after creating the LZEncoder
160 * before any data has been encoded.
161 */
162 public void setPresetDict(int dictSize, byte[] presetDict) {
163 assert !isStarted();
164 assert writePos == 0;
165
166 if (presetDict != null) {
167 // If the preset dictionary buffer is bigger than the dictionary
168 // size, copy only the tail of the preset dictionary.
169 int copySize = Math.min(presetDict.length, dictSize);
170 int offset = presetDict.length - copySize;
171 System.arraycopy(presetDict, offset, buf, 0, copySize);
172 writePos += copySize;
173 skip(copySize);
174 }
175 }
176
177 /**
178 * Moves data from the end of the buffer to the beginning, discarding
179 * old data and making space for new input.
180 */
181 private void moveWindow() {
182 // Align the move to a multiple of 16 bytes. LZMA2 needs this
183 // because it uses the lowest bits from readPos to get the
184 // alignment of the uncompressed data.
185 int moveOffset = (readPos + 1 - keepSizeBefore) & ~15;
186 int moveSize = writePos - moveOffset;
187 System.arraycopy(buf, moveOffset, buf, 0, moveSize);
188
189 readPos -= moveOffset;
190 readLimit -= moveOffset;
191 writePos -= moveOffset;
192 }
193
194 /**
195 * Copies new data into the LZEncoder's buffer.
196 */
197 public int fillWindow(byte[] in, int off, int len) {
198 assert !finishing;
199
200 // Move the sliding window if needed.
201 if (readPos >= bufSize - keepSizeAfter)
202 moveWindow();
203
204 // Try to fill the dictionary buffer. If it becomes full,
205 // some of the input bytes may be left unused.
206 if (len > bufSize - writePos)
207 len = bufSize - writePos;
208
209 System.arraycopy(in, off, buf, writePos, len);
210 writePos += len;
211
212 // Set the new readLimit but only if there's enough data to allow
213 // encoding of at least one more byte.
214 if (writePos >= keepSizeAfter)
215 readLimit = writePos - keepSizeAfter;
216
217 processPendingBytes();
218
219 // Tell the caller how much input we actually copied into
220 // the dictionary.
221 return len;
222 }
223
224 /**
225 * Process pending bytes remaining from preset dictionary initialization
226 * or encoder flush operation.
227 */
228 private void processPendingBytes() {
229 // After flushing or setting a preset dictionary there will be
230 // pending data that hasn't been ran through the match finder yet.
231 // Run it through the match finder now if there is enough new data
232 // available (readPos < readLimit) that the encoder may encode at
233 // least one more input byte. This way we don't waste any time
234 // looping in the match finder (and marking the same bytes as
235 // pending again) if the application provides very little new data
236 // per write call.
237 if (pendingSize > 0 && readPos < readLimit) {
238 readPos -= pendingSize;
239 int oldPendingSize = pendingSize;
240 pendingSize = 0;
241 skip(oldPendingSize);
242 assert pendingSize < oldPendingSize;
243 }
244 }
245
246 /**
247 * Returns true if at least one byte has already been run through
248 * the match finder.
249 */
250 public boolean isStarted() {
251 return readPos != -1;
252 }
253
254 /**
255 * Marks that all the input needs to be made available in
256 * the encoded output.
257 */
258 public void setFlushing() {
259 readLimit = writePos - 1;
260 processPendingBytes();
261 }
262
263 /**
264 * Marks that there is no more input remaining. The read position
265 * can be advanced until the end of the data.
266 */
267 public void setFinishing() {
268 readLimit = writePos - 1;
269 finishing = true;
270 processPendingBytes();
271 }
272
273 /**
274 * Tests if there is enough input available to let the caller encode
275 * at least one more byte.
276 */
277 public boolean hasEnoughData(int alreadyReadLen) {
278 return readPos - alreadyReadLen < readLimit;
279 }
280
281 public void copyUncompressed(OutputStream out, int backward, int len)
282 throws IOException {
283 out.write(buf, readPos + 1 - backward, len);
284 }
285
286 /**
287 * Get the number of bytes available, including the current byte.
288 * <p>
289 * Note that the result is undefined if <code>getMatches</code> or
290 * <code>skip</code> hasn't been called yet and no preset dictionary
291 * is being used.
292 */
293 public int getAvail() {
294 assert isStarted();
295 return writePos - readPos;
296 }
297
298 /**
299 * Gets the lowest four bits of the absolute offset of the current byte.
300 * Bits other than the lowest four are undefined.
301 */
302 public int getPos() {
303 return readPos;
304 }
305
306 /**
307 * Gets the byte from the given backward offset.
308 * <p>
309 * The current byte is at <code>0</code>, the previous byte
310 * at <code>1</code> etc. To get a byte at zero-based distance,
311 * use <code>getByte(dist + 1)<code>.
312 * <p>
313 * This function is equivalent to <code>getByte(0, backward)</code>.
314 */
315 public int getByte(int backward) {
316 return buf[readPos - backward] & 0xFF;
317 }
318
319 /**
320 * Gets the byte from the given forward minus backward offset.
321 * The forward offset is added to the current position. This lets
322 * one read bytes ahead of the current byte.
323 */
324 public int getByte(int forward, int backward) {
325 return buf[readPos + forward - backward] & 0xFF;
326 }
327
328 /**
329 * Get the length of a match at the given distance.
330 *
331 * @param dist zero-based distance of the match to test
332 * @param lenLimit don't test for a match longer than this
333 *
334 * @return length of the match; it is in the range [0, lenLimit]
335 */
336 public int getMatchLen(int dist, int lenLimit) {
337 int backPos = readPos - dist - 1;
338 int len = 0;
339
340 while (len < lenLimit && buf[readPos + len] == buf[backPos + len])
341 ++len;
342
343 return len;
344 }
345
346 /**
347 * Get the length of a match at the given distance and forward offset.
348 *
349 * @param forward forward offset
350 * @param dist zero-based distance of the match to test
351 * @param lenLimit don't test for a match longer than this
352 *
353 * @return length of the match; it is in the range [0, lenLimit]
354 */
355 public int getMatchLen(int forward, int dist, int lenLimit) {
356 int curPos = readPos + forward;
357 int backPos = curPos - dist - 1;
358 int len = 0;
359
360 while (len < lenLimit && buf[curPos + len] == buf[backPos + len])
361 ++len;
362
363 return len;
364 }
365
366 /**
367 * Verifies that the matches returned by the match finder are valid.
368 * This is meant to be used in an assert statement. This is totally
369 * useless for actual encoding since match finder's results should
370 * naturally always be valid if it isn't broken.
371 *
372 * @param matches return value from <code>getMatches</code>
373 *
374 * @return true if matches are valid, false if match finder is broken
375 */
376 public boolean verifyMatches(Matches matches) {
377 int lenLimit = Math.min(getAvail(), matchLenMax);
378
379 for (int i = 0; i < matches.count; ++i)
380 if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i])
381 return false;
382
383 return true;
384 }
385
386 /**
387 * Moves to the next byte, checks if there is enough input available,
388 * and returns the amount of input available.
389 *
390 * @param requiredForFlushing
391 * minimum number of available bytes when
392 * flushing; encoding may be continued with
393 * new input after flushing
394 * @param requiredForFinishing
395 * minimum number of available bytes when
396 * finishing; encoding must not be continued
397 * after finishing or the match finder state
398 * may be corrupt
399 *
400 * @return the number of bytes available or zero if there
401 * is not enough input available
402 */
403 int movePos(int requiredForFlushing, int requiredForFinishing) {
404 assert requiredForFlushing >= requiredForFinishing;
405
406 ++readPos;
407 int avail = writePos - readPos;
408
409 if (avail < requiredForFlushing) {
410 if (avail < requiredForFinishing || !finishing) {
411 ++pendingSize;
412 avail = 0;
413 }
414 }
415
416 return avail;
417 }
418
419 /**
420 * Runs match finder for the next byte and returns the matches found.
421 */
422 public abstract Matches getMatches();
423
424 /**
425 * Skips the given number of bytes in the match finder.
426 */
427 public abstract void skip(int len);
428}
Note: See TracBrowser for help on using the repository browser.