1 | /*
|
---|
2 | * LZEncoder
|
---|
3 | *
|
---|
4 | * Authors: Lasse Collin <lasse.collin@tukaani.org>
|
---|
5 | * Igor Pavlov <http://7-zip.org/>
|
---|
6 | *
|
---|
7 | * This file has been put into the public domain.
|
---|
8 | * You can do whatever you want with this file.
|
---|
9 | */
|
---|
10 |
|
---|
11 | package org.tukaani.xz.lz;
|
---|
12 |
|
---|
13 | import java.io.OutputStream;
|
---|
14 | import java.io.IOException;
|
---|
15 | import org.tukaani.xz.ArrayCache;
|
---|
16 |
|
---|
17 | public abstract class LZEncoder {
|
---|
18 | public static final int MF_HC4 = 0x04;
|
---|
19 | public static final int MF_BT4 = 0x14;
|
---|
20 |
|
---|
21 | /**
|
---|
22 | * Number of bytes to keep available before the current byte
|
---|
23 | * when moving the LZ window.
|
---|
24 | */
|
---|
25 | private final int keepSizeBefore;
|
---|
26 |
|
---|
27 | /**
|
---|
28 | * Number of bytes that must be available, the current byte included,
|
---|
29 | * to make hasEnoughData return true. Flushing and finishing are
|
---|
30 | * naturally exceptions to this since there cannot be any data after
|
---|
31 | * the end of the uncompressed input.
|
---|
32 | */
|
---|
33 | private final int keepSizeAfter;
|
---|
34 |
|
---|
35 | final int matchLenMax;
|
---|
36 | final int niceLen;
|
---|
37 |
|
---|
38 | final byte[] buf;
|
---|
39 | final int bufSize; // To avoid buf.length with an array-cached buf.
|
---|
40 |
|
---|
41 | int readPos = -1;
|
---|
42 | private int readLimit = -1;
|
---|
43 | private boolean finishing = false;
|
---|
44 | private int writePos = 0;
|
---|
45 | private int pendingSize = 0;
|
---|
46 |
|
---|
47 | static void normalize(int[] positions, int positionsCount,
|
---|
48 | int normalizationOffset) {
|
---|
49 | for (int i = 0; i < positionsCount; ++i) {
|
---|
50 | if (positions[i] <= normalizationOffset)
|
---|
51 | positions[i] = 0;
|
---|
52 | else
|
---|
53 | positions[i] -= normalizationOffset;
|
---|
54 | }
|
---|
55 | }
|
---|
56 |
|
---|
57 | /**
|
---|
58 | * Gets the size of the LZ window buffer that needs to be allocated.
|
---|
59 | */
|
---|
60 | private static int getBufSize(
|
---|
61 | int dictSize, int extraSizeBefore, int extraSizeAfter,
|
---|
62 | int matchLenMax) {
|
---|
63 | int keepSizeBefore = extraSizeBefore + dictSize;
|
---|
64 | int keepSizeAfter = extraSizeAfter + matchLenMax;
|
---|
65 | int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20);
|
---|
66 | return keepSizeBefore + keepSizeAfter + reserveSize;
|
---|
67 | }
|
---|
68 |
|
---|
69 | /**
|
---|
70 | * Gets approximate memory usage of the LZEncoder base structure and
|
---|
71 | * the match finder as kibibytes.
|
---|
72 | */
|
---|
73 | public static int getMemoryUsage(
|
---|
74 | int dictSize, int extraSizeBefore, int extraSizeAfter,
|
---|
75 | int matchLenMax, int mf) {
|
---|
76 | // Buffer size + a little extra
|
---|
77 | int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
|
---|
78 | matchLenMax) / 1024 + 10;
|
---|
79 |
|
---|
80 | switch (mf) {
|
---|
81 | case MF_HC4:
|
---|
82 | m += HC4.getMemoryUsage(dictSize);
|
---|
83 | break;
|
---|
84 |
|
---|
85 | case MF_BT4:
|
---|
86 | m += BT4.getMemoryUsage(dictSize);
|
---|
87 | break;
|
---|
88 |
|
---|
89 | default:
|
---|
90 | throw new IllegalArgumentException();
|
---|
91 | }
|
---|
92 |
|
---|
93 | return m;
|
---|
94 | }
|
---|
95 |
|
---|
96 | /**
|
---|
97 | * Creates a new LZEncoder.
|
---|
98 | * <p>
|
---|
99 | * @param dictSize dictionary size
|
---|
100 | *
|
---|
101 | * @param extraSizeBefore
|
---|
102 | * number of bytes to keep available in the
|
---|
103 | * history in addition to dictSize
|
---|
104 | *
|
---|
105 | * @param extraSizeAfter
|
---|
106 | * number of bytes that must be available
|
---|
107 | * after current position + matchLenMax
|
---|
108 | *
|
---|
109 | * @param niceLen if a match of at least <code>niceLen</code>
|
---|
110 | * bytes is found, be happy with it and don't
|
---|
111 | * stop looking for longer matches
|
---|
112 | *
|
---|
113 | * @param matchLenMax don't test for matches longer than
|
---|
114 | * <code>matchLenMax</code> bytes
|
---|
115 | *
|
---|
116 | * @param mf match finder ID
|
---|
117 | *
|
---|
118 | * @param depthLimit match finder search depth limit
|
---|
119 | */
|
---|
120 | public static LZEncoder getInstance(
|
---|
121 | int dictSize, int extraSizeBefore, int extraSizeAfter,
|
---|
122 | int niceLen, int matchLenMax, int mf, int depthLimit,
|
---|
123 | ArrayCache arrayCache) {
|
---|
124 | switch (mf) {
|
---|
125 | case MF_HC4:
|
---|
126 | return new HC4(dictSize, extraSizeBefore, extraSizeAfter,
|
---|
127 | niceLen, matchLenMax, depthLimit, arrayCache);
|
---|
128 |
|
---|
129 | case MF_BT4:
|
---|
130 | return new BT4(dictSize, extraSizeBefore, extraSizeAfter,
|
---|
131 | niceLen, matchLenMax, depthLimit, arrayCache);
|
---|
132 | }
|
---|
133 |
|
---|
134 | throw new IllegalArgumentException();
|
---|
135 | }
|
---|
136 |
|
---|
137 | /**
|
---|
138 | * Creates a new LZEncoder. See <code>getInstance</code>.
|
---|
139 | */
|
---|
140 | LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter,
|
---|
141 | int niceLen, int matchLenMax, ArrayCache arrayCache) {
|
---|
142 | bufSize = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
|
---|
143 | matchLenMax);
|
---|
144 | buf = arrayCache.getByteArray(bufSize, false);
|
---|
145 |
|
---|
146 | keepSizeBefore = extraSizeBefore + dictSize;
|
---|
147 | keepSizeAfter = extraSizeAfter + matchLenMax;
|
---|
148 |
|
---|
149 | this.matchLenMax = matchLenMax;
|
---|
150 | this.niceLen = niceLen;
|
---|
151 | }
|
---|
152 |
|
---|
153 | public void putArraysToCache(ArrayCache arrayCache) {
|
---|
154 | arrayCache.putArray(buf);
|
---|
155 | }
|
---|
156 |
|
---|
157 | /**
|
---|
158 | * Sets a preset dictionary. If a preset dictionary is wanted, this
|
---|
159 | * function must be called immediately after creating the LZEncoder
|
---|
160 | * before any data has been encoded.
|
---|
161 | */
|
---|
162 | public void setPresetDict(int dictSize, byte[] presetDict) {
|
---|
163 | assert !isStarted();
|
---|
164 | assert writePos == 0;
|
---|
165 |
|
---|
166 | if (presetDict != null) {
|
---|
167 | // If the preset dictionary buffer is bigger than the dictionary
|
---|
168 | // size, copy only the tail of the preset dictionary.
|
---|
169 | int copySize = Math.min(presetDict.length, dictSize);
|
---|
170 | int offset = presetDict.length - copySize;
|
---|
171 | System.arraycopy(presetDict, offset, buf, 0, copySize);
|
---|
172 | writePos += copySize;
|
---|
173 | skip(copySize);
|
---|
174 | }
|
---|
175 | }
|
---|
176 |
|
---|
177 | /**
|
---|
178 | * Moves data from the end of the buffer to the beginning, discarding
|
---|
179 | * old data and making space for new input.
|
---|
180 | */
|
---|
181 | private void moveWindow() {
|
---|
182 | // Align the move to a multiple of 16 bytes. LZMA2 needs this
|
---|
183 | // because it uses the lowest bits from readPos to get the
|
---|
184 | // alignment of the uncompressed data.
|
---|
185 | int moveOffset = (readPos + 1 - keepSizeBefore) & ~15;
|
---|
186 | int moveSize = writePos - moveOffset;
|
---|
187 | System.arraycopy(buf, moveOffset, buf, 0, moveSize);
|
---|
188 |
|
---|
189 | readPos -= moveOffset;
|
---|
190 | readLimit -= moveOffset;
|
---|
191 | writePos -= moveOffset;
|
---|
192 | }
|
---|
193 |
|
---|
194 | /**
|
---|
195 | * Copies new data into the LZEncoder's buffer.
|
---|
196 | */
|
---|
197 | public int fillWindow(byte[] in, int off, int len) {
|
---|
198 | assert !finishing;
|
---|
199 |
|
---|
200 | // Move the sliding window if needed.
|
---|
201 | if (readPos >= bufSize - keepSizeAfter)
|
---|
202 | moveWindow();
|
---|
203 |
|
---|
204 | // Try to fill the dictionary buffer. If it becomes full,
|
---|
205 | // some of the input bytes may be left unused.
|
---|
206 | if (len > bufSize - writePos)
|
---|
207 | len = bufSize - writePos;
|
---|
208 |
|
---|
209 | System.arraycopy(in, off, buf, writePos, len);
|
---|
210 | writePos += len;
|
---|
211 |
|
---|
212 | // Set the new readLimit but only if there's enough data to allow
|
---|
213 | // encoding of at least one more byte.
|
---|
214 | if (writePos >= keepSizeAfter)
|
---|
215 | readLimit = writePos - keepSizeAfter;
|
---|
216 |
|
---|
217 | processPendingBytes();
|
---|
218 |
|
---|
219 | // Tell the caller how much input we actually copied into
|
---|
220 | // the dictionary.
|
---|
221 | return len;
|
---|
222 | }
|
---|
223 |
|
---|
224 | /**
|
---|
225 | * Process pending bytes remaining from preset dictionary initialization
|
---|
226 | * or encoder flush operation.
|
---|
227 | */
|
---|
228 | private void processPendingBytes() {
|
---|
229 | // After flushing or setting a preset dictionary there will be
|
---|
230 | // pending data that hasn't been ran through the match finder yet.
|
---|
231 | // Run it through the match finder now if there is enough new data
|
---|
232 | // available (readPos < readLimit) that the encoder may encode at
|
---|
233 | // least one more input byte. This way we don't waste any time
|
---|
234 | // looping in the match finder (and marking the same bytes as
|
---|
235 | // pending again) if the application provides very little new data
|
---|
236 | // per write call.
|
---|
237 | if (pendingSize > 0 && readPos < readLimit) {
|
---|
238 | readPos -= pendingSize;
|
---|
239 | int oldPendingSize = pendingSize;
|
---|
240 | pendingSize = 0;
|
---|
241 | skip(oldPendingSize);
|
---|
242 | assert pendingSize < oldPendingSize;
|
---|
243 | }
|
---|
244 | }
|
---|
245 |
|
---|
246 | /**
|
---|
247 | * Returns true if at least one byte has already been run through
|
---|
248 | * the match finder.
|
---|
249 | */
|
---|
250 | public boolean isStarted() {
|
---|
251 | return readPos != -1;
|
---|
252 | }
|
---|
253 |
|
---|
254 | /**
|
---|
255 | * Marks that all the input needs to be made available in
|
---|
256 | * the encoded output.
|
---|
257 | */
|
---|
258 | public void setFlushing() {
|
---|
259 | readLimit = writePos - 1;
|
---|
260 | processPendingBytes();
|
---|
261 | }
|
---|
262 |
|
---|
263 | /**
|
---|
264 | * Marks that there is no more input remaining. The read position
|
---|
265 | * can be advanced until the end of the data.
|
---|
266 | */
|
---|
267 | public void setFinishing() {
|
---|
268 | readLimit = writePos - 1;
|
---|
269 | finishing = true;
|
---|
270 | processPendingBytes();
|
---|
271 | }
|
---|
272 |
|
---|
273 | /**
|
---|
274 | * Tests if there is enough input available to let the caller encode
|
---|
275 | * at least one more byte.
|
---|
276 | */
|
---|
277 | public boolean hasEnoughData(int alreadyReadLen) {
|
---|
278 | return readPos - alreadyReadLen < readLimit;
|
---|
279 | }
|
---|
280 |
|
---|
281 | public void copyUncompressed(OutputStream out, int backward, int len)
|
---|
282 | throws IOException {
|
---|
283 | out.write(buf, readPos + 1 - backward, len);
|
---|
284 | }
|
---|
285 |
|
---|
286 | /**
|
---|
287 | * Get the number of bytes available, including the current byte.
|
---|
288 | * <p>
|
---|
289 | * Note that the result is undefined if <code>getMatches</code> or
|
---|
290 | * <code>skip</code> hasn't been called yet and no preset dictionary
|
---|
291 | * is being used.
|
---|
292 | */
|
---|
293 | public int getAvail() {
|
---|
294 | assert isStarted();
|
---|
295 | return writePos - readPos;
|
---|
296 | }
|
---|
297 |
|
---|
298 | /**
|
---|
299 | * Gets the lowest four bits of the absolute offset of the current byte.
|
---|
300 | * Bits other than the lowest four are undefined.
|
---|
301 | */
|
---|
302 | public int getPos() {
|
---|
303 | return readPos;
|
---|
304 | }
|
---|
305 |
|
---|
306 | /**
|
---|
307 | * Gets the byte from the given backward offset.
|
---|
308 | * <p>
|
---|
309 | * The current byte is at <code>0</code>, the previous byte
|
---|
310 | * at <code>1</code> etc. To get a byte at zero-based distance,
|
---|
311 | * use <code>getByte(dist + 1)<code>.
|
---|
312 | * <p>
|
---|
313 | * This function is equivalent to <code>getByte(0, backward)</code>.
|
---|
314 | */
|
---|
315 | public int getByte(int backward) {
|
---|
316 | return buf[readPos - backward] & 0xFF;
|
---|
317 | }
|
---|
318 |
|
---|
319 | /**
|
---|
320 | * Gets the byte from the given forward minus backward offset.
|
---|
321 | * The forward offset is added to the current position. This lets
|
---|
322 | * one read bytes ahead of the current byte.
|
---|
323 | */
|
---|
324 | public int getByte(int forward, int backward) {
|
---|
325 | return buf[readPos + forward - backward] & 0xFF;
|
---|
326 | }
|
---|
327 |
|
---|
328 | /**
|
---|
329 | * Get the length of a match at the given distance.
|
---|
330 | *
|
---|
331 | * @param dist zero-based distance of the match to test
|
---|
332 | * @param lenLimit don't test for a match longer than this
|
---|
333 | *
|
---|
334 | * @return length of the match; it is in the range [0, lenLimit]
|
---|
335 | */
|
---|
336 | public int getMatchLen(int dist, int lenLimit) {
|
---|
337 | int backPos = readPos - dist - 1;
|
---|
338 | int len = 0;
|
---|
339 |
|
---|
340 | while (len < lenLimit && buf[readPos + len] == buf[backPos + len])
|
---|
341 | ++len;
|
---|
342 |
|
---|
343 | return len;
|
---|
344 | }
|
---|
345 |
|
---|
346 | /**
|
---|
347 | * Get the length of a match at the given distance and forward offset.
|
---|
348 | *
|
---|
349 | * @param forward forward offset
|
---|
350 | * @param dist zero-based distance of the match to test
|
---|
351 | * @param lenLimit don't test for a match longer than this
|
---|
352 | *
|
---|
353 | * @return length of the match; it is in the range [0, lenLimit]
|
---|
354 | */
|
---|
355 | public int getMatchLen(int forward, int dist, int lenLimit) {
|
---|
356 | int curPos = readPos + forward;
|
---|
357 | int backPos = curPos - dist - 1;
|
---|
358 | int len = 0;
|
---|
359 |
|
---|
360 | while (len < lenLimit && buf[curPos + len] == buf[backPos + len])
|
---|
361 | ++len;
|
---|
362 |
|
---|
363 | return len;
|
---|
364 | }
|
---|
365 |
|
---|
366 | /**
|
---|
367 | * Verifies that the matches returned by the match finder are valid.
|
---|
368 | * This is meant to be used in an assert statement. This is totally
|
---|
369 | * useless for actual encoding since match finder's results should
|
---|
370 | * naturally always be valid if it isn't broken.
|
---|
371 | *
|
---|
372 | * @param matches return value from <code>getMatches</code>
|
---|
373 | *
|
---|
374 | * @return true if matches are valid, false if match finder is broken
|
---|
375 | */
|
---|
376 | public boolean verifyMatches(Matches matches) {
|
---|
377 | int lenLimit = Math.min(getAvail(), matchLenMax);
|
---|
378 |
|
---|
379 | for (int i = 0; i < matches.count; ++i)
|
---|
380 | if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i])
|
---|
381 | return false;
|
---|
382 |
|
---|
383 | return true;
|
---|
384 | }
|
---|
385 |
|
---|
386 | /**
|
---|
387 | * Moves to the next byte, checks if there is enough input available,
|
---|
388 | * and returns the amount of input available.
|
---|
389 | *
|
---|
390 | * @param requiredForFlushing
|
---|
391 | * minimum number of available bytes when
|
---|
392 | * flushing; encoding may be continued with
|
---|
393 | * new input after flushing
|
---|
394 | * @param requiredForFinishing
|
---|
395 | * minimum number of available bytes when
|
---|
396 | * finishing; encoding must not be continued
|
---|
397 | * after finishing or the match finder state
|
---|
398 | * may be corrupt
|
---|
399 | *
|
---|
400 | * @return the number of bytes available or zero if there
|
---|
401 | * is not enough input available
|
---|
402 | */
|
---|
403 | int movePos(int requiredForFlushing, int requiredForFinishing) {
|
---|
404 | assert requiredForFlushing >= requiredForFinishing;
|
---|
405 |
|
---|
406 | ++readPos;
|
---|
407 | int avail = writePos - readPos;
|
---|
408 |
|
---|
409 | if (avail < requiredForFlushing) {
|
---|
410 | if (avail < requiredForFinishing || !finishing) {
|
---|
411 | ++pendingSize;
|
---|
412 | avail = 0;
|
---|
413 | }
|
---|
414 | }
|
---|
415 |
|
---|
416 | return avail;
|
---|
417 | }
|
---|
418 |
|
---|
419 | /**
|
---|
420 | * Runs match finder for the next byte and returns the matches found.
|
---|
421 | */
|
---|
422 | public abstract Matches getMatches();
|
---|
423 |
|
---|
424 | /**
|
---|
425 | * Skips the given number of bytes in the match finder.
|
---|
426 | */
|
---|
427 | public abstract void skip(int len);
|
---|
428 | }
|
---|