Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: josm/trunk/src/org/tukaani/xz/lz/LZEncoder.java@ 15948

Last change on this file since 15948 was 13350, checked in by stoecker, 7 years ago
see #15816 - add XZ support
File size: 13.9 KB

Line
1	/*
2	* LZEncoder
3	*
4	* Authors: Lasse Collin <lasse.collin@tukaani.org>
5	* Igor Pavlov <http://7-zip.org/>
6	*
7	* This file has been put into the public domain.
8	* You can do whatever you want with this file.
9	*/
10
11	package org.tukaani.xz.lz;
12
13	import java.io.OutputStream;
14	import java.io.IOException;
15	import org.tukaani.xz.ArrayCache;
16
17	public abstract class LZEncoder {
18	public static final int MF_HC4 = 0x04;
19	public static final int MF_BT4 = 0x14;
20
21	/**
22	* Number of bytes to keep available before the current byte
23	* when moving the LZ window.
24	*/
25	private final int keepSizeBefore;
26
27	/**
28	* Number of bytes that must be available, the current byte included,
29	* to make hasEnoughData return true. Flushing and finishing are
30	* naturally exceptions to this since there cannot be any data after
31	* the end of the uncompressed input.
32	*/
33	private final int keepSizeAfter;
34
35	final int matchLenMax;
36	final int niceLen;
37
38	final byte[] buf;
39	final int bufSize; // To avoid buf.length with an array-cached buf.
40
41	int readPos = -1;
42	private int readLimit = -1;
43	private boolean finishing = false;
44	private int writePos = 0;
45	private int pendingSize = 0;
46
47	static void normalize(int[] positions, int positionsCount,
48	int normalizationOffset) {
49	for (int i = 0; i < positionsCount; ++i) {
50	if (positions[i] <= normalizationOffset)
51	positions[i] = 0;
52	else
53	positions[i] -= normalizationOffset;
54	}
55	}
56
57	/**
58	* Gets the size of the LZ window buffer that needs to be allocated.
59	*/
60	private static int getBufSize(
61	int dictSize, int extraSizeBefore, int extraSizeAfter,
62	int matchLenMax) {
63	int keepSizeBefore = extraSizeBefore + dictSize;
64	int keepSizeAfter = extraSizeAfter + matchLenMax;
65	int reserveSize = Math.min(dictSize / 2 + (256 << 10), 512 << 20);
66	return keepSizeBefore + keepSizeAfter + reserveSize;
67	}
68
69	/**
70	* Gets approximate memory usage of the LZEncoder base structure and
71	* the match finder as kibibytes.
72	*/
73	public static int getMemoryUsage(
74	int dictSize, int extraSizeBefore, int extraSizeAfter,
75	int matchLenMax, int mf) {
76	// Buffer size + a little extra
77	int m = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
78	matchLenMax) / 1024 + 10;
79
80	switch (mf) {
81	case MF_HC4:
82	m += HC4.getMemoryUsage(dictSize);
83	break;
84
85	case MF_BT4:
86	m += BT4.getMemoryUsage(dictSize);
87	break;
88
89	default:
90	throw new IllegalArgumentException();
91	}
92
93	return m;
94	}
95
96	/**
97	* Creates a new LZEncoder.
98	* <p>
99	* @param dictSize dictionary size
100	*
101	* @param extraSizeBefore
102	* number of bytes to keep available in the
103	* history in addition to dictSize
104	*
105	* @param extraSizeAfter
106	* number of bytes that must be available
107	* after current position + matchLenMax
108	*
109	* @param niceLen if a match of at least <code>niceLen</code>
110	* bytes is found, be happy with it and don't
111	* stop looking for longer matches
112	*
113	* @param matchLenMax don't test for matches longer than
114	* <code>matchLenMax</code> bytes
115	*
116	* @param mf match finder ID
117	*
118	* @param depthLimit match finder search depth limit
119	*/
120	public static LZEncoder getInstance(
121	int dictSize, int extraSizeBefore, int extraSizeAfter,
122	int niceLen, int matchLenMax, int mf, int depthLimit,
123	ArrayCache arrayCache) {
124	switch (mf) {
125	case MF_HC4:
126	return new HC4(dictSize, extraSizeBefore, extraSizeAfter,
127	niceLen, matchLenMax, depthLimit, arrayCache);
128
129	case MF_BT4:
130	return new BT4(dictSize, extraSizeBefore, extraSizeAfter,
131	niceLen, matchLenMax, depthLimit, arrayCache);
132	}
133
134	throw new IllegalArgumentException();
135	}
136
137	/**
138	* Creates a new LZEncoder. See <code>getInstance</code>.
139	*/
140	LZEncoder(int dictSize, int extraSizeBefore, int extraSizeAfter,
141	int niceLen, int matchLenMax, ArrayCache arrayCache) {
142	bufSize = getBufSize(dictSize, extraSizeBefore, extraSizeAfter,
143	matchLenMax);
144	buf = arrayCache.getByteArray(bufSize, false);
145
146	keepSizeBefore = extraSizeBefore + dictSize;
147	keepSizeAfter = extraSizeAfter + matchLenMax;
148
149	this.matchLenMax = matchLenMax;
150	this.niceLen = niceLen;
151	}
152
153	public void putArraysToCache(ArrayCache arrayCache) {
154	arrayCache.putArray(buf);
155	}
156
157	/**
158	* Sets a preset dictionary. If a preset dictionary is wanted, this
159	* function must be called immediately after creating the LZEncoder
160	* before any data has been encoded.
161	*/
162	public void setPresetDict(int dictSize, byte[] presetDict) {
163	assert !isStarted();
164	assert writePos == 0;
165
166	if (presetDict != null) {
167	// If the preset dictionary buffer is bigger than the dictionary
168	// size, copy only the tail of the preset dictionary.
169	int copySize = Math.min(presetDict.length, dictSize);
170	int offset = presetDict.length - copySize;
171	System.arraycopy(presetDict, offset, buf, 0, copySize);
172	writePos += copySize;
173	skip(copySize);
174	}
175	}
176
177	/**
178	* Moves data from the end of the buffer to the beginning, discarding
179	* old data and making space for new input.
180	*/
181	private void moveWindow() {
182	// Align the move to a multiple of 16 bytes. LZMA2 needs this
183	// because it uses the lowest bits from readPos to get the
184	// alignment of the uncompressed data.
185	int moveOffset = (readPos + 1 - keepSizeBefore) & ~15;
186	int moveSize = writePos - moveOffset;
187	System.arraycopy(buf, moveOffset, buf, 0, moveSize);
188
189	readPos -= moveOffset;
190	readLimit -= moveOffset;
191	writePos -= moveOffset;
192	}
193
194	/**
195	* Copies new data into the LZEncoder's buffer.
196	*/
197	public int fillWindow(byte[] in, int off, int len) {
198	assert !finishing;
199
200	// Move the sliding window if needed.
201	if (readPos >= bufSize - keepSizeAfter)
202	moveWindow();
203
204	// Try to fill the dictionary buffer. If it becomes full,
205	// some of the input bytes may be left unused.
206	if (len > bufSize - writePos)
207	len = bufSize - writePos;
208
209	System.arraycopy(in, off, buf, writePos, len);
210	writePos += len;
211
212	// Set the new readLimit but only if there's enough data to allow
213	// encoding of at least one more byte.
214	if (writePos >= keepSizeAfter)
215	readLimit = writePos - keepSizeAfter;
216
217	processPendingBytes();
218
219	// Tell the caller how much input we actually copied into
220	// the dictionary.
221	return len;
222	}
223
224	/**
225	* Process pending bytes remaining from preset dictionary initialization
226	* or encoder flush operation.
227	*/
228	private void processPendingBytes() {
229	// After flushing or setting a preset dictionary there will be
230	// pending data that hasn't been ran through the match finder yet.
231	// Run it through the match finder now if there is enough new data
232	// available (readPos < readLimit) that the encoder may encode at
233	// least one more input byte. This way we don't waste any time
234	// looping in the match finder (and marking the same bytes as
235	// pending again) if the application provides very little new data
236	// per write call.
237	if (pendingSize > 0 && readPos < readLimit) {
238	readPos -= pendingSize;
239	int oldPendingSize = pendingSize;
240	pendingSize = 0;
241	skip(oldPendingSize);
242	assert pendingSize < oldPendingSize;
243	}
244	}
245
246	/**
247	* Returns true if at least one byte has already been run through
248	* the match finder.
249	*/
250	public boolean isStarted() {
251	return readPos != -1;
252	}
253
254	/**
255	* Marks that all the input needs to be made available in
256	* the encoded output.
257	*/
258	public void setFlushing() {
259	readLimit = writePos - 1;
260	processPendingBytes();
261	}
262
263	/**
264	* Marks that there is no more input remaining. The read position
265	* can be advanced until the end of the data.
266	*/
267	public void setFinishing() {
268	readLimit = writePos - 1;
269	finishing = true;
270	processPendingBytes();
271	}
272
273	/**
274	* Tests if there is enough input available to let the caller encode
275	* at least one more byte.
276	*/
277	public boolean hasEnoughData(int alreadyReadLen) {
278	return readPos - alreadyReadLen < readLimit;
279	}
280
281	public void copyUncompressed(OutputStream out, int backward, int len)
282	throws IOException {
283	out.write(buf, readPos + 1 - backward, len);
284	}
285
286	/**
287	* Get the number of bytes available, including the current byte.
288	* <p>
289	* Note that the result is undefined if <code>getMatches</code> or
290	* <code>skip</code> hasn't been called yet and no preset dictionary
291	* is being used.
292	*/
293	public int getAvail() {
294	assert isStarted();
295	return writePos - readPos;
296	}
297
298	/**
299	* Gets the lowest four bits of the absolute offset of the current byte.
300	* Bits other than the lowest four are undefined.
301	*/
302	public int getPos() {
303	return readPos;
304	}
305
306	/**
307	* Gets the byte from the given backward offset.
308	* <p>
309	* The current byte is at <code>0</code>, the previous byte
310	* at <code>1</code> etc. To get a byte at zero-based distance,
311	* use <code>getByte(dist + 1)<code>.
312	* <p>
313	* This function is equivalent to <code>getByte(0, backward)</code>.
314	*/
315	public int getByte(int backward) {
316	return buf[readPos - backward] & 0xFF;
317	}
318
319	/**
320	* Gets the byte from the given forward minus backward offset.
321	* The forward offset is added to the current position. This lets
322	* one read bytes ahead of the current byte.
323	*/
324	public int getByte(int forward, int backward) {
325	return buf[readPos + forward - backward] & 0xFF;
326	}
327
328	/**
329	* Get the length of a match at the given distance.
330	*
331	* @param dist zero-based distance of the match to test
332	* @param lenLimit don't test for a match longer than this
333	*
334	* @return length of the match; it is in the range [0, lenLimit]
335	*/
336	public int getMatchLen(int dist, int lenLimit) {
337	int backPos = readPos - dist - 1;
338	int len = 0;
339
340	while (len < lenLimit && buf[readPos + len] == buf[backPos + len])
341	++len;
342
343	return len;
344	}
345
346	/**
347	* Get the length of a match at the given distance and forward offset.
348	*
349	* @param forward forward offset
350	* @param dist zero-based distance of the match to test
351	* @param lenLimit don't test for a match longer than this
352	*
353	* @return length of the match; it is in the range [0, lenLimit]
354	*/
355	public int getMatchLen(int forward, int dist, int lenLimit) {
356	int curPos = readPos + forward;
357	int backPos = curPos - dist - 1;
358	int len = 0;
359
360	while (len < lenLimit && buf[curPos + len] == buf[backPos + len])
361	++len;
362
363	return len;
364	}
365
366	/**
367	* Verifies that the matches returned by the match finder are valid.
368	* This is meant to be used in an assert statement. This is totally
369	* useless for actual encoding since match finder's results should
370	* naturally always be valid if it isn't broken.
371	*
372	* @param matches return value from <code>getMatches</code>
373	*
374	* @return true if matches are valid, false if match finder is broken
375	*/
376	public boolean verifyMatches(Matches matches) {
377	int lenLimit = Math.min(getAvail(), matchLenMax);
378
379	for (int i = 0; i < matches.count; ++i)
380	if (getMatchLen(matches.dist[i], lenLimit) != matches.len[i])
381	return false;
382
383	return true;
384	}
385
386	/**
387	* Moves to the next byte, checks if there is enough input available,
388	* and returns the amount of input available.
389	*
390	* @param requiredForFlushing
391	* minimum number of available bytes when
392	* flushing; encoding may be continued with
393	* new input after flushing
394	* @param requiredForFinishing
395	* minimum number of available bytes when
396	* finishing; encoding must not be continued
397	* after finishing or the match finder state
398	* may be corrupt
399	*
400	* @return the number of bytes available or zero if there
401	* is not enough input available
402	*/
403	int movePos(int requiredForFlushing, int requiredForFinishing) {
404	assert requiredForFlushing >= requiredForFinishing;
405
406	++readPos;
407	int avail = writePos - readPos;
408
409	if (avail < requiredForFlushing) {
410	if (avail < requiredForFinishing \|\| !finishing) {
411	++pendingSize;
412	avail = 0;
413	}
414	}
415
416	return avail;
417	}
418
419	/**
420	* Runs match finder for the next byte and returns the matches found.
421	*/
422	public abstract Matches getMatches();
423
424	/**
425	* Skips the given number of bytes in the match finder.
426	*/
427	public abstract void skip(int len);
428	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: