Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: osm/applications/editors/josm/plugins/imagerycache/src/org/mapdb/CompressLZF.java@ 29363

Last change on this file since 29363 was 29363, checked in by akks, 11 years ago
JOSM/ImageryCache: Initial commit
File size: 13.9 KB

Line
1	/*
2	* Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
3	* Version 1.0, and under the Eclipse Public License, Version 1.0
4	* (http://h2database.com/html/license.html).
5	*
6	* This code is based on the LZF algorithm from Marc Lehmann. It is a
7	* re-implementation of the C code:
8	* http://cvs.schmorp.de/liblzf/lzf_c.c?view=markup
9	* http://cvs.schmorp.de/liblzf/lzf_d.c?view=markup
10	*
11	* According to a mail from Marc Lehmann, it's OK to use his algorithm:
12	* Date: 2010-07-15 15:57
13	* Subject: Re: Question about LZF licensing
14	* ...
15	* The algorithm is not copyrighted (and cannot be copyrighted afaik) - as long
16	* as you wrote everything yourself, without copying my code, that's just fine
17	* (looking is of course fine too).
18	* ...
19	*
20	* Still I would like to keep his copyright info:
21	*
22	* Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
23	* Copyright (c) 2005 Oren J. Maurice <oymaurice@hazorea.org.il>
24	*
25	* Redistribution and use in source and binary forms, with or without
26	* modification, are permitted provided that the following conditions are met:
27	*
28	* 1. Redistributions of source code must retain the above copyright notice,
29	* this list of conditions and the following disclaimer.
30	*
31	* 2. Redistributions in binary form must reproduce the above copyright
32	* notice, this list of conditions and the following disclaimer in the
33	* documentation and/or other materials provided with the distribution.
34	*
35	* 3. The name of the author may not be used to endorse or promote products
36	* derived from this software without specific prior written permission.
37	*
38	* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
39	* WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
40	* MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
41	* EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42	* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
43	* PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
44	* OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45	* WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46	* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47	* OF THE POSSIBILITY OF SUCH DAMAGE.
48	*/
49
50	package org.mapdb;
51
52	import java.io.DataInput;
53	import java.io.DataOutput;
54	import java.io.IOException;
55	import java.io.Serializable;
56	import java.util.Arrays;
57
58	/**
59	* <p>
60	* This class implements the LZF lossless data compression algorithm. LZF is a
61	* Lempel-Ziv variant with byte-aligned output, and optimized for speed.
62	* </p>
63	* <p>
64	* Safety/Use Notes:
65	* </p>
66	* <ul>
67	* <li>Each instance should be used by a single thread only.</li>
68	* <li>The data buffers should be smaller than 1 GB.</li>
69	* <li>For performance reasons, safety checks on expansion are omitted.</li>
70	* <li>Invalid compressed data can cause an ArrayIndexOutOfBoundsException.</li>
71	* </ul>
72	* <p>
73	* The LZF compressed format knows literal runs and back-references:
74	* </p>
75	* <ul>
76	* <li>Literal run: directly copy bytes from input to output.</li>
77	* <li>Back-reference: copy previous data to output stream, with specified
78	* offset from location and length. The length is at least 3 bytes.</li>
79	* </ul>
80	*<p>
81	* The first byte of the compressed stream is the control byte. For literal
82	* runs, the highest three bits of the control byte are not set, the the lower
83	* bits are the literal run length, and the next bytes are data to copy directly
84	* into the output. For back-references, the highest three bits of the control
85	* byte are the back-reference length. If all three bits are set, then the
86	* back-reference length is stored in the next byte. The lower bits of the
87	* control byte combined with the next byte form the offset for the
88	* back-reference.
89	* </p>
90	*/
91	public final class CompressLZF{
92
93	/**
94	* The number of entries in the hash table. The size is a trade-off between
95	* hash collisions (reduced compression) and speed (amount that fits in CPU
96	* cache).
97	*/
98	private static final int HASH_SIZE = 1 << 14;
99
100	/**
101	* The maximum number of literals in a chunk (32).
102	*/
103	private static final int MAX_LITERAL = 1 << 5;
104
105	/**
106	* The maximum offset allowed for a back-reference (8192).
107	*/
108	private static final int MAX_OFF = 1 << 13;
109
110	/**
111	* The maximum back-reference length (264).
112	*/
113	private static final int MAX_REF = (1 << 8) + (1 << 3);
114
115	/**
116	* Hash table for matching byte sequences (reused for performance).
117	*/
118	private int[] cachedHashTable;
119
120	/**
121	* Return byte with lower 2 bytes being byte at index, then index+1.
122	*/
123	private static int first(byte[] in, int inPos) {
124	return (in[inPos] << 8) \| (in[inPos + 1] & 255);
125	}
126
127	/**
128	* Shift v 1 byte left, add value at index inPos+2.
129	*/
130	private static int next(int v, byte[] in, int inPos) {
131	return (v << 8) \| (in[inPos + 2] & 255);
132	}
133
134	/**
135	* Compute the address in the hash table.
136	*/
137	private static int hash(int h) {
138	return ((h * 2777) >> 9) & (HASH_SIZE - 1);
139	}
140
141	public int compress(byte[] in, int inLen, byte[] out, int outPos) {
142	int inPos = 0;
143	if (cachedHashTable == null) {
144	cachedHashTable = new int[HASH_SIZE];
145	}
146	int[] hashTab = cachedHashTable;
147	int literals = 0;
148	outPos++;
149	int future = first(in, 0);
150	while (inPos < inLen - 4) {
151	byte p2 = in[inPos + 2];
152	// next
153	future = (future << 8) + (p2 & 255);
154	int off = hash(future);
155	int ref = hashTab[off];
156	hashTab[off] = inPos;
157	// if (ref < inPos
158	// && ref > 0
159	// && (off = inPos - ref - 1) < MAX_OFF
160	// && in[ref + 2] == p2
161	// && (((in[ref] & 255) << 8) \| (in[ref + 1] & 255)) ==
162	// ((future >> 8) & 0xffff)) {
163	if (ref < inPos
164	&& ref > 0
165	&& (off = inPos - ref - 1) < MAX_OFF
166	&& in[ref + 2] == p2
167	&& in[ref + 1] == (byte) (future >> 8)
168	&& in[ref] == (byte) (future >> 16)) {
169	// match
170	int maxLen = inLen - inPos - 2;
171	if (maxLen > MAX_REF) {
172	maxLen = MAX_REF;
173	}
174	if (literals == 0) {
175	// multiple back-references,
176	// so there is no literal run control byte
177	outPos--;
178	} else {
179	// set the control byte at the start of the literal run
180	// to store the number of literals
181	out[outPos - literals - 1] = (byte) (literals - 1);
182	literals = 0;
183	}
184	int len = 3;
185	while (len < maxLen && in[ref + len] == in[inPos + len]) {
186	len++;
187	}
188	len -= 2;
189	if (len < 7) {
190	out[outPos++] = (byte) ((off >> 8) + (len << 5));
191	} else {
192	out[outPos++] = (byte) ((off >> 8) + (7 << 5));
193	out[outPos++] = (byte) (len - 7);
194	}
195	out[outPos++] = (byte) off;
196	// move one byte forward to allow for a literal run control byte
197	outPos++;
198	inPos += len;
199	// rebuild the future, and store the last bytes to the hashtable.
200	// Storing hashes of the last bytes in back-reference improves
201	// the compression ratio and only reduces speed slightly.
202	future = first(in, inPos);
203	future = next(future, in, inPos);
204	hashTab[hash(future)] = inPos++;
205	future = next(future, in, inPos);
206	hashTab[hash(future)] = inPos++;
207	} else {
208	// copy one byte from input to output as part of literal
209	out[outPos++] = in[inPos++];
210	literals++;
211	// at the end of this literal chunk, write the length
212	// to the control byte and start a new chunk
213	if (literals == MAX_LITERAL) {
214	out[outPos - literals - 1] = (byte) (literals - 1);
215	literals = 0;
216	// move ahead one byte to allow for the
217	// literal run control byte
218	outPos++;
219	}
220	}
221	}
222	// write the remaining few bytes as literals
223	while (inPos < inLen) {
224	out[outPos++] = in[inPos++];
225	literals++;
226	if (literals == MAX_LITERAL) {
227	out[outPos - literals - 1] = (byte) (literals - 1);
228	literals = 0;
229	outPos++;
230	}
231	}
232	// writes the final literal run length to the control byte
233	out[outPos - literals - 1] = (byte) (literals - 1);
234	if (literals == 0) {
235	outPos--;
236	}
237	return outPos;
238	}
239
240	public void expand(byte[] in, int inPos, int inLen, byte[] out, int outPos, int outLen) {
241	// if ((inPos \| outPos \| outLen) < 0) {
242	if (inPos < 0 \|\| outPos < 0 \|\| outLen < 0) {
243	throw new IllegalArgumentException();
244	}
245	do {
246	int ctrl = in[inPos++] & 255;
247	if (ctrl < MAX_LITERAL) {
248	// literal run of length = ctrl + 1,
249	ctrl++;
250	// copy to output and move forward this many bytes
251	System.arraycopy(in, inPos, out, outPos, ctrl);
252	outPos += ctrl;
253	inPos += ctrl;
254	} else {
255	// back reference
256	// the highest 3 bits are the match length
257	int len = ctrl >> 5;
258	// if the length is maxed, add the next byte to the length
259	if (len == 7) {
260	len += in[inPos++] & 255;
261	}
262	// minimum back-reference is 3 bytes,
263	// so 2 was subtracted before storing size
264	len += 2;
265
266	// ctrl is now the offset for a back-reference...
267	// the logical AND operation removes the length bits
268	ctrl = -((ctrl & 0x1f) << 8) - 1;
269
270	// the next byte augments/increases the offset
271	ctrl -= in[inPos++] & 255;
272
273	// copy the back-reference bytes from the given
274	// location in output to current position
275	ctrl += outPos;
276	if (outPos + len >= out.length) {
277	// reduce array bounds checking
278	throw new ArrayIndexOutOfBoundsException();
279	}
280	for (int i = 0; i < len; i++) {
281	out[outPos++] = out[ctrl++];
282	}
283	}
284	} while (outPos < outLen);
285	}
286
287
288	public static final Serializer<byte[]> SERIALIZER = new Serializer<byte[]>() {
289
290	final ThreadLocal<CompressLZF> LZF = new ThreadLocal<CompressLZF>() {
291	@Override
292	protected CompressLZF initialValue() {
293	return new CompressLZF();
294	}
295	};
296
297	@Override
298	public void serialize(DataOutput out, byte[] value) throws IOException {
299	if (value == null) return;
300
301	CompressLZF lzf = LZF.get();
302	byte[] outbuf = new byte[value.length + 40];
303	int len = lzf.compress(value, value.length, outbuf, 0);
304	//check if compressed data are larger then original
305	if (value.length <= len) {
306	//in this case do not compress data, write 0 as indicator
307	Utils.packInt(out, 0);
308	out.write(value);
309	} else {
310	Utils.packInt(out, value.length); //write original decompressed size
311	out.write(outbuf, 0, len);
312	}
313	}
314
315	@Override
316	public byte[] deserialize(DataInput in, int available) throws IOException {
317	if (available == 0) return null;
318	//get original decompressed size
319	DataInput2 in2 = (DataInput2) in;
320	int origPos = in2.pos;
321	int expendedLen = Utils.unpackInt(in);
322	byte[] inbuf = new byte[available - (in2.pos - origPos)];
323	in.readFully(inbuf);
324	if (expendedLen == 0) {
325	//special case, data are not compressed
326	return inbuf;
327	}
328	byte[] outbuf = new byte[expendedLen + 40];
329
330	CompressLZF lzf = LZF.get();
331	lzf.expand(inbuf, 0, inbuf.length, outbuf, 0, expendedLen);
332	outbuf = Arrays.copyOf(outbuf, expendedLen);
333
334	return outbuf;
335	}
336
337
338	};
339
340	/**
341	* Wraps existing serializer and compresses its input/output
342	*/
343	public static <E> Serializer<E> serializerCompressWrapper(Serializer<E> serializer) {
344	return new SerializerCompressWrapper<E>(serializer);
345	}
346
347
348	protected static class SerializerCompressWrapper<E> implements Serializer<E>, Serializable {
349	protected final Serializer<E> serializer;
350	public SerializerCompressWrapper(Serializer<E> serializer) {
351	this.serializer = serializer;
352	}
353
354	@Override
355	public void serialize(DataOutput out, E value) throws IOException {
356	//serialize to byte[]
357	DataOutput2 out2 = new DataOutput2();
358	serializer.serialize(out2, value);
359	byte[] b = out2.copyBytes();
360	CompressLZF.SERIALIZER.serialize(out, b);
361	}
362
363	@Override
364	public E deserialize(DataInput in, int available) throws IOException {
365	byte[] b = CompressLZF.SERIALIZER.deserialize(in, available);
366	DataInput2 in2 = new DataInput2(b);
367	return serializer.deserialize(in2, b.length);
368	}
369	}
370
371	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: