source: osm/applications/editors/josm/plugins/imagerycache/src/org/mapdb/CompressLZF.java@ 29363

Last change on this file since 29363 was 29363, checked in by akks, 11 years ago

JOSM/ImageryCache: Initial commit

File size: 13.9 KB
Line 
1/*
2 * Copyright 2004-2011 H2 Group. Multiple-Licensed under the H2 License,
3 * Version 1.0, and under the Eclipse Public License, Version 1.0
4 * (http://h2database.com/html/license.html).
5 *
6 * This code is based on the LZF algorithm from Marc Lehmann. It is a
7 * re-implementation of the C code:
8 * http://cvs.schmorp.de/liblzf/lzf_c.c?view=markup
9 * http://cvs.schmorp.de/liblzf/lzf_d.c?view=markup
10 *
11 * According to a mail from Marc Lehmann, it's OK to use his algorithm:
12 * Date: 2010-07-15 15:57
13 * Subject: Re: Question about LZF licensing
14 * ...
15 * The algorithm is not copyrighted (and cannot be copyrighted afaik) - as long
16 * as you wrote everything yourself, without copying my code, that's just fine
17 * (looking is of course fine too).
18 * ...
19 *
20 * Still I would like to keep his copyright info:
21 *
22 * Copyright (c) 2000-2005 Marc Alexander Lehmann <schmorp@schmorp.de>
23 * Copyright (c) 2005 Oren J. Maurice <oymaurice@hazorea.org.il>
24 *
25 * Redistribution and use in source and binary forms, with or without
26 * modification, are permitted provided that the following conditions are met:
27 *
28 * 1. Redistributions of source code must retain the above copyright notice,
29 * this list of conditions and the following disclaimer.
30 *
31 * 2. Redistributions in binary form must reproduce the above copyright
32 * notice, this list of conditions and the following disclaimer in the
33 * documentation and/or other materials provided with the distribution.
34 *
35 * 3. The name of the author may not be used to endorse or promote products
36 * derived from this software without specific prior written permission.
37 *
38 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ''AS IS'' AND ANY EXPRESS OR IMPLIED
39 * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF
40 * MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO
41 * EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
42 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
43 * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
44 * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
45 * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
46 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
47 * OF THE POSSIBILITY OF SUCH DAMAGE.
48 */
49
50package org.mapdb;
51
52import java.io.DataInput;
53import java.io.DataOutput;
54import java.io.IOException;
55import java.io.Serializable;
56import java.util.Arrays;
57
58/**
59 * <p>
60 * This class implements the LZF lossless data compression algorithm. LZF is a
61 * Lempel-Ziv variant with byte-aligned output, and optimized for speed.
62 * </p>
63 * <p>
64 * Safety/Use Notes:
65 * </p>
66 * <ul>
67 * <li>Each instance should be used by a single thread only.</li>
68 * <li>The data buffers should be smaller than 1 GB.</li>
69 * <li>For performance reasons, safety checks on expansion are omitted.</li>
70 * <li>Invalid compressed data can cause an ArrayIndexOutOfBoundsException.</li>
71 * </ul>
72 * <p>
73 * The LZF compressed format knows literal runs and back-references:
74 * </p>
75 * <ul>
76 * <li>Literal run: directly copy bytes from input to output.</li>
77 * <li>Back-reference: copy previous data to output stream, with specified
78 * offset from location and length. The length is at least 3 bytes.</li>
79 * </ul>
80 *<p>
81 * The first byte of the compressed stream is the control byte. For literal
82 * runs, the highest three bits of the control byte are not set, the the lower
83 * bits are the literal run length, and the next bytes are data to copy directly
84 * into the output. For back-references, the highest three bits of the control
85 * byte are the back-reference length. If all three bits are set, then the
86 * back-reference length is stored in the next byte. The lower bits of the
87 * control byte combined with the next byte form the offset for the
88 * back-reference.
89 * </p>
90 */
91public final class CompressLZF{
92
93 /**
94 * The number of entries in the hash table. The size is a trade-off between
95 * hash collisions (reduced compression) and speed (amount that fits in CPU
96 * cache).
97 */
98 private static final int HASH_SIZE = 1 << 14;
99
100 /**
101 * The maximum number of literals in a chunk (32).
102 */
103 private static final int MAX_LITERAL = 1 << 5;
104
105 /**
106 * The maximum offset allowed for a back-reference (8192).
107 */
108 private static final int MAX_OFF = 1 << 13;
109
110 /**
111 * The maximum back-reference length (264).
112 */
113 private static final int MAX_REF = (1 << 8) + (1 << 3);
114
115 /**
116 * Hash table for matching byte sequences (reused for performance).
117 */
118 private int[] cachedHashTable;
119
120 /**
121 * Return byte with lower 2 bytes being byte at index, then index+1.
122 */
123 private static int first(byte[] in, int inPos) {
124 return (in[inPos] << 8) | (in[inPos + 1] & 255);
125 }
126
127 /**
128 * Shift v 1 byte left, add value at index inPos+2.
129 */
130 private static int next(int v, byte[] in, int inPos) {
131 return (v << 8) | (in[inPos + 2] & 255);
132 }
133
134 /**
135 * Compute the address in the hash table.
136 */
137 private static int hash(int h) {
138 return ((h * 2777) >> 9) & (HASH_SIZE - 1);
139 }
140
141 public int compress(byte[] in, int inLen, byte[] out, int outPos) {
142 int inPos = 0;
143 if (cachedHashTable == null) {
144 cachedHashTable = new int[HASH_SIZE];
145 }
146 int[] hashTab = cachedHashTable;
147 int literals = 0;
148 outPos++;
149 int future = first(in, 0);
150 while (inPos < inLen - 4) {
151 byte p2 = in[inPos + 2];
152 // next
153 future = (future << 8) + (p2 & 255);
154 int off = hash(future);
155 int ref = hashTab[off];
156 hashTab[off] = inPos;
157 // if (ref < inPos
158 // && ref > 0
159 // && (off = inPos - ref - 1) < MAX_OFF
160 // && in[ref + 2] == p2
161 // && (((in[ref] & 255) << 8) | (in[ref + 1] & 255)) ==
162 // ((future >> 8) & 0xffff)) {
163 if (ref < inPos
164 && ref > 0
165 && (off = inPos - ref - 1) < MAX_OFF
166 && in[ref + 2] == p2
167 && in[ref + 1] == (byte) (future >> 8)
168 && in[ref] == (byte) (future >> 16)) {
169 // match
170 int maxLen = inLen - inPos - 2;
171 if (maxLen > MAX_REF) {
172 maxLen = MAX_REF;
173 }
174 if (literals == 0) {
175 // multiple back-references,
176 // so there is no literal run control byte
177 outPos--;
178 } else {
179 // set the control byte at the start of the literal run
180 // to store the number of literals
181 out[outPos - literals - 1] = (byte) (literals - 1);
182 literals = 0;
183 }
184 int len = 3;
185 while (len < maxLen && in[ref + len] == in[inPos + len]) {
186 len++;
187 }
188 len -= 2;
189 if (len < 7) {
190 out[outPos++] = (byte) ((off >> 8) + (len << 5));
191 } else {
192 out[outPos++] = (byte) ((off >> 8) + (7 << 5));
193 out[outPos++] = (byte) (len - 7);
194 }
195 out[outPos++] = (byte) off;
196 // move one byte forward to allow for a literal run control byte
197 outPos++;
198 inPos += len;
199 // rebuild the future, and store the last bytes to the hashtable.
200 // Storing hashes of the last bytes in back-reference improves
201 // the compression ratio and only reduces speed slightly.
202 future = first(in, inPos);
203 future = next(future, in, inPos);
204 hashTab[hash(future)] = inPos++;
205 future = next(future, in, inPos);
206 hashTab[hash(future)] = inPos++;
207 } else {
208 // copy one byte from input to output as part of literal
209 out[outPos++] = in[inPos++];
210 literals++;
211 // at the end of this literal chunk, write the length
212 // to the control byte and start a new chunk
213 if (literals == MAX_LITERAL) {
214 out[outPos - literals - 1] = (byte) (literals - 1);
215 literals = 0;
216 // move ahead one byte to allow for the
217 // literal run control byte
218 outPos++;
219 }
220 }
221 }
222 // write the remaining few bytes as literals
223 while (inPos < inLen) {
224 out[outPos++] = in[inPos++];
225 literals++;
226 if (literals == MAX_LITERAL) {
227 out[outPos - literals - 1] = (byte) (literals - 1);
228 literals = 0;
229 outPos++;
230 }
231 }
232 // writes the final literal run length to the control byte
233 out[outPos - literals - 1] = (byte) (literals - 1);
234 if (literals == 0) {
235 outPos--;
236 }
237 return outPos;
238 }
239
240 public void expand(byte[] in, int inPos, int inLen, byte[] out, int outPos, int outLen) {
241 // if ((inPos | outPos | outLen) < 0) {
242 if (inPos < 0 || outPos < 0 || outLen < 0) {
243 throw new IllegalArgumentException();
244 }
245 do {
246 int ctrl = in[inPos++] & 255;
247 if (ctrl < MAX_LITERAL) {
248 // literal run of length = ctrl + 1,
249 ctrl++;
250 // copy to output and move forward this many bytes
251 System.arraycopy(in, inPos, out, outPos, ctrl);
252 outPos += ctrl;
253 inPos += ctrl;
254 } else {
255 // back reference
256 // the highest 3 bits are the match length
257 int len = ctrl >> 5;
258 // if the length is maxed, add the next byte to the length
259 if (len == 7) {
260 len += in[inPos++] & 255;
261 }
262 // minimum back-reference is 3 bytes,
263 // so 2 was subtracted before storing size
264 len += 2;
265
266 // ctrl is now the offset for a back-reference...
267 // the logical AND operation removes the length bits
268 ctrl = -((ctrl & 0x1f) << 8) - 1;
269
270 // the next byte augments/increases the offset
271 ctrl -= in[inPos++] & 255;
272
273 // copy the back-reference bytes from the given
274 // location in output to current position
275 ctrl += outPos;
276 if (outPos + len >= out.length) {
277 // reduce array bounds checking
278 throw new ArrayIndexOutOfBoundsException();
279 }
280 for (int i = 0; i < len; i++) {
281 out[outPos++] = out[ctrl++];
282 }
283 }
284 } while (outPos < outLen);
285 }
286
287
288 public static final Serializer<byte[]> SERIALIZER = new Serializer<byte[]>() {
289
290 final ThreadLocal<CompressLZF> LZF = new ThreadLocal<CompressLZF>() {
291 @Override
292 protected CompressLZF initialValue() {
293 return new CompressLZF();
294 }
295 };
296
297 @Override
298 public void serialize(DataOutput out, byte[] value) throws IOException {
299 if (value == null) return;
300
301 CompressLZF lzf = LZF.get();
302 byte[] outbuf = new byte[value.length + 40];
303 int len = lzf.compress(value, value.length, outbuf, 0);
304 //check if compressed data are larger then original
305 if (value.length <= len) {
306 //in this case do not compress data, write 0 as indicator
307 Utils.packInt(out, 0);
308 out.write(value);
309 } else {
310 Utils.packInt(out, value.length); //write original decompressed size
311 out.write(outbuf, 0, len);
312 }
313 }
314
315 @Override
316 public byte[] deserialize(DataInput in, int available) throws IOException {
317 if (available == 0) return null;
318 //get original decompressed size
319 DataInput2 in2 = (DataInput2) in;
320 int origPos = in2.pos;
321 int expendedLen = Utils.unpackInt(in);
322 byte[] inbuf = new byte[available - (in2.pos - origPos)];
323 in.readFully(inbuf);
324 if (expendedLen == 0) {
325 //special case, data are not compressed
326 return inbuf;
327 }
328 byte[] outbuf = new byte[expendedLen + 40];
329
330 CompressLZF lzf = LZF.get();
331 lzf.expand(inbuf, 0, inbuf.length, outbuf, 0, expendedLen);
332 outbuf = Arrays.copyOf(outbuf, expendedLen);
333
334 return outbuf;
335 }
336
337
338 };
339
340 /**
341 * Wraps existing serializer and compresses its input/output
342 */
343 public static <E> Serializer<E> serializerCompressWrapper(Serializer<E> serializer) {
344 return new SerializerCompressWrapper<E>(serializer);
345 }
346
347
348 protected static class SerializerCompressWrapper<E> implements Serializer<E>, Serializable {
349 protected final Serializer<E> serializer;
350 public SerializerCompressWrapper(Serializer<E> serializer) {
351 this.serializer = serializer;
352 }
353
354 @Override
355 public void serialize(DataOutput out, E value) throws IOException {
356 //serialize to byte[]
357 DataOutput2 out2 = new DataOutput2();
358 serializer.serialize(out2, value);
359 byte[] b = out2.copyBytes();
360 CompressLZF.SERIALIZER.serialize(out, b);
361 }
362
363 @Override
364 public E deserialize(DataInput in, int available) throws IOException {
365 byte[] b = CompressLZF.SERIALIZER.deserialize(in, available);
366 DataInput2 in2 = new DataInput2(b);
367 return serializer.deserialize(in2, b.length);
368 }
369 }
370
371}
Note: See TracBrowser for help on using the repository browser.