source: josm/trunk/src/org/tukaani/xz/lz/HC4.java@ 13867

Last change on this file since 13867 was 13350, checked in by stoecker, 7 years ago

see #15816 - add XZ support

File size: 7.0 KB
RevLine 
[13350]1/*
2 * Hash Chain match finder with 2-, 3-, and 4-byte hashing
3 *
4 * Authors: Lasse Collin <lasse.collin@tukaani.org>
5 * Igor Pavlov <http://7-zip.org/>
6 *
7 * This file has been put into the public domain.
8 * You can do whatever you want with this file.
9 */
10
11package org.tukaani.xz.lz;
12
13import org.tukaani.xz.ArrayCache;
14
15final class HC4 extends LZEncoder {
16 private final Hash234 hash;
17 private final int[] chain;
18 private final Matches matches;
19 private final int depthLimit;
20
21 private final int cyclicSize;
22 private int cyclicPos = -1;
23 private int lzPos;
24
25 /**
26 * Gets approximate memory usage of the match finder as kibibytes.
27 */
28 static int getMemoryUsage(int dictSize) {
29 return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
30 }
31
32 /**
33 * Creates a new LZEncoder with the HC4 match finder.
34 * See <code>LZEncoder.getInstance</code> for parameter descriptions.
35 */
36 HC4(int dictSize, int beforeSizeMin, int readAheadMax,
37 int niceLen, int matchLenMax, int depthLimit,
38 ArrayCache arrayCache) {
39 super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
40 arrayCache);
41
42 hash = new Hash234(dictSize, arrayCache);
43
44 // +1 because we need dictSize bytes of history + the current byte.
45 cyclicSize = dictSize + 1;
46 chain = arrayCache.getIntArray(cyclicSize, false);
47 lzPos = cyclicSize;
48
49 // Substracting 1 because the shortest match that this match
50 // finder can find is 2 bytes, so there's no need to reserve
51 // space for one-byte matches.
52 matches = new Matches(niceLen - 1);
53
54 // Use a default depth limit if no other value was specified.
55 // The default is just something based on experimentation;
56 // it's nothing magic.
57 this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
58 }
59
60 public void putArraysToCache(ArrayCache arrayCache) {
61 arrayCache.putArray(chain);
62 hash.putArraysToCache(arrayCache);
63 super.putArraysToCache(arrayCache);
64 }
65
66 /**
67 * Moves to the next byte, checks that there is enough available space,
68 * and possibly normalizes the hash tables and the hash chain.
69 *
70 * @return number of bytes available, including the current byte
71 */
72 private int movePos() {
73 int avail = movePos(4, 4);
74
75 if (avail != 0) {
76 if (++lzPos == Integer.MAX_VALUE) {
77 int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
78 hash.normalize(normalizationOffset);
79 normalize(chain, cyclicSize, normalizationOffset);
80 lzPos -= normalizationOffset;
81 }
82
83 if (++cyclicPos == cyclicSize)
84 cyclicPos = 0;
85 }
86
87 return avail;
88 }
89
90 public Matches getMatches() {
91 matches.count = 0;
92 int matchLenLimit = matchLenMax;
93 int niceLenLimit = niceLen;
94 int avail = movePos();
95
96 if (avail < matchLenLimit) {
97 if (avail == 0)
98 return matches;
99
100 matchLenLimit = avail;
101 if (niceLenLimit > avail)
102 niceLenLimit = avail;
103 }
104
105 hash.calcHashes(buf, readPos);
106 int delta2 = lzPos - hash.getHash2Pos();
107 int delta3 = lzPos - hash.getHash3Pos();
108 int currentMatch = hash.getHash4Pos();
109 hash.updateTables(lzPos);
110
111 chain[cyclicPos] = currentMatch;
112
113 int lenBest = 0;
114
115 // See if the hash from the first two bytes found a match.
116 // The hashing algorithm guarantees that if the first byte
117 // matches, also the second byte does, so there's no need to
118 // test the second byte.
119 if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
120 lenBest = 2;
121 matches.len[0] = 2;
122 matches.dist[0] = delta2 - 1;
123 matches.count = 1;
124 }
125
126 // See if the hash from the first three bytes found a match that
127 // is different from the match possibly found by the two-byte hash.
128 // Also here the hashing algorithm guarantees that if the first byte
129 // matches, also the next two bytes do.
130 if (delta2 != delta3 && delta3 < cyclicSize
131 && buf[readPos - delta3] == buf[readPos]) {
132 lenBest = 3;
133 matches.dist[matches.count++] = delta3 - 1;
134 delta2 = delta3;
135 }
136
137 // If a match was found, see how long it is.
138 if (matches.count > 0) {
139 while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
140 == buf[readPos + lenBest])
141 ++lenBest;
142
143 matches.len[matches.count - 1] = lenBest;
144
145 // Return if it is long enough (niceLen or reached the end of
146 // the dictionary).
147 if (lenBest >= niceLenLimit)
148 return matches;
149 }
150
151 // Long enough match wasn't found so easily. Look for better matches
152 // from the hash chain.
153 if (lenBest < 3)
154 lenBest = 3;
155
156 int depth = depthLimit;
157
158 while (true) {
159 int delta = lzPos - currentMatch;
160
161 // Return if the search depth limit has been reached or
162 // if the distance of the potential match exceeds the
163 // dictionary size.
164 if (depth-- == 0 || delta >= cyclicSize)
165 return matches;
166
167 currentMatch = chain[cyclicPos - delta
168 + (delta > cyclicPos ? cyclicSize : 0)];
169
170 // Test the first byte and the first new byte that would give us
171 // a match that is at least one byte longer than lenBest. This
172 // too short matches get quickly skipped.
173 if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
174 && buf[readPos - delta] == buf[readPos]) {
175 // Calculate the length of the match.
176 int len = 0;
177 while (++len < matchLenLimit)
178 if (buf[readPos + len - delta] != buf[readPos + len])
179 break;
180
181 // Use the match if and only if it is better than the longest
182 // match found so far.
183 if (len > lenBest) {
184 lenBest = len;
185 matches.len[matches.count] = len;
186 matches.dist[matches.count] = delta - 1;
187 ++matches.count;
188
189 // Return if it is long enough (niceLen or reached the
190 // end of the dictionary).
191 if (len >= niceLenLimit)
192 return matches;
193 }
194 }
195 }
196 }
197
198 public void skip(int len) {
199 assert len >= 0;
200
201 while (len-- > 0) {
202 if (movePos() != 0) {
203 // Update the hash chain and hash tables.
204 hash.calcHashes(buf, readPos);
205 chain[cyclicPos] = hash.getHash4Pos();
206 hash.updateTables(lzPos);
207 }
208 }
209 }
210}
Note: See TracBrowser for help on using the repository browser.