[13350] | 1 | /*
|
---|
| 2 | * Binary Tree match finder with 2-, 3-, and 4-byte hashing
|
---|
| 3 | *
|
---|
| 4 | * Authors: Lasse Collin <lasse.collin@tukaani.org>
|
---|
| 5 | * Igor Pavlov <http://7-zip.org/>
|
---|
| 6 | *
|
---|
| 7 | * This file has been put into the public domain.
|
---|
| 8 | * You can do whatever you want with this file.
|
---|
| 9 | */
|
---|
| 10 |
|
---|
| 11 | package org.tukaani.xz.lz;
|
---|
| 12 |
|
---|
| 13 | import org.tukaani.xz.ArrayCache;
|
---|
| 14 |
|
---|
| 15 | final class BT4 extends LZEncoder {
|
---|
| 16 | private final Hash234 hash;
|
---|
| 17 | private final int[] tree;
|
---|
| 18 | private final Matches matches;
|
---|
| 19 | private final int depthLimit;
|
---|
| 20 |
|
---|
| 21 | private final int cyclicSize;
|
---|
| 22 | private int cyclicPos = -1;
|
---|
| 23 | private int lzPos;
|
---|
| 24 |
|
---|
| 25 | static int getMemoryUsage(int dictSize) {
|
---|
| 26 | return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 8) + 10;
|
---|
| 27 | }
|
---|
| 28 |
|
---|
| 29 | BT4(int dictSize, int beforeSizeMin, int readAheadMax,
|
---|
| 30 | int niceLen, int matchLenMax, int depthLimit,
|
---|
| 31 | ArrayCache arrayCache) {
|
---|
| 32 | super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
|
---|
| 33 | arrayCache);
|
---|
| 34 |
|
---|
| 35 | cyclicSize = dictSize + 1;
|
---|
| 36 | lzPos = cyclicSize;
|
---|
| 37 |
|
---|
| 38 | hash = new Hash234(dictSize, arrayCache);
|
---|
| 39 | tree = arrayCache.getIntArray(cyclicSize * 2, false);
|
---|
| 40 |
|
---|
| 41 | // Substracting 1 because the shortest match that this match
|
---|
| 42 | // finder can find is 2 bytes, so there's no need to reserve
|
---|
| 43 | // space for one-byte matches.
|
---|
| 44 | matches = new Matches(niceLen - 1);
|
---|
| 45 |
|
---|
| 46 | this.depthLimit = depthLimit > 0 ? depthLimit : 16 + niceLen / 2;
|
---|
| 47 | }
|
---|
| 48 |
|
---|
| 49 | public void putArraysToCache(ArrayCache arrayCache) {
|
---|
| 50 | arrayCache.putArray(tree);
|
---|
| 51 | hash.putArraysToCache(arrayCache);
|
---|
| 52 | super.putArraysToCache(arrayCache);
|
---|
| 53 | }
|
---|
| 54 |
|
---|
| 55 | private int movePos() {
|
---|
| 56 | int avail = movePos(niceLen, 4);
|
---|
| 57 |
|
---|
| 58 | if (avail != 0) {
|
---|
| 59 | if (++lzPos == Integer.MAX_VALUE) {
|
---|
| 60 | int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
|
---|
| 61 | hash.normalize(normalizationOffset);
|
---|
| 62 | normalize(tree, cyclicSize * 2, normalizationOffset);
|
---|
| 63 | lzPos -= normalizationOffset;
|
---|
| 64 | }
|
---|
| 65 |
|
---|
| 66 | if (++cyclicPos == cyclicSize)
|
---|
| 67 | cyclicPos = 0;
|
---|
| 68 | }
|
---|
| 69 |
|
---|
| 70 | return avail;
|
---|
| 71 | }
|
---|
| 72 |
|
---|
| 73 | public Matches getMatches() {
|
---|
| 74 | matches.count = 0;
|
---|
| 75 |
|
---|
| 76 | int matchLenLimit = matchLenMax;
|
---|
| 77 | int niceLenLimit = niceLen;
|
---|
| 78 | int avail = movePos();
|
---|
| 79 |
|
---|
| 80 | if (avail < matchLenLimit) {
|
---|
| 81 | if (avail == 0)
|
---|
| 82 | return matches;
|
---|
| 83 |
|
---|
| 84 | matchLenLimit = avail;
|
---|
| 85 | if (niceLenLimit > avail)
|
---|
| 86 | niceLenLimit = avail;
|
---|
| 87 | }
|
---|
| 88 |
|
---|
| 89 | hash.calcHashes(buf, readPos);
|
---|
| 90 | int delta2 = lzPos - hash.getHash2Pos();
|
---|
| 91 | int delta3 = lzPos - hash.getHash3Pos();
|
---|
| 92 | int currentMatch = hash.getHash4Pos();
|
---|
| 93 | hash.updateTables(lzPos);
|
---|
| 94 |
|
---|
| 95 | int lenBest = 0;
|
---|
| 96 |
|
---|
| 97 | // See if the hash from the first two bytes found a match.
|
---|
| 98 | // The hashing algorithm guarantees that if the first byte
|
---|
| 99 | // matches, also the second byte does, so there's no need to
|
---|
| 100 | // test the second byte.
|
---|
| 101 | if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
|
---|
| 102 | lenBest = 2;
|
---|
| 103 | matches.len[0] = 2;
|
---|
| 104 | matches.dist[0] = delta2 - 1;
|
---|
| 105 | matches.count = 1;
|
---|
| 106 | }
|
---|
| 107 |
|
---|
| 108 | // See if the hash from the first three bytes found a match that
|
---|
| 109 | // is different from the match possibly found by the two-byte hash.
|
---|
| 110 | // Also here the hashing algorithm guarantees that if the first byte
|
---|
| 111 | // matches, also the next two bytes do.
|
---|
| 112 | if (delta2 != delta3 && delta3 < cyclicSize
|
---|
| 113 | && buf[readPos - delta3] == buf[readPos]) {
|
---|
| 114 | lenBest = 3;
|
---|
| 115 | matches.dist[matches.count++] = delta3 - 1;
|
---|
| 116 | delta2 = delta3;
|
---|
| 117 | }
|
---|
| 118 |
|
---|
| 119 | // If a match was found, see how long it is.
|
---|
| 120 | if (matches.count > 0) {
|
---|
| 121 | while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
|
---|
| 122 | == buf[readPos + lenBest])
|
---|
| 123 | ++lenBest;
|
---|
| 124 |
|
---|
| 125 | matches.len[matches.count - 1] = lenBest;
|
---|
| 126 |
|
---|
| 127 | // Return if it is long enough (niceLen or reached the end of
|
---|
| 128 | // the dictionary).
|
---|
| 129 | if (lenBest >= niceLenLimit) {
|
---|
| 130 | skip(niceLenLimit, currentMatch);
|
---|
| 131 | return matches;
|
---|
| 132 | }
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | // Long enough match wasn't found so easily. Look for better matches
|
---|
| 136 | // from the binary tree.
|
---|
| 137 | if (lenBest < 3)
|
---|
| 138 | lenBest = 3;
|
---|
| 139 |
|
---|
| 140 | int depth = depthLimit;
|
---|
| 141 |
|
---|
| 142 | int ptr0 = (cyclicPos << 1) + 1;
|
---|
| 143 | int ptr1 = cyclicPos << 1;
|
---|
| 144 | int len0 = 0;
|
---|
| 145 | int len1 = 0;
|
---|
| 146 |
|
---|
| 147 | while (true) {
|
---|
| 148 | int delta = lzPos - currentMatch;
|
---|
| 149 |
|
---|
| 150 | // Return if the search depth limit has been reached or
|
---|
| 151 | // if the distance of the potential match exceeds the
|
---|
| 152 | // dictionary size.
|
---|
| 153 | if (depth-- == 0 || delta >= cyclicSize) {
|
---|
| 154 | tree[ptr0] = 0;
|
---|
| 155 | tree[ptr1] = 0;
|
---|
| 156 | return matches;
|
---|
| 157 | }
|
---|
| 158 |
|
---|
| 159 | int pair = (cyclicPos - delta
|
---|
| 160 | + (delta > cyclicPos ? cyclicSize : 0)) << 1;
|
---|
| 161 | int len = Math.min(len0, len1);
|
---|
| 162 |
|
---|
| 163 | if (buf[readPos + len - delta] == buf[readPos + len]) {
|
---|
| 164 | while (++len < matchLenLimit)
|
---|
| 165 | if (buf[readPos + len - delta] != buf[readPos + len])
|
---|
| 166 | break;
|
---|
| 167 |
|
---|
| 168 | if (len > lenBest) {
|
---|
| 169 | lenBest = len;
|
---|
| 170 | matches.len[matches.count] = len;
|
---|
| 171 | matches.dist[matches.count] = delta - 1;
|
---|
| 172 | ++matches.count;
|
---|
| 173 |
|
---|
| 174 | if (len >= niceLenLimit) {
|
---|
| 175 | tree[ptr1] = tree[pair];
|
---|
| 176 | tree[ptr0] = tree[pair + 1];
|
---|
| 177 | return matches;
|
---|
| 178 | }
|
---|
| 179 | }
|
---|
| 180 | }
|
---|
| 181 |
|
---|
| 182 | if ((buf[readPos + len - delta] & 0xFF)
|
---|
| 183 | < (buf[readPos + len] & 0xFF)) {
|
---|
| 184 | tree[ptr1] = currentMatch;
|
---|
| 185 | ptr1 = pair + 1;
|
---|
| 186 | currentMatch = tree[ptr1];
|
---|
| 187 | len1 = len;
|
---|
| 188 | } else {
|
---|
| 189 | tree[ptr0] = currentMatch;
|
---|
| 190 | ptr0 = pair;
|
---|
| 191 | currentMatch = tree[ptr0];
|
---|
| 192 | len0 = len;
|
---|
| 193 | }
|
---|
| 194 | }
|
---|
| 195 | }
|
---|
| 196 |
|
---|
| 197 | private void skip(int niceLenLimit, int currentMatch) {
|
---|
| 198 | int depth = depthLimit;
|
---|
| 199 |
|
---|
| 200 | int ptr0 = (cyclicPos << 1) + 1;
|
---|
| 201 | int ptr1 = cyclicPos << 1;
|
---|
| 202 | int len0 = 0;
|
---|
| 203 | int len1 = 0;
|
---|
| 204 |
|
---|
| 205 | while (true) {
|
---|
| 206 | int delta = lzPos - currentMatch;
|
---|
| 207 |
|
---|
| 208 | if (depth-- == 0 || delta >= cyclicSize) {
|
---|
| 209 | tree[ptr0] = 0;
|
---|
| 210 | tree[ptr1] = 0;
|
---|
| 211 | return;
|
---|
| 212 | }
|
---|
| 213 |
|
---|
| 214 | int pair = (cyclicPos - delta
|
---|
| 215 | + (delta > cyclicPos ? cyclicSize : 0)) << 1;
|
---|
| 216 | int len = Math.min(len0, len1);
|
---|
| 217 |
|
---|
| 218 | if (buf[readPos + len - delta] == buf[readPos + len]) {
|
---|
| 219 | // No need to look for longer matches than niceLenLimit
|
---|
| 220 | // because we only are updating the tree, not returning
|
---|
| 221 | // matches found to the caller.
|
---|
| 222 | do {
|
---|
| 223 | if (++len == niceLenLimit) {
|
---|
| 224 | tree[ptr1] = tree[pair];
|
---|
| 225 | tree[ptr0] = tree[pair + 1];
|
---|
| 226 | return;
|
---|
| 227 | }
|
---|
| 228 | } while (buf[readPos + len - delta] == buf[readPos + len]);
|
---|
| 229 | }
|
---|
| 230 |
|
---|
| 231 | if ((buf[readPos + len - delta] & 0xFF)
|
---|
| 232 | < (buf[readPos + len] & 0xFF)) {
|
---|
| 233 | tree[ptr1] = currentMatch;
|
---|
| 234 | ptr1 = pair + 1;
|
---|
| 235 | currentMatch = tree[ptr1];
|
---|
| 236 | len1 = len;
|
---|
| 237 | } else {
|
---|
| 238 | tree[ptr0] = currentMatch;
|
---|
| 239 | ptr0 = pair;
|
---|
| 240 | currentMatch = tree[ptr0];
|
---|
| 241 | len0 = len;
|
---|
| 242 | }
|
---|
| 243 | }
|
---|
| 244 | }
|
---|
| 245 |
|
---|
| 246 | public void skip(int len) {
|
---|
| 247 | while (len-- > 0) {
|
---|
| 248 | int niceLenLimit = niceLen;
|
---|
| 249 | int avail = movePos();
|
---|
| 250 |
|
---|
| 251 | if (avail < niceLenLimit) {
|
---|
| 252 | if (avail == 0)
|
---|
| 253 | continue;
|
---|
| 254 |
|
---|
| 255 | niceLenLimit = avail;
|
---|
| 256 | }
|
---|
| 257 |
|
---|
| 258 | hash.calcHashes(buf, readPos);
|
---|
| 259 | int currentMatch = hash.getHash4Pos();
|
---|
| 260 | hash.updateTables(lzPos);
|
---|
| 261 |
|
---|
| 262 | skip(niceLenLimit, currentMatch);
|
---|
| 263 | }
|
---|
| 264 | }
|
---|
| 265 | }
|
---|