Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: josm/trunk/src/org/tukaani/xz/lz/HC4.java@ 13867

Last change on this file since 13867 was 13350, checked in by stoecker, 7 years ago
see #15816 - add XZ support
File size: 7.0 KB

Rev	Line
[13350]	1	/*
	2	* Hash Chain match finder with 2-, 3-, and 4-byte hashing
	3	*
	4	* Authors: Lasse Collin <lasse.collin@tukaani.org>
	5	* Igor Pavlov <http://7-zip.org/>
	6	*
	7	* This file has been put into the public domain.
	8	* You can do whatever you want with this file.
	9	*/
	10
	11	package org.tukaani.xz.lz;
	12
	13	import org.tukaani.xz.ArrayCache;
	14
	15	final class HC4 extends LZEncoder {
	16	private final Hash234 hash;
	17	private final int[] chain;
	18	private final Matches matches;
	19	private final int depthLimit;
	20
	21	private final int cyclicSize;
	22	private int cyclicPos = -1;
	23	private int lzPos;
	24
	25	/**
	26	* Gets approximate memory usage of the match finder as kibibytes.
	27	*/
	28	static int getMemoryUsage(int dictSize) {
	29	return Hash234.getMemoryUsage(dictSize) + dictSize / (1024 / 4) + 10;
	30	}
	31
	32	/**
	33	* Creates a new LZEncoder with the HC4 match finder.
	34	* See <code>LZEncoder.getInstance</code> for parameter descriptions.
	35	*/
	36	HC4(int dictSize, int beforeSizeMin, int readAheadMax,
	37	int niceLen, int matchLenMax, int depthLimit,
	38	ArrayCache arrayCache) {
	39	super(dictSize, beforeSizeMin, readAheadMax, niceLen, matchLenMax,
	40	arrayCache);
	41
	42	hash = new Hash234(dictSize, arrayCache);
	43
	44	// +1 because we need dictSize bytes of history + the current byte.
	45	cyclicSize = dictSize + 1;
	46	chain = arrayCache.getIntArray(cyclicSize, false);
	47	lzPos = cyclicSize;
	48
	49	// Substracting 1 because the shortest match that this match
	50	// finder can find is 2 bytes, so there's no need to reserve
	51	// space for one-byte matches.
	52	matches = new Matches(niceLen - 1);
	53
	54	// Use a default depth limit if no other value was specified.
	55	// The default is just something based on experimentation;
	56	// it's nothing magic.
	57	this.depthLimit = (depthLimit > 0) ? depthLimit : 4 + niceLen / 4;
	58	}
	59
	60	public void putArraysToCache(ArrayCache arrayCache) {
	61	arrayCache.putArray(chain);
	62	hash.putArraysToCache(arrayCache);
	63	super.putArraysToCache(arrayCache);
	64	}
	65
	66	/**
	67	* Moves to the next byte, checks that there is enough available space,
	68	* and possibly normalizes the hash tables and the hash chain.
	69	*
	70	* @return number of bytes available, including the current byte
	71	*/
	72	private int movePos() {
	73	int avail = movePos(4, 4);
	74
	75	if (avail != 0) {
	76	if (++lzPos == Integer.MAX_VALUE) {
	77	int normalizationOffset = Integer.MAX_VALUE - cyclicSize;
	78	hash.normalize(normalizationOffset);
	79	normalize(chain, cyclicSize, normalizationOffset);
	80	lzPos -= normalizationOffset;
	81	}
	82
	83	if (++cyclicPos == cyclicSize)
	84	cyclicPos = 0;
	85	}
	86
	87	return avail;
	88	}
	89
	90	public Matches getMatches() {
	91	matches.count = 0;
	92	int matchLenLimit = matchLenMax;
	93	int niceLenLimit = niceLen;
	94	int avail = movePos();
	95
	96	if (avail < matchLenLimit) {
	97	if (avail == 0)
	98	return matches;
	99
	100	matchLenLimit = avail;
	101	if (niceLenLimit > avail)
	102	niceLenLimit = avail;
	103	}
	104
	105	hash.calcHashes(buf, readPos);
	106	int delta2 = lzPos - hash.getHash2Pos();
	107	int delta3 = lzPos - hash.getHash3Pos();
	108	int currentMatch = hash.getHash4Pos();
	109	hash.updateTables(lzPos);
	110
	111	chain[cyclicPos] = currentMatch;
	112
	113	int lenBest = 0;
	114
	115	// See if the hash from the first two bytes found a match.
	116	// The hashing algorithm guarantees that if the first byte
	117	// matches, also the second byte does, so there's no need to
	118	// test the second byte.
	119	if (delta2 < cyclicSize && buf[readPos - delta2] == buf[readPos]) {
	120	lenBest = 2;
	121	matches.len[0] = 2;
	122	matches.dist[0] = delta2 - 1;
	123	matches.count = 1;
	124	}
	125
	126	// See if the hash from the first three bytes found a match that
	127	// is different from the match possibly found by the two-byte hash.
	128	// Also here the hashing algorithm guarantees that if the first byte
	129	// matches, also the next two bytes do.
	130	if (delta2 != delta3 && delta3 < cyclicSize
	131	&& buf[readPos - delta3] == buf[readPos]) {
	132	lenBest = 3;
	133	matches.dist[matches.count++] = delta3 - 1;
	134	delta2 = delta3;
	135	}
	136
	137	// If a match was found, see how long it is.
	138	if (matches.count > 0) {
	139	while (lenBest < matchLenLimit && buf[readPos + lenBest - delta2]
	140	== buf[readPos + lenBest])
	141	++lenBest;
	142
	143	matches.len[matches.count - 1] = lenBest;
	144
	145	// Return if it is long enough (niceLen or reached the end of
	146	// the dictionary).
	147	if (lenBest >= niceLenLimit)
	148	return matches;
	149	}
	150
	151	// Long enough match wasn't found so easily. Look for better matches
	152	// from the hash chain.
	153	if (lenBest < 3)
	154	lenBest = 3;
	155
	156	int depth = depthLimit;
	157
	158	while (true) {
	159	int delta = lzPos - currentMatch;
	160
	161	// Return if the search depth limit has been reached or
	162	// if the distance of the potential match exceeds the
	163	// dictionary size.
	164	if (depth-- == 0 \|\| delta >= cyclicSize)
	165	return matches;
	166
	167	currentMatch = chain[cyclicPos - delta
	168	+ (delta > cyclicPos ? cyclicSize : 0)];
	169
	170	// Test the first byte and the first new byte that would give us
	171	// a match that is at least one byte longer than lenBest. This
	172	// too short matches get quickly skipped.
	173	if (buf[readPos + lenBest - delta] == buf[readPos + lenBest]
	174	&& buf[readPos - delta] == buf[readPos]) {
	175	// Calculate the length of the match.
	176	int len = 0;
	177	while (++len < matchLenLimit)
	178	if (buf[readPos + len - delta] != buf[readPos + len])
	179	break;
	180
	181	// Use the match if and only if it is better than the longest
	182	// match found so far.
	183	if (len > lenBest) {
	184	lenBest = len;
	185	matches.len[matches.count] = len;
	186	matches.dist[matches.count] = delta - 1;
	187	++matches.count;
	188
	189	// Return if it is long enough (niceLen or reached the
	190	// end of the dictionary).
	191	if (len >= niceLenLimit)
	192	return matches;
	193	}
	194	}
	195	}
	196	}
	197
	198	public void skip(int len) {
	199	assert len >= 0;
	200
	201	while (len-- > 0) {
	202	if (movePos() != 0) {
	203	// Update the hash chain and hash tables.
	204	hash.calcHashes(buf, readPos);
	205	chain[cyclicPos] = hash.getHash4Pos();
	206	hash.updateTables(lzPos);
	207	}
	208	}
	209	}
	210	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: