1 | /*
|
---|
2 | * IndexDecoder
|
---|
3 | *
|
---|
4 | * Author: Lasse Collin <lasse.collin@tukaani.org>
|
---|
5 | *
|
---|
6 | * This file has been put into the public domain.
|
---|
7 | * You can do whatever you want with this file.
|
---|
8 | */
|
---|
9 |
|
---|
10 | package org.tukaani.xz.index;
|
---|
11 |
|
---|
12 | import java.io.IOException;
|
---|
13 | import java.io.EOFException;
|
---|
14 | import java.util.zip.CheckedInputStream;
|
---|
15 | import org.tukaani.xz.common.DecoderUtil;
|
---|
16 | import org.tukaani.xz.common.StreamFlags;
|
---|
17 | import org.tukaani.xz.SeekableInputStream;
|
---|
18 | import org.tukaani.xz.CorruptedInputException;
|
---|
19 | import org.tukaani.xz.MemoryLimitException;
|
---|
20 | import org.tukaani.xz.UnsupportedOptionsException;
|
---|
21 |
|
---|
22 | public class IndexDecoder extends IndexBase {
|
---|
23 | private final StreamFlags streamFlags;
|
---|
24 | private final long streamPadding;
|
---|
25 | private final int memoryUsage;
|
---|
26 |
|
---|
27 | // Unpadded Size and Uncompressed Size fields
|
---|
28 | private final long[] unpadded;
|
---|
29 | private final long[] uncompressed;
|
---|
30 |
|
---|
31 | // Uncompressed size of the largest Block. It is used by
|
---|
32 | // SeekableXZInputStream to find out the largest Block of the .xz file.
|
---|
33 | private long largestBlockSize = 0;
|
---|
34 |
|
---|
35 | // Offsets relative to the beginning of the .xz file. These are all zero
|
---|
36 | // for the first Stream in the file.
|
---|
37 | private int recordOffset = 0;
|
---|
38 | private long compressedOffset = 0;
|
---|
39 | private long uncompressedOffset = 0;
|
---|
40 |
|
---|
41 | public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
|
---|
42 | long streamPadding, int memoryLimit)
|
---|
43 | throws IOException {
|
---|
44 | super(new CorruptedInputException("XZ Index is corrupt"));
|
---|
45 | this.streamFlags = streamFooterFlags;
|
---|
46 | this.streamPadding = streamPadding;
|
---|
47 |
|
---|
48 | // If endPos is exceeded before the CRC32 field has been decoded,
|
---|
49 | // the Index is corrupt.
|
---|
50 | long endPos = in.position() + streamFooterFlags.backwardSize - 4;
|
---|
51 |
|
---|
52 | java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
|
---|
53 | CheckedInputStream inChecked = new CheckedInputStream(in, crc32);
|
---|
54 |
|
---|
55 | // Index Indicator
|
---|
56 | if (inChecked.read() != 0x00)
|
---|
57 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
58 |
|
---|
59 | try {
|
---|
60 | // Number of Records
|
---|
61 | long count = DecoderUtil.decodeVLI(inChecked);
|
---|
62 |
|
---|
63 | // Catch Record counts that obviously too high to be valid.
|
---|
64 | // This test isn't exact because it ignores Index Indicator,
|
---|
65 | // Number of Records, and CRC32 fields, but this is good enough
|
---|
66 | // to catch the most obvious problems.
|
---|
67 | if (count >= streamFooterFlags.backwardSize / 2)
|
---|
68 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
69 |
|
---|
70 | // If the Record count doesn't fit into an int, we cannot
|
---|
71 | // allocate the arrays to hold the Records.
|
---|
72 | if (count > Integer.MAX_VALUE)
|
---|
73 | throw new UnsupportedOptionsException("XZ Index has over "
|
---|
74 | + Integer.MAX_VALUE + " Records");
|
---|
75 |
|
---|
76 | // Calculate approximate memory requirements and check the
|
---|
77 | // memory usage limit.
|
---|
78 | memoryUsage = 1 + (int)((16L * count + 1023) / 1024);
|
---|
79 | if (memoryLimit >= 0 && memoryUsage > memoryLimit)
|
---|
80 | throw new MemoryLimitException(memoryUsage, memoryLimit);
|
---|
81 |
|
---|
82 | // Allocate the arrays for the Records.
|
---|
83 | unpadded = new long[(int)count];
|
---|
84 | uncompressed = new long[(int)count];
|
---|
85 | int record = 0;
|
---|
86 |
|
---|
87 | // Decode the Records.
|
---|
88 | for (int i = (int)count; i > 0; --i) {
|
---|
89 | // Get the next Record.
|
---|
90 | long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
|
---|
91 | long uncompressedSize = DecoderUtil.decodeVLI(inChecked);
|
---|
92 |
|
---|
93 | // Check that the input position stays sane. Since this is
|
---|
94 | // checked only once per loop iteration instead of for
|
---|
95 | // every input byte read, it's still possible that
|
---|
96 | // EOFException gets thrown with corrupt input.
|
---|
97 | if (in.position() > endPos)
|
---|
98 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
99 |
|
---|
100 | // Add the new Record.
|
---|
101 | unpadded[record] = blocksSum + unpaddedSize;
|
---|
102 | uncompressed[record] = uncompressedSum + uncompressedSize;
|
---|
103 | ++record;
|
---|
104 | super.add(unpaddedSize, uncompressedSize);
|
---|
105 | assert record == recordCount;
|
---|
106 |
|
---|
107 | // Remember the uncompressed size of the largest Block.
|
---|
108 | if (largestBlockSize < uncompressedSize)
|
---|
109 | largestBlockSize = uncompressedSize;
|
---|
110 | }
|
---|
111 | } catch (EOFException e) {
|
---|
112 | // EOFException is caught just in case a corrupt input causes
|
---|
113 | // DecoderUtil.decodeVLI to read too much at once.
|
---|
114 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
115 | }
|
---|
116 |
|
---|
117 | // Validate that the size of the Index field matches
|
---|
118 | // Backward Size.
|
---|
119 | int indexPaddingSize = getIndexPaddingSize();
|
---|
120 | if (in.position() + indexPaddingSize != endPos)
|
---|
121 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
122 |
|
---|
123 | // Index Padding
|
---|
124 | while (indexPaddingSize-- > 0)
|
---|
125 | if (inChecked.read() != 0x00)
|
---|
126 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
127 |
|
---|
128 | // CRC32
|
---|
129 | long value = crc32.getValue();
|
---|
130 | for (int i = 0; i < 4; ++i)
|
---|
131 | if (((value >>> (i * 8)) & 0xFF) != in.read())
|
---|
132 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
133 | }
|
---|
134 |
|
---|
135 | public void setOffsets(IndexDecoder prev) {
|
---|
136 | // NOTE: SeekableXZInputStream checks that the total number of Blocks
|
---|
137 | // in concatenated Streams fits into an int.
|
---|
138 | recordOffset = prev.recordOffset + (int)prev.recordCount;
|
---|
139 | compressedOffset = prev.compressedOffset
|
---|
140 | + prev.getStreamSize() + prev.streamPadding;
|
---|
141 | assert (compressedOffset & 3) == 0;
|
---|
142 | uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
|
---|
143 | }
|
---|
144 |
|
---|
145 | public int getMemoryUsage() {
|
---|
146 | return memoryUsage;
|
---|
147 | }
|
---|
148 |
|
---|
149 | public StreamFlags getStreamFlags() {
|
---|
150 | return streamFlags;
|
---|
151 | }
|
---|
152 |
|
---|
153 | public int getRecordCount() {
|
---|
154 | // It was already checked in the constructor that it fits into an int.
|
---|
155 | // Otherwise we couldn't have allocated the arrays.
|
---|
156 | return (int)recordCount;
|
---|
157 | }
|
---|
158 |
|
---|
159 | public long getUncompressedSize() {
|
---|
160 | return uncompressedSum;
|
---|
161 | }
|
---|
162 |
|
---|
163 | public long getLargestBlockSize() {
|
---|
164 | return largestBlockSize;
|
---|
165 | }
|
---|
166 |
|
---|
167 | public boolean hasUncompressedOffset(long pos) {
|
---|
168 | return pos >= uncompressedOffset
|
---|
169 | && pos < uncompressedOffset + uncompressedSum;
|
---|
170 | }
|
---|
171 |
|
---|
172 | public boolean hasRecord(int blockNumber) {
|
---|
173 | return blockNumber >= recordOffset
|
---|
174 | && blockNumber < recordOffset + recordCount;
|
---|
175 | }
|
---|
176 |
|
---|
177 | public void locateBlock(BlockInfo info, long target) {
|
---|
178 | assert target >= uncompressedOffset;
|
---|
179 | target -= uncompressedOffset;
|
---|
180 | assert target < uncompressedSum;
|
---|
181 |
|
---|
182 | int left = 0;
|
---|
183 | int right = unpadded.length - 1;
|
---|
184 |
|
---|
185 | while (left < right) {
|
---|
186 | int i = left + (right - left) / 2;
|
---|
187 |
|
---|
188 | if (uncompressed[i] <= target)
|
---|
189 | left = i + 1;
|
---|
190 | else
|
---|
191 | right = i;
|
---|
192 | }
|
---|
193 |
|
---|
194 | setBlockInfo(info, recordOffset + left);
|
---|
195 | }
|
---|
196 |
|
---|
197 | public void setBlockInfo(BlockInfo info, int blockNumber) {
|
---|
198 | // The caller has checked that the given Block number is inside
|
---|
199 | // this Index.
|
---|
200 | assert blockNumber >= recordOffset;
|
---|
201 | assert blockNumber - recordOffset < recordCount;
|
---|
202 |
|
---|
203 | info.index = this;
|
---|
204 | info.blockNumber = blockNumber;
|
---|
205 |
|
---|
206 | int pos = blockNumber - recordOffset;
|
---|
207 |
|
---|
208 | if (pos == 0) {
|
---|
209 | info.compressedOffset = 0;
|
---|
210 | info.uncompressedOffset = 0;
|
---|
211 | } else {
|
---|
212 | info.compressedOffset = (unpadded[pos - 1] + 3) & ~3;
|
---|
213 | info.uncompressedOffset = uncompressed[pos - 1];
|
---|
214 | }
|
---|
215 |
|
---|
216 | info.unpaddedSize = unpadded[pos] - info.compressedOffset;
|
---|
217 | info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;
|
---|
218 |
|
---|
219 | info.compressedOffset += compressedOffset
|
---|
220 | + DecoderUtil.STREAM_HEADER_SIZE;
|
---|
221 | info.uncompressedOffset += uncompressedOffset;
|
---|
222 | }
|
---|
223 | }
|
---|