[13350] | 1 | /*
|
---|
| 2 | * IndexDecoder
|
---|
| 3 | *
|
---|
| 4 | * Author: Lasse Collin <lasse.collin@tukaani.org>
|
---|
| 5 | *
|
---|
| 6 | * This file has been put into the public domain.
|
---|
| 7 | * You can do whatever you want with this file.
|
---|
| 8 | */
|
---|
| 9 |
|
---|
| 10 | package org.tukaani.xz.index;
|
---|
| 11 |
|
---|
| 12 | import java.io.IOException;
|
---|
| 13 | import java.io.EOFException;
|
---|
| 14 | import java.util.zip.CheckedInputStream;
|
---|
| 15 | import org.tukaani.xz.common.DecoderUtil;
|
---|
| 16 | import org.tukaani.xz.common.StreamFlags;
|
---|
| 17 | import org.tukaani.xz.SeekableInputStream;
|
---|
| 18 | import org.tukaani.xz.CorruptedInputException;
|
---|
| 19 | import org.tukaani.xz.MemoryLimitException;
|
---|
| 20 | import org.tukaani.xz.UnsupportedOptionsException;
|
---|
| 21 |
|
---|
| 22 | public class IndexDecoder extends IndexBase {
|
---|
| 23 | private final StreamFlags streamFlags;
|
---|
| 24 | private final long streamPadding;
|
---|
| 25 | private final int memoryUsage;
|
---|
| 26 |
|
---|
| 27 | // Unpadded Size and Uncompressed Size fields
|
---|
| 28 | private final long[] unpadded;
|
---|
| 29 | private final long[] uncompressed;
|
---|
| 30 |
|
---|
| 31 | // Uncompressed size of the largest Block. It is used by
|
---|
| 32 | // SeekableXZInputStream to find out the largest Block of the .xz file.
|
---|
| 33 | private long largestBlockSize = 0;
|
---|
| 34 |
|
---|
| 35 | // Offsets relative to the beginning of the .xz file. These are all zero
|
---|
| 36 | // for the first Stream in the file.
|
---|
| 37 | private int recordOffset = 0;
|
---|
| 38 | private long compressedOffset = 0;
|
---|
| 39 | private long uncompressedOffset = 0;
|
---|
| 40 |
|
---|
| 41 | public IndexDecoder(SeekableInputStream in, StreamFlags streamFooterFlags,
|
---|
| 42 | long streamPadding, int memoryLimit)
|
---|
| 43 | throws IOException {
|
---|
| 44 | super(new CorruptedInputException("XZ Index is corrupt"));
|
---|
| 45 | this.streamFlags = streamFooterFlags;
|
---|
| 46 | this.streamPadding = streamPadding;
|
---|
| 47 |
|
---|
| 48 | // If endPos is exceeded before the CRC32 field has been decoded,
|
---|
| 49 | // the Index is corrupt.
|
---|
| 50 | long endPos = in.position() + streamFooterFlags.backwardSize - 4;
|
---|
| 51 |
|
---|
| 52 | java.util.zip.CRC32 crc32 = new java.util.zip.CRC32();
|
---|
| 53 | CheckedInputStream inChecked = new CheckedInputStream(in, crc32);
|
---|
| 54 |
|
---|
| 55 | // Index Indicator
|
---|
| 56 | if (inChecked.read() != 0x00)
|
---|
| 57 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
| 58 |
|
---|
| 59 | try {
|
---|
| 60 | // Number of Records
|
---|
| 61 | long count = DecoderUtil.decodeVLI(inChecked);
|
---|
| 62 |
|
---|
| 63 | // Catch Record counts that obviously too high to be valid.
|
---|
| 64 | // This test isn't exact because it ignores Index Indicator,
|
---|
| 65 | // Number of Records, and CRC32 fields, but this is good enough
|
---|
| 66 | // to catch the most obvious problems.
|
---|
| 67 | if (count >= streamFooterFlags.backwardSize / 2)
|
---|
| 68 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
| 69 |
|
---|
| 70 | // If the Record count doesn't fit into an int, we cannot
|
---|
| 71 | // allocate the arrays to hold the Records.
|
---|
| 72 | if (count > Integer.MAX_VALUE)
|
---|
| 73 | throw new UnsupportedOptionsException("XZ Index has over "
|
---|
| 74 | + Integer.MAX_VALUE + " Records");
|
---|
| 75 |
|
---|
| 76 | // Calculate approximate memory requirements and check the
|
---|
| 77 | // memory usage limit.
|
---|
| 78 | memoryUsage = 1 + (int)((16L * count + 1023) / 1024);
|
---|
| 79 | if (memoryLimit >= 0 && memoryUsage > memoryLimit)
|
---|
| 80 | throw new MemoryLimitException(memoryUsage, memoryLimit);
|
---|
| 81 |
|
---|
| 82 | // Allocate the arrays for the Records.
|
---|
| 83 | unpadded = new long[(int)count];
|
---|
| 84 | uncompressed = new long[(int)count];
|
---|
| 85 | int record = 0;
|
---|
| 86 |
|
---|
| 87 | // Decode the Records.
|
---|
| 88 | for (int i = (int)count; i > 0; --i) {
|
---|
| 89 | // Get the next Record.
|
---|
| 90 | long unpaddedSize = DecoderUtil.decodeVLI(inChecked);
|
---|
| 91 | long uncompressedSize = DecoderUtil.decodeVLI(inChecked);
|
---|
| 92 |
|
---|
| 93 | // Check that the input position stays sane. Since this is
|
---|
| 94 | // checked only once per loop iteration instead of for
|
---|
| 95 | // every input byte read, it's still possible that
|
---|
| 96 | // EOFException gets thrown with corrupt input.
|
---|
| 97 | if (in.position() > endPos)
|
---|
| 98 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
| 99 |
|
---|
| 100 | // Add the new Record.
|
---|
| 101 | unpadded[record] = blocksSum + unpaddedSize;
|
---|
| 102 | uncompressed[record] = uncompressedSum + uncompressedSize;
|
---|
| 103 | ++record;
|
---|
| 104 | super.add(unpaddedSize, uncompressedSize);
|
---|
| 105 | assert record == recordCount;
|
---|
| 106 |
|
---|
| 107 | // Remember the uncompressed size of the largest Block.
|
---|
| 108 | if (largestBlockSize < uncompressedSize)
|
---|
| 109 | largestBlockSize = uncompressedSize;
|
---|
| 110 | }
|
---|
| 111 | } catch (EOFException e) {
|
---|
| 112 | // EOFException is caught just in case a corrupt input causes
|
---|
| 113 | // DecoderUtil.decodeVLI to read too much at once.
|
---|
| 114 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
| 115 | }
|
---|
| 116 |
|
---|
| 117 | // Validate that the size of the Index field matches
|
---|
| 118 | // Backward Size.
|
---|
| 119 | int indexPaddingSize = getIndexPaddingSize();
|
---|
| 120 | if (in.position() + indexPaddingSize != endPos)
|
---|
| 121 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
| 122 |
|
---|
| 123 | // Index Padding
|
---|
| 124 | while (indexPaddingSize-- > 0)
|
---|
| 125 | if (inChecked.read() != 0x00)
|
---|
| 126 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
| 127 |
|
---|
| 128 | // CRC32
|
---|
| 129 | long value = crc32.getValue();
|
---|
| 130 | for (int i = 0; i < 4; ++i)
|
---|
| 131 | if (((value >>> (i * 8)) & 0xFF) != in.read())
|
---|
| 132 | throw new CorruptedInputException("XZ Index is corrupt");
|
---|
| 133 | }
|
---|
| 134 |
|
---|
| 135 | public void setOffsets(IndexDecoder prev) {
|
---|
| 136 | // NOTE: SeekableXZInputStream checks that the total number of Blocks
|
---|
| 137 | // in concatenated Streams fits into an int.
|
---|
| 138 | recordOffset = prev.recordOffset + (int)prev.recordCount;
|
---|
| 139 | compressedOffset = prev.compressedOffset
|
---|
| 140 | + prev.getStreamSize() + prev.streamPadding;
|
---|
| 141 | assert (compressedOffset & 3) == 0;
|
---|
| 142 | uncompressedOffset = prev.uncompressedOffset + prev.uncompressedSum;
|
---|
| 143 | }
|
---|
| 144 |
|
---|
| 145 | public int getMemoryUsage() {
|
---|
| 146 | return memoryUsage;
|
---|
| 147 | }
|
---|
| 148 |
|
---|
| 149 | public StreamFlags getStreamFlags() {
|
---|
| 150 | return streamFlags;
|
---|
| 151 | }
|
---|
| 152 |
|
---|
| 153 | public int getRecordCount() {
|
---|
| 154 | // It was already checked in the constructor that it fits into an int.
|
---|
| 155 | // Otherwise we couldn't have allocated the arrays.
|
---|
| 156 | return (int)recordCount;
|
---|
| 157 | }
|
---|
| 158 |
|
---|
| 159 | public long getUncompressedSize() {
|
---|
| 160 | return uncompressedSum;
|
---|
| 161 | }
|
---|
| 162 |
|
---|
| 163 | public long getLargestBlockSize() {
|
---|
| 164 | return largestBlockSize;
|
---|
| 165 | }
|
---|
| 166 |
|
---|
| 167 | public boolean hasUncompressedOffset(long pos) {
|
---|
| 168 | return pos >= uncompressedOffset
|
---|
| 169 | && pos < uncompressedOffset + uncompressedSum;
|
---|
| 170 | }
|
---|
| 171 |
|
---|
| 172 | public boolean hasRecord(int blockNumber) {
|
---|
| 173 | return blockNumber >= recordOffset
|
---|
| 174 | && blockNumber < recordOffset + recordCount;
|
---|
| 175 | }
|
---|
| 176 |
|
---|
| 177 | public void locateBlock(BlockInfo info, long target) {
|
---|
| 178 | assert target >= uncompressedOffset;
|
---|
| 179 | target -= uncompressedOffset;
|
---|
| 180 | assert target < uncompressedSum;
|
---|
| 181 |
|
---|
| 182 | int left = 0;
|
---|
| 183 | int right = unpadded.length - 1;
|
---|
| 184 |
|
---|
| 185 | while (left < right) {
|
---|
| 186 | int i = left + (right - left) / 2;
|
---|
| 187 |
|
---|
| 188 | if (uncompressed[i] <= target)
|
---|
| 189 | left = i + 1;
|
---|
| 190 | else
|
---|
| 191 | right = i;
|
---|
| 192 | }
|
---|
| 193 |
|
---|
| 194 | setBlockInfo(info, recordOffset + left);
|
---|
| 195 | }
|
---|
| 196 |
|
---|
| 197 | public void setBlockInfo(BlockInfo info, int blockNumber) {
|
---|
| 198 | // The caller has checked that the given Block number is inside
|
---|
| 199 | // this Index.
|
---|
| 200 | assert blockNumber >= recordOffset;
|
---|
| 201 | assert blockNumber - recordOffset < recordCount;
|
---|
| 202 |
|
---|
| 203 | info.index = this;
|
---|
| 204 | info.blockNumber = blockNumber;
|
---|
| 205 |
|
---|
| 206 | int pos = blockNumber - recordOffset;
|
---|
| 207 |
|
---|
| 208 | if (pos == 0) {
|
---|
| 209 | info.compressedOffset = 0;
|
---|
| 210 | info.uncompressedOffset = 0;
|
---|
| 211 | } else {
|
---|
| 212 | info.compressedOffset = (unpadded[pos - 1] + 3) & ~3;
|
---|
| 213 | info.uncompressedOffset = uncompressed[pos - 1];
|
---|
| 214 | }
|
---|
| 215 |
|
---|
| 216 | info.unpaddedSize = unpadded[pos] - info.compressedOffset;
|
---|
| 217 | info.uncompressedSize = uncompressed[pos] - info.uncompressedOffset;
|
---|
| 218 |
|
---|
| 219 | info.compressedOffset += compressedOffset
|
---|
| 220 | + DecoderUtil.STREAM_HEADER_SIZE;
|
---|
| 221 | info.uncompressedOffset += uncompressedOffset;
|
---|
| 222 | }
|
---|
| 223 | }
|
---|