source: josm/trunk/src/com/drew/imaging/tiff/TiffReader.java@ 10849

Last change on this file since 10849 was 8132, checked in by Don-vip, 10 years ago

fix #11162 - update to metadata-extractor 2.7.2

File size: 17.2 KB
Line 
/*
 * Copyright 2002-2015 Drew Noakes
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 * http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 * More information about this project is available at:
 *
 * https://drewnoakes.com/code/exif/
 * https://github.com/drewnoakes/metadata-extractor
 */
21package com.drew.imaging.tiff;
22
23import com.drew.lang.RandomAccessReader;
24import com.drew.lang.Rational;
25import com.drew.lang.annotations.NotNull;
26
27import java.io.IOException;
28import java.util.HashSet;
29import java.util.Set;
30
31/**
32 * Processes TIFF-formatted data, calling into client code via that {@link TiffHandler} interface.
33 *
34 * @author Drew Noakes https://drewnoakes.com
35 */
36public class TiffReader
37{
38 /**
39 * Processes a TIFF data sequence.
40 *
41 * @param reader the {@link RandomAccessReader} from which the data should be read
42 * @param handler the {@link TiffHandler} that will coordinate processing and accept read values
43 * @param tiffHeaderOffset the offset within <code>reader</code> at which the TIFF header starts
44 * @throws TiffProcessingException if an error occurred during the processing of TIFF data that could not be
45 * ignored or recovered from
46 * @throws IOException an error occurred while accessing the required data
47 */
48 public void processTiff(@NotNull final RandomAccessReader reader,
49 @NotNull final TiffHandler handler,
50 final int tiffHeaderOffset) throws TiffProcessingException, IOException
51 {
52 // This must be either "MM" or "II".
53 short byteOrderIdentifier = reader.getInt16(tiffHeaderOffset);
54
55 if (byteOrderIdentifier == 0x4d4d) { // "MM"
56 reader.setMotorolaByteOrder(true);
57 } else if (byteOrderIdentifier == 0x4949) { // "II"
58 reader.setMotorolaByteOrder(false);
59 } else {
60 throw new TiffProcessingException("Unclear distinction between Motorola/Intel byte ordering: " + byteOrderIdentifier);
61 }
62
63 // Check the next two values for correctness.
64 final int tiffMarker = reader.getUInt16(2 + tiffHeaderOffset);
65 handler.setTiffMarker(tiffMarker);
66
67 int firstIfdOffset = reader.getInt32(4 + tiffHeaderOffset) + tiffHeaderOffset;
68
69 // David Ekholm sent a digital camera image that has this problem
70 // TODO getLength should be avoided as it causes RandomAccessStreamReader to read to the end of the stream
71 if (firstIfdOffset >= reader.getLength() - 1) {
72 handler.warn("First IFD offset is beyond the end of the TIFF data segment -- trying default offset");
73 // First directory normally starts immediately after the offset bytes, so try that
74 firstIfdOffset = tiffHeaderOffset + 2 + 2 + 4;
75 }
76
77 Set<Integer> processedIfdOffsets = new HashSet<Integer>();
78 processIfd(handler, reader, processedIfdOffsets, firstIfdOffset, tiffHeaderOffset);
79
80 handler.completed(reader, tiffHeaderOffset);
81 }
82
83 /**
84 * Processes a TIFF IFD.
85 *
86 * IFD Header:
87 * <ul>
88 * <li><b>2 bytes</b> number of tags</li>
89 * </ul>
90 * Tag structure:
91 * <ul>
92 * <li><b>2 bytes</b> tag type</li>
93 * <li><b>2 bytes</b> format code (values 1 to 12, inclusive)</li>
94 * <li><b>4 bytes</b> component count</li>
95 * <li><b>4 bytes</b> inline value, or offset pointer if too large to fit in four bytes</li>
96 * </ul>
97 *
98 *
99 * @param handler the {@link com.drew.imaging.tiff.TiffHandler} that will coordinate processing and accept read values
100 * @param reader the {@link com.drew.lang.RandomAccessReader} from which the data should be read
101 * @param processedIfdOffsets the set of visited IFD offsets, to avoid revisiting the same IFD in an endless loop
102 * @param ifdOffset the offset within <code>reader</code> at which the IFD data starts
103 * @param tiffHeaderOffset the offset within <code>reader</code> at which the TIFF header starts
104 * @throws IOException an error occurred while accessing the required data
105 */
106 public static void processIfd(@NotNull final TiffHandler handler,
107 @NotNull final RandomAccessReader reader,
108 @NotNull final Set<Integer> processedIfdOffsets,
109 final int ifdOffset,
110 final int tiffHeaderOffset) throws IOException
111 {
112 try {
113 // check for directories we've already visited to avoid stack overflows when recursive/cyclic directory structures exist
114 if (processedIfdOffsets.contains(Integer.valueOf(ifdOffset))) {
115 return;
116 }
117
118 // remember that we've visited this directory so that we don't visit it again later
119 processedIfdOffsets.add(ifdOffset);
120
121 if (ifdOffset >= reader.getLength() || ifdOffset < 0) {
122 handler.error("Ignored IFD marked to start outside data segment");
123 return;
124 }
125
126 // First two bytes in the IFD are the number of tags in this directory
127 int dirTagCount = reader.getUInt16(ifdOffset);
128
129 int dirLength = (2 + (12 * dirTagCount) + 4);
130 if (dirLength + ifdOffset > reader.getLength()) {
131 handler.error("Illegally sized IFD");
132 return;
133 }
134
135 //
136 // Handle each tag in this directory
137 //
138 int invalidTiffFormatCodeCount = 0;
139 for (int tagNumber = 0; tagNumber < dirTagCount; tagNumber++) {
140 final int tagOffset = calculateTagOffset(ifdOffset, tagNumber);
141
142 // 2 bytes for the tag id
143 final int tagId = reader.getUInt16(tagOffset);
144
145 // 2 bytes for the format code
146 final int formatCode = reader.getUInt16(tagOffset + 2);
147 final TiffDataFormat format = TiffDataFormat.fromTiffFormatCode(formatCode);
148
149 if (format == null) {
150 // This error suggests that we are processing at an incorrect index and will generate
151 // rubbish until we go out of bounds (which may be a while). Exit now.
152 handler.error("Invalid TIFF tag format code: " + formatCode);
153 // TODO specify threshold as a parameter, or provide some other external control over this behaviour
154 if (++invalidTiffFormatCodeCount > 5) {
155 handler.error("Stopping processing as too many errors seen in TIFF IFD");
156 return;
157 }
158 continue;
159 }
160
161 // 4 bytes dictate the number of components in this tag's data
162 final int componentCount = reader.getInt32(tagOffset + 4);
163 if (componentCount < 0) {
164 handler.error("Negative TIFF tag component count");
165 continue;
166 }
167
168 final int byteCount = componentCount * format.getComponentSizeBytes();
169
170 final int tagValueOffset;
171 if (byteCount > 4) {
172 // If it's bigger than 4 bytes, the dir entry contains an offset.
173 final int offsetVal = reader.getInt32(tagOffset + 8);
174 if (offsetVal + byteCount > reader.getLength()) {
175 // Bogus pointer offset and / or byteCount value
176 handler.error("Illegal TIFF tag pointer offset");
177 continue;
178 }
179 tagValueOffset = tiffHeaderOffset + offsetVal;
180 } else {
181 // 4 bytes or less and value is in the dir entry itself.
182 tagValueOffset = tagOffset + 8;
183 }
184
185 if (tagValueOffset < 0 || tagValueOffset > reader.getLength()) {
186 handler.error("Illegal TIFF tag pointer offset");
187 continue;
188 }
189
190 // Check that this tag isn't going to allocate outside the bounds of the data array.
191 // This addresses an uncommon OutOfMemoryError.
192 if (byteCount < 0 || tagValueOffset + byteCount > reader.getLength()) {
193 handler.error("Illegal number of bytes for TIFF tag data: " + byteCount);
194 continue;
195 }
196
197 //
198 // Special handling for tags that point to other IFDs
199 //
200 if (byteCount == 4 && handler.isTagIfdPointer(tagId)) {
201 final int subDirOffset = tiffHeaderOffset + reader.getInt32(tagValueOffset);
202 processIfd(handler, reader, processedIfdOffsets, subDirOffset, tiffHeaderOffset);
203 } else {
204 if (!handler.customProcessTag(tagValueOffset, processedIfdOffsets, tiffHeaderOffset, reader, tagId, byteCount)) {
205 processTag(handler, tagId, tagValueOffset, componentCount, formatCode, reader);
206 }
207 }
208 }
209
210 // at the end of each IFD is an optional link to the next IFD
211 final int finalTagOffset = calculateTagOffset(ifdOffset, dirTagCount);
212 int nextIfdOffset = reader.getInt32(finalTagOffset);
213 if (nextIfdOffset != 0) {
214 nextIfdOffset += tiffHeaderOffset;
215 if (nextIfdOffset >= reader.getLength()) {
216 // Last 4 bytes of IFD reference another IFD with an address that is out of bounds
217 // Note this could have been caused by jhead 1.3 cropping too much
218 return;
219 } else if (nextIfdOffset < ifdOffset) {
220 // TODO is this a valid restriction?
221 // Last 4 bytes of IFD reference another IFD with an address that is before the start of this directory
222 return;
223 }
224
225 if (handler.hasFollowerIfd()) {
226 processIfd(handler, reader, processedIfdOffsets, nextIfdOffset, tiffHeaderOffset);
227 }
228 }
229 } finally {
230 handler.endingIFD();
231 }
232 }
233
234 private static void processTag(@NotNull final TiffHandler handler,
235 final int tagId,
236 final int tagValueOffset,
237 final int componentCount,
238 final int formatCode,
239 @NotNull final RandomAccessReader reader) throws IOException
240 {
241 switch (formatCode) {
242 case TiffDataFormat.CODE_UNDEFINED:
243 // this includes exif user comments
244 handler.setByteArray(tagId, reader.getBytes(tagValueOffset, componentCount));
245 break;
246 case TiffDataFormat.CODE_STRING:
247 handler.setString(tagId, reader.getNullTerminatedString(tagValueOffset, componentCount));
248 break;
249 case TiffDataFormat.CODE_RATIONAL_S:
250 if (componentCount == 1) {
251 handler.setRational(tagId, new Rational(reader.getInt32(tagValueOffset), reader.getInt32(tagValueOffset + 4)));
252 } else if (componentCount > 1) {
253 Rational[] array = new Rational[componentCount];
254 for (int i = 0; i < componentCount; i++)
255 array[i] = new Rational(reader.getInt32(tagValueOffset + (8 * i)), reader.getInt32(tagValueOffset + 4 + (8 * i)));
256 handler.setRationalArray(tagId, array);
257 }
258 break;
259 case TiffDataFormat.CODE_RATIONAL_U:
260 if (componentCount == 1) {
261 handler.setRational(tagId, new Rational(reader.getUInt32(tagValueOffset), reader.getUInt32(tagValueOffset + 4)));
262 } else if (componentCount > 1) {
263 Rational[] array = new Rational[componentCount];
264 for (int i = 0; i < componentCount; i++)
265 array[i] = new Rational(reader.getUInt32(tagValueOffset + (8 * i)), reader.getUInt32(tagValueOffset + 4 + (8 * i)));
266 handler.setRationalArray(tagId, array);
267 }
268 break;
269 case TiffDataFormat.CODE_SINGLE:
270 if (componentCount == 1) {
271 handler.setFloat(tagId, reader.getFloat32(tagValueOffset));
272 } else {
273 float[] array = new float[componentCount];
274 for (int i = 0; i < componentCount; i++)
275 array[i] = reader.getFloat32(tagValueOffset + (i * 4));
276 handler.setFloatArray(tagId, array);
277 }
278 break;
279 case TiffDataFormat.CODE_DOUBLE:
280 if (componentCount == 1) {
281 handler.setDouble(tagId, reader.getDouble64(tagValueOffset));
282 } else {
283 double[] array = new double[componentCount];
284 for (int i = 0; i < componentCount; i++)
285 array[i] = reader.getDouble64(tagValueOffset + (i * 4));
286 handler.setDoubleArray(tagId, array);
287 }
288 break;
289 case TiffDataFormat.CODE_INT8_S:
290 if (componentCount == 1) {
291 handler.setInt8s(tagId, reader.getInt8(tagValueOffset));
292 } else {
293 byte[] array = new byte[componentCount];
294 for (int i = 0; i < componentCount; i++)
295 array[i] = reader.getInt8(tagValueOffset + i);
296 handler.setInt8sArray(tagId, array);
297 }
298 break;
299 case TiffDataFormat.CODE_INT8_U:
300 if (componentCount == 1) {
301 handler.setInt8u(tagId, reader.getUInt8(tagValueOffset));
302 } else {
303 short[] array = new short[componentCount];
304 for (int i = 0; i < componentCount; i++)
305 array[i] = reader.getUInt8(tagValueOffset + i);
306 handler.setInt8uArray(tagId, array);
307 }
308 break;
309 case TiffDataFormat.CODE_INT16_S:
310 if (componentCount == 1) {
311 handler.setInt16s(tagId, (int)reader.getInt16(tagValueOffset));
312 } else {
313 short[] array = new short[componentCount];
314 for (int i = 0; i < componentCount; i++)
315 array[i] = reader.getInt16(tagValueOffset + (i * 2));
316 handler.setInt16sArray(tagId, array);
317 }
318 break;
319 case TiffDataFormat.CODE_INT16_U:
320 if (componentCount == 1) {
321 handler.setInt16u(tagId, reader.getUInt16(tagValueOffset));
322 } else {
323 int[] array = new int[componentCount];
324 for (int i = 0; i < componentCount; i++)
325 array[i] = reader.getUInt16(tagValueOffset + (i * 2));
326 handler.setInt16uArray(tagId, array);
327 }
328 break;
329 case TiffDataFormat.CODE_INT32_S:
330 // NOTE 'long' in this case means 32 bit, not 64
331 if (componentCount == 1) {
332 handler.setInt32s(tagId, reader.getInt32(tagValueOffset));
333 } else {
334 int[] array = new int[componentCount];
335 for (int i = 0; i < componentCount; i++)
336 array[i] = reader.getInt32(tagValueOffset + (i * 4));
337 handler.setInt32sArray(tagId, array);
338 }
339 break;
340 case TiffDataFormat.CODE_INT32_U:
341 // NOTE 'long' in this case means 32 bit, not 64
342 if (componentCount == 1) {
343 handler.setInt32u(tagId, reader.getUInt32(tagValueOffset));
344 } else {
345 long[] array = new long[componentCount];
346 for (int i = 0; i < componentCount; i++)
347 array[i] = reader.getUInt32(tagValueOffset + (i * 4));
348 handler.setInt32uArray(tagId, array);
349 }
350 break;
351 default:
352 handler.error(String.format("Unknown format code %d for tag %d", formatCode, tagId));
353 }
354 }
355
356 /**
357 * Determine the offset of a given tag within the specified IFD.
358 *
359 * @param ifdStartOffset the offset at which the IFD starts
360 * @param entryNumber the zero-based entry number
361 */
362 private static int calculateTagOffset(int ifdStartOffset, int entryNumber)
363 {
364 // Add 2 bytes for the tag count.
365 // Each entry is 12 bytes.
366 return ifdStartOffset + 2 + (12 * entryNumber);
367 }
368}
Note: See TracBrowser for help on using the repository browser.