/*
 * EXIFExtractor.java
 *
 * This class is based upon code from Jhead, a C program for extracting and
 * manipulating the Exif data within files written by Matthias Wandel.
 * http://www.sentex.net/~mwandel/jhead/
 *
 * Jhead is public domain software - that is, you can do whatever you want
 * with it, and include it in software that is licensed under the GNU or the
 * BSD license, or whatever other licence you choose, including proprietary
 * closed source licenses.  Similarly, I release this Java version under the
 * same license, though I do ask that you leave this header intact.
 *
 * If you make modifications to this code that you think would benefit the
 * wider community, please send me a copy and I'll post it on my site.  Unlike
 * Jhead, this code (as it stands) only supports reading of Exif data - no
 * manipulation, and no thumbnail stuff.
 *
 * If you make use of this code, I'd appreciate hearing about it.
 *   drew.noakes@drewnoakes.com
 * Latest version of this software kept at
 *   http://drewnoakes.com/
 *
 * Created on 28 April 2002, 23:54
 * Modified 04 Aug 2002
 * - Renamed constants to be in line with changes to ExifTagValues interface
 * - Substituted usage of JDK 1.4 features (java.nio package)
 * Modified 29 Oct 2002 (v1.2)
 * - Proper traversing of Exif file structure and complete refactor & tidy of
 *   the codebase (a few unnoticed bugs removed)
 * - Reads makernote data for 6 families of camera (5 makes)
 * - Tags now stored in directories... use the IFD_* constants to refer to the
 *   image file directory you require (Exif, Interop, GPS and Makernote*) --
 *   this avoids collisions where two tags share the same code
 * - Takes componentCount of unknown tags into account
 * - Now understands GPS tags (thanks to Colin Briton for his help with this)
 * - Some other bug fixes, pointed out by users around the world.  Thanks!
 * Modified 27 Nov 2002 (v2.0)
 * - Renamed to ExifReader
 * - Moved to new package com.drew.metadata.exif
 * Modified since, however changes have not been logged.
See release notes for * library-wide modifications. */ package com.drew.metadata.exif; import com.drew.imaging.jpeg.JpegProcessingException; import com.drew.imaging.jpeg.JpegSegmentData; import com.drew.imaging.jpeg.JpegSegmentReader; import com.drew.lang.Rational; import com.drew.metadata.Directory; import com.drew.metadata.Metadata; import com.drew.metadata.MetadataReader; import java.io.File; import java.io.InputStream; import java.util.HashMap; /** * Extracts Exif data from a JPEG header segment, providing information about the * camera/scanner/capture device (if available). Information is encapsulated in * an Metadata object. * @author Drew Noakes http://drewnoakes.com */ public class ExifReader implements MetadataReader { /** * The JPEG segment as an array of bytes. */ private final byte[] _data; /** * Represents the native byte ordering used in the JPEG segment. If true, * then we're using Motorolla ordering (Big endian), else we're using Intel * ordering (Little endian). */ private boolean _isMotorollaByteOrder; /** * Bean instance to store information about the image and camera/scanner/capture * device. */ private Metadata _metadata; /** * The number of bytes used per format descriptor. */ private static final int[] BYTES_PER_FORMAT = {0, 1, 1, 2, 4, 8, 1, 1, 2, 4, 8, 4, 8}; /** * The number of formats known. */ private static final int MAX_FORMAT_CODE = 12; // Format types // Note: Cannot use the DataFormat enumeration in the case statement that uses these tags. // Is there a better way? 
private static final int FMT_BYTE = 1; private static final int FMT_STRING = 2; private static final int FMT_USHORT = 3; private static final int FMT_ULONG = 4; private static final int FMT_URATIONAL = 5; private static final int FMT_SBYTE = 6; private static final int FMT_UNDEFINED = 7; private static final int FMT_SSHORT = 8; private static final int FMT_SLONG = 9; private static final int FMT_SRATIONAL = 10; private static final int FMT_SINGLE = 11; private static final int FMT_DOUBLE = 12; public static final int TAG_EXIF_OFFSET = 0x8769; public static final int TAG_INTEROP_OFFSET = 0xA005; public static final int TAG_GPS_INFO_OFFSET = 0x8825; public static final int TAG_MAKER_NOTE = 0x927C; public static final int TIFF_HEADER_START_OFFSET = 6; /** * Creates an ExifReader for a JpegSegmentData object. * @param segmentData */ public ExifReader(JpegSegmentData segmentData) { this(segmentData.getSegment(JpegSegmentReader.SEGMENT_APP1)); } /** * Creates an ExifReader for a Jpeg file. * @param file * @throws JpegProcessingException */ public ExifReader(File file) throws JpegProcessingException { this(new JpegSegmentReader(file).readSegment(JpegSegmentReader.SEGMENT_APP1)); } /** * Creates an ExifReader for a Jpeg stream. * @param is JPEG stream. Stream will be closed. */ public ExifReader(InputStream is) throws JpegProcessingException { this(new JpegSegmentReader(is).readSegment(JpegSegmentReader.SEGMENT_APP1)); } /** * Creates an ExifReader for the given JPEG header segment. */ public ExifReader(byte[] data) { _data = data; } /** * Performs the Exif data extraction, returning a new instance of Metadata. */ public Metadata extract() { return extract(new Metadata()); } /** * Performs the Exif data extraction, adding found values to the specified * instance of Metadata. 
*/
    public Metadata extract(Metadata metadata)
    {
        _metadata = metadata;

        // without a segment there is nothing to read (and no directory is created)
        if (_data==null)
            return _metadata;

        // errors and 0th IFD values are both recorded against the Exif directory
        final ExifDirectory exifDir = (ExifDirectory)_metadata.getDirectory(ExifDirectory.class);

        // Validate the fixed part of the header.  The checks run strictly in this order and stop
        // at the first failure; note that setByteOrder() also records the byte order that
        // the later get16Bits()/get32Bits() calls rely upon.
        final String headerProblem;
        if (_data.length<=14) {
            headerProblem = "Exif data segment must contain at least 14 bytes";
        } else if (!"Exif\0\0".equals(new String(_data, 0, 6))) {
            headerProblem = "Exif data segment doesn't begin with 'Exif'";
        } else {
            // expected to be either "MM" or "II"
            final String orderMarker = new String(_data, 6, 2);
            if (!setByteOrder(orderMarker)) {
                headerProblem = "Unclear distinction between Motorola/Intel byte ordering: " + orderMarker;
            } else if (get16Bits(8)!=0x2a) {
                headerProblem = "Invalid Exif start - should have 0x2A at offset 8 in Exif header";
            } else {
                headerProblem = null;
            }
        }

        if (headerProblem!=null) {
            exifDir.addError(headerProblem);
            return _metadata;
        }

        // the stored offset is relative to the TIFF header, so rebase it onto the segment
        int ifd0Offset = get32Bits(10) + TIFF_HEADER_START_OFFSET;

        // David Ekholm sent an digital camera image that has this problem
        if (ifd0Offset>=_data.length - 1) {
            exifDir.addError("First exif directory offset is beyond end of Exif data segment");
            // First directory normally starts 14 bytes in -- try it here and catch another error in the worst case
            ifd0Offset = 14;
        }

        // offsets of the directories already walked, shared by the whole traversal
        final HashMap visitedOffsets = new HashMap();

        // 0th IFD (we merge with Exif IFD)
        processDirectory(exifDir, visitedOffsets, ifd0Offset, TIFF_HEADER_START_OFFSET);

        // after the extraction process, if we have the correct tags, we may be able to store thumbnail information
        storeThumbnailBytes(exifDir, TIFF_HEADER_START_OFFSET);

        return _metadata;
    }

    // NOTE(review): the text of storeThumbnailBytes is damaged further on (part of its copy loop
    // and everything up to the middle of processDirectory is missing from this file).  The opening
    // of the method below is therefore left exactly as found.
    private void storeThumbnailBytes(ExifDirectory exifDirectory, int tiffHeaderOffset)
    {
        if (!exifDirectory.containsTag(ExifDirectory.TAG_COMPRESSION))
            return;

        if
(!exifDirectory.containsTag(ExifDirectory.TAG_THUMBNAIL_LENGTH) || !exifDirectory.containsTag(ExifDirectory.TAG_THUMBNAIL_OFFSET)) return; try { int offset = exifDirectory.getInt(ExifDirectory.TAG_THUMBNAIL_OFFSET); int length = exifDirectory.getInt(ExifDirectory.TAG_THUMBNAIL_LENGTH); byte[] result = new byte[length]; for (int i = 0; i=_data.length || dirStartOffset<0) { directory.addError("Ignored directory marked to start outside data segement"); return; } if (!isDirectoryLengthValid(dirStartOffset, tiffHeaderOffset)) { directory.addError("Illegally sized directory"); return; } // First two bytes in the IFD are the number of tags in this directory int dirTagCount = get16Bits(dirStartOffset); // Handle each tag in this directory for (int tagNumber = 0; tagNumberMAX_FORMAT_CODE) { directory.addError("Invalid format code: " + formatCode); continue; } // 4 bytes dictate the number of components in this tag's data final int componentCount = get32Bits(tagOffset + 4); if (componentCount<0) { directory.addError("Negative component count in EXIF"); continue; } // each component may have more than one byte... calculate the total number of bytes final int byteCount = componentCount * BYTES_PER_FORMAT[formatCode]; final int tagValueOffset = calculateTagValueOffset(byteCount, tagOffset, tiffHeaderOffset); if (tagValueOffset<0 || tagValueOffset > _data.length) { directory.addError("Illegal pointer offset value in EXIF"); continue; } // Check that this tag isn't going to allocate outside the bounds of the data array. // This addresses an uncommon OutOfMemoryError. 
if (byteCount < 0 || tagValueOffset + byteCount > _data.length) { directory.addError("Illegal number of bytes: " + byteCount); continue; } // Calculate the value as an offset for cases where the tag represents directory final int subdirOffset = tiffHeaderOffset + get32Bits(tagValueOffset); switch (tagType) { case TAG_EXIF_OFFSET: processDirectory(_metadata.getDirectory(ExifDirectory.class), processedDirectoryOffsets, subdirOffset, tiffHeaderOffset); continue; case TAG_INTEROP_OFFSET: processDirectory(_metadata.getDirectory(ExifInteropDirectory.class), processedDirectoryOffsets, subdirOffset, tiffHeaderOffset); continue; case TAG_GPS_INFO_OFFSET: processDirectory(_metadata.getDirectory(GpsDirectory.class), processedDirectoryOffsets, subdirOffset, tiffHeaderOffset); continue; case TAG_MAKER_NOTE: processMakerNote(tagValueOffset, processedDirectoryOffsets, tiffHeaderOffset); continue; default: processTag(directory, tagType, tagValueOffset, componentCount, formatCode); break; } } // at the end of each IFD is an optional link to the next IFD final int finalTagOffset = calculateTagOffset(dirStartOffset, dirTagCount); int nextDirectoryOffset = get32Bits(finalTagOffset); if (nextDirectoryOffset!=0) { nextDirectoryOffset += tiffHeaderOffset; if (nextDirectoryOffset>=_data.length) { // Last 4 bytes of IFD reference another IFD with an address that is out of bounds // Note this could have been caused by jhead 1.3 cropping too much return; } else if (nextDirectoryOffset < dirStartOffset) { // Last 4 bytes of IFD reference another IFD with an address that is before the start of this directory return; } // the next directory is of same type as this one processDirectory(directory, processedDirectoryOffsets, nextDirectoryOffset, tiffHeaderOffset); } } private void processMakerNote(int subdirOffset, HashMap processedDirectoryOffsets, int tiffHeaderOffset) { // Determine the camera model and makernote format Directory exifDirectory = _metadata.getDirectory(ExifDirectory.class); if 
(exifDirectory==null) return; String cameraModel = exifDirectory.getString(ExifDirectory.TAG_MAKE); final String firstTwoChars = new String(_data, subdirOffset, 2); final String firstThreeChars = new String(_data, subdirOffset, 3); final String firstFourChars = new String(_data, subdirOffset, 4); final String firstFiveChars = new String(_data, subdirOffset, 5); final String firstSixChars = new String(_data, subdirOffset, 6); final String firstSevenChars = new String(_data, subdirOffset, 7); final String firstEightChars = new String(_data, subdirOffset, 8); if ("OLYMP".equals(firstFiveChars) || "EPSON".equals(firstFiveChars) || "AGFA".equals(firstFourChars)) { // Olympus Makernote // Epson and Agfa use Olypus maker note standard, see: // http://www.ozhiker.com/electronics/pjmt/jpeg_info/ processDirectory(_metadata.getDirectory(OlympusMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 8, tiffHeaderOffset); } else if (cameraModel!=null && cameraModel.trim().toUpperCase().startsWith("NIKON")) { if ("Nikon".equals(firstFiveChars)) { /* There are two scenarios here: * Type 1: ** * :0000: 4E 69 6B 6F 6E 00 01 00-05 00 02 00 02 00 06 00 Nikon........... * :0010: 00 00 EC 02 00 00 03 00-03 00 01 00 00 00 06 00 ................ * Type 3: ** * :0000: 4E 69 6B 6F 6E 00 02 00-00 00 4D 4D 00 2A 00 00 Nikon....MM.*... * :0010: 00 08 00 1E 00 01 00 07-00 00 00 04 30 32 30 30 ............0200 */ if (_data[subdirOffset+6]==1) processDirectory(_metadata.getDirectory(NikonType1MakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 8, tiffHeaderOffset); else if (_data[subdirOffset+6]==2) processDirectory(_metadata.getDirectory(NikonType2MakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 18, subdirOffset + 10); else exifDirectory.addError("Unsupported makernote data ignored."); } else { // The IFD begins with the first MakerNote byte (no ASCII name). This occurs with CoolPix 775, E990 and D1 models. 
processDirectory(_metadata.getDirectory(NikonType2MakernoteDirectory.class), processedDirectoryOffsets, subdirOffset, tiffHeaderOffset); } } else if ("SONY CAM".equals(firstEightChars) || "SONY DSC".equals(firstEightChars)) { processDirectory(_metadata.getDirectory(SonyMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 12, tiffHeaderOffset); } else if ("KDK".equals(firstThreeChars)) { processDirectory(_metadata.getDirectory(KodakMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 20, tiffHeaderOffset); } else if ("Canon".equalsIgnoreCase(cameraModel)) { processDirectory(_metadata.getDirectory(CanonMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset, tiffHeaderOffset); } else if (cameraModel!=null && cameraModel.toUpperCase().startsWith("CASIO")) { if ("QVC\u0000\u0000\u0000".equals(firstSixChars)) processDirectory(_metadata.getDirectory(CasioType2MakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 6, tiffHeaderOffset); else processDirectory(_metadata.getDirectory(CasioType1MakernoteDirectory.class), processedDirectoryOffsets, subdirOffset, tiffHeaderOffset); } else if ("FUJIFILM".equals(firstEightChars) || "Fujifilm".equalsIgnoreCase(cameraModel)) { // TODO make this field a passed parameter, to avoid threading issues boolean byteOrderBefore = _isMotorollaByteOrder; // bug in fujifilm makernote ifd means we temporarily use Intel byte ordering _isMotorollaByteOrder = false; // the 4 bytes after "FUJIFILM" in the makernote point to the start of the makernote // IFD, though the offset is relative to the start of the makernote, not the TIFF // header (like everywhere else) int ifdStart = subdirOffset + get32Bits(subdirOffset + 8); processDirectory(_metadata.getDirectory(FujifilmMakernoteDirectory.class), processedDirectoryOffsets, ifdStart, tiffHeaderOffset); _isMotorollaByteOrder = byteOrderBefore; } else if (cameraModel!=null && cameraModel.toUpperCase().startsWith("MINOLTA")) { // Cases seen with the 
model starting with MINOLTA in capitals seem to have a valid Olympus makernote // area that commences immediately. processDirectory(_metadata.getDirectory(OlympusMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset, tiffHeaderOffset); } else if ("KC".equals(firstTwoChars) || "MINOL".equals(firstFiveChars) || "MLY".equals(firstThreeChars) || "+M+M+M+M".equals(firstEightChars)) { // This Konica data is not understood. Header identified in accordance with information at this site: // http://www.ozhiker.com/electronics/pjmt/jpeg_info/minolta_mn.html // TODO determine how to process the information described at the above website exifDirectory.addError("Unsupported Konica/Minolta data ignored."); } else if ("KYOCERA".equals(firstSevenChars)) { // http://www.ozhiker.com/electronics/pjmt/jpeg_info/kyocera_mn.html processDirectory(_metadata.getDirectory(KyoceraMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 22, tiffHeaderOffset); } else if ("Panasonic\u0000\u0000\u0000".equals(new String(_data, subdirOffset, 12))) { // NON-Standard TIFF IFD Data using Panasonic Tags. 
There is no Next-IFD pointer after the IFD // Offsets are relative to the start of the TIFF header at the beginning of the EXIF segment // more information here: http://www.ozhiker.com/electronics/pjmt/jpeg_info/panasonic_mn.html processDirectory(_metadata.getDirectory(PanasonicMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 12, tiffHeaderOffset); } else if ("AOC\u0000".equals(firstFourChars)) { // NON-Standard TIFF IFD Data using Casio Type 2 Tags // IFD has no Next-IFD pointer at end of IFD, and // Offsets are relative to the start of the current IFD tag, not the TIFF header // Observed for: // - Pentax ist D processDirectory(_metadata.getDirectory(CasioType2MakernoteDirectory.class), processedDirectoryOffsets, subdirOffset + 6, subdirOffset); } else if (cameraModel!=null && (cameraModel.toUpperCase().startsWith("PENTAX") || cameraModel.toUpperCase().startsWith("ASAHI"))) { // NON-Standard TIFF IFD Data using Pentax Tags // IFD has no Next-IFD pointer at end of IFD, and // Offsets are relative to the start of the current IFD tag, not the TIFF header // Observed for: // - PENTAX Optio 330 // - PENTAX Optio 430 processDirectory(_metadata.getDirectory(PentaxMakernoteDirectory.class), processedDirectoryOffsets, subdirOffset, subdirOffset); } else { // TODO how to store makernote data when it's not from a supported camera model? // this is difficult as the starting offset is not known. we could look for it... 
exifDirectory.addError("Unsupported makernote data ignored."); } } private boolean isDirectoryLengthValid(int dirStartOffset, int tiffHeaderOffset) { int dirTagCount = get16Bits(dirStartOffset); int dirLength = (2 + (12 * dirTagCount) + 4); if (dirLength + dirStartOffset + tiffHeaderOffset>=_data.length) { // Note: Files that had thumbnails trimmed with jhead 1.3 or earlier might trigger this return false; } return true; } private void processTag(Directory directory, int tagType, int tagValueOffset, int componentCount, int formatCode) { // Directory simply stores raw values // The display side uses a Descriptor class per directory to turn the raw values into 'pretty' descriptions switch (formatCode) { case FMT_UNDEFINED: // this includes exif user comments final byte[] tagBytes = new byte[componentCount]; final int byteCount = componentCount * BYTES_PER_FORMAT[formatCode]; for (int i=0; i4) { // If its bigger than 4 bytes, the dir entry contains an offset. // dirEntryOffset must be passed, as some makernote implementations (e.g. FujiFilm) incorrectly use an // offset relative to the start of the makernote itself, not the TIFF segment. final int offsetVal = get32Bits(dirEntryOffset + 8); if (offsetVal + byteCount>_data.length) { // Bogus pointer offset and / or bytecount value return -1; // signal error } return tiffHeaderOffset + offsetVal; } else { // 4 bytes or less and value is in the dir entry itself return dirEntryOffset + 8; } } /** * Creates a String from the _data buffer starting at the specified offset, * and ending where byte=='\0' or where length==maxLength. 
*/ private String readString(int offset, int maxLength) { int length = 0; while ((offset + length)<_data.length && _data[offset + length]!='\0' && length_data.length) throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index " + offset + " where max index is " + (_data.length - 1) + ")"); if (_isMotorollaByteOrder) { // Motorola - MSB first return (_data[offset] << 8 & 0xFF00) | (_data[offset + 1] & 0xFF); } else { // Intel ordering - LSB first return (_data[offset + 1] << 8 & 0xFF00) | (_data[offset] & 0xFF); } } /** * Get a 32 bit value from file's native byte order. */ private int get32Bits(int offset) { if (offset<0 || offset+4>_data.length) throw new ArrayIndexOutOfBoundsException("attempt to read data outside of exif segment (index " + offset + " where max index is " + (_data.length - 1) + ")"); if (_isMotorollaByteOrder) { // Motorola - MSB first return (_data[offset] << 24 & 0xFF000000) | (_data[offset + 1] << 16 & 0xFF0000) | (_data[offset + 2] << 8 & 0xFF00) | (_data[offset + 3] & 0xFF); } else { // Intel ordering - LSB first return (_data[offset + 3] << 24 & 0xFF000000) | (_data[offset + 2] << 16 & 0xFF0000) | (_data[offset + 1] << 8 & 0xFF00) | (_data[offset] & 0xFF); } } }