source: josm/trunk/src/com/drew/metadata/iptc/IptcReader.java@ 12187

Last change on this file since 12187 was 10862, checked in by Don-vip, 8 years ago

update to metadata-extractor 2.9.1

File size: 9.0 KB
Line 
1/*
2 * Copyright 2002-2016 Drew Noakes
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 *
16 * More information about this project is available at:
17 *
18 * https://drewnoakes.com/code/exif/
19 * https://github.com/drewnoakes/metadata-extractor
20 */
21package com.drew.metadata.iptc;
22
23import com.drew.imaging.jpeg.JpegSegmentMetadataReader;
24import com.drew.imaging.jpeg.JpegSegmentType;
25import com.drew.lang.SequentialByteArrayReader;
26import com.drew.lang.SequentialReader;
27import com.drew.lang.annotations.NotNull;
28import com.drew.lang.annotations.Nullable;
29import com.drew.metadata.Directory;
30import com.drew.metadata.Metadata;
31
32import java.io.IOException;
33import java.util.Collections;
34
35/**
36 * Decodes IPTC binary data, populating a {@link Metadata} object with tag values in an {@link IptcDirectory}.
37 * <p>
38 * http://www.iptc.org/std/IIM/4.1/specification/IIMV4.1.pdf
39 *
40 * @author Drew Noakes https://drewnoakes.com
41 */
42public class IptcReader implements JpegSegmentMetadataReader
43{
44 // TODO consider breaking the IPTC section up into multiple directories and providing segregation of each IPTC directory
45/*
46 public static final int DIRECTORY_IPTC = 2;
47
48 public static final int ENVELOPE_RECORD = 1;
49 public static final int APPLICATION_RECORD_2 = 2;
50 public static final int APPLICATION_RECORD_3 = 3;
51 public static final int APPLICATION_RECORD_4 = 4;
52 public static final int APPLICATION_RECORD_5 = 5;
53 public static final int APPLICATION_RECORD_6 = 6;
54 public static final int PRE_DATA_RECORD = 7;
55 public static final int DATA_RECORD = 8;
56 public static final int POST_DATA_RECORD = 9;
57*/
58
59 @NotNull
60 public Iterable<JpegSegmentType> getSegmentTypes()
61 {
62 return Collections.singletonList(JpegSegmentType.APPD);
63 }
64
65 public void readJpegSegments(@NotNull Iterable<byte[]> segments, @NotNull Metadata metadata, @NotNull JpegSegmentType segmentType)
66 {
67 for (byte[] segmentBytes : segments) {
68 // Ensure data starts with the IPTC marker byte
69 if (segmentBytes.length != 0 && segmentBytes[0] == 0x1c) {
70 extract(new SequentialByteArrayReader(segmentBytes), metadata, segmentBytes.length);
71 }
72 }
73 }
74
75 /**
76 * Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
77 */
78 public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length)
79 {
80 extract(reader, metadata, length, null);
81 }
82
83 /**
84 * Performs the IPTC data extraction, adding found values to the specified instance of {@link Metadata}.
85 */
86 public void extract(@NotNull final SequentialReader reader, @NotNull final Metadata metadata, long length, @Nullable Directory parentDirectory)
87 {
88 IptcDirectory directory = new IptcDirectory();
89 metadata.addDirectory(directory);
90
91 if (parentDirectory != null)
92 directory.setParent(parentDirectory);
93
94 int offset = 0;
95
96 // for each tag
97 while (offset < length) {
98
99 // identifies start of a tag
100 short startByte;
101 try {
102 startByte = reader.getUInt8();
103 offset++;
104 } catch (IOException e) {
105 directory.addError("Unable to read starting byte of IPTC tag");
106 return;
107 }
108
109 if (startByte != 0x1c) {
110 // NOTE have seen images where there was one extra byte at the end, giving
111 // offset==length at this point, which is not worth logging as an error.
112 if (offset != length)
113 directory.addError("Invalid IPTC tag marker at offset " + (offset - 1) + ". Expected '0x1c' but got '0x" + Integer.toHexString(startByte) + "'.");
114 return;
115 }
116
117 // we need at least five bytes left to read a tag
118 if (offset + 5 > length) {
119 directory.addError("Too few bytes remain for a valid IPTC tag");
120 return;
121 }
122
123 int directoryType;
124 int tagType;
125 int tagByteCount;
126 try {
127 directoryType = reader.getUInt8();
128 tagType = reader.getUInt8();
129 // TODO support Extended DataSet Tag (see 1.5(c), p14, IPTC-IIMV4.2.pdf)
130 tagByteCount = reader.getUInt16();
131 offset += 4;
132 } catch (IOException e) {
133 directory.addError("IPTC data segment ended mid-way through tag descriptor");
134 return;
135 }
136
137 if (offset + tagByteCount > length) {
138 directory.addError("Data for tag extends beyond end of IPTC segment");
139 return;
140 }
141
142 try {
143 processTag(reader, directory, directoryType, tagType, tagByteCount);
144 } catch (IOException e) {
145 directory.addError("Error processing IPTC tag");
146 return;
147 }
148
149 offset += tagByteCount;
150 }
151 }
152
153 private void processTag(@NotNull SequentialReader reader, @NotNull Directory directory, int directoryType, int tagType, int tagByteCount) throws IOException
154 {
155 int tagIdentifier = tagType | (directoryType << 8);
156
157 // Some images have been seen that specify a zero byte tag, which cannot be of much use.
158 // We elect here to completely ignore the tag. The IPTC specification doesn't mention
159 // anything about the interpretation of this situation.
160 // https://raw.githubusercontent.com/wiki/drewnoakes/metadata-extractor/docs/IPTC-IIMV4.2.pdf
161 if (tagByteCount == 0) {
162 directory.setString(tagIdentifier, "");
163 return;
164 }
165
166 String string = null;
167
168 switch (tagIdentifier) {
169 case IptcDirectory.TAG_CODED_CHARACTER_SET:
170 byte[] bytes = reader.getBytes(tagByteCount);
171 String charset = Iso2022Converter.convertISO2022CharsetToJavaCharset(bytes);
172 if (charset == null) {
173 // Unable to determine the charset, so fall through and treat tag as a regular string
174 string = new String(bytes);
175 break;
176 }
177 directory.setString(tagIdentifier, charset);
178 return;
179 case IptcDirectory.TAG_ENVELOPE_RECORD_VERSION:
180 case IptcDirectory.TAG_APPLICATION_RECORD_VERSION:
181 case IptcDirectory.TAG_FILE_VERSION:
182 case IptcDirectory.TAG_ARM_VERSION:
183 case IptcDirectory.TAG_PROGRAM_VERSION:
184 // short
185 if (tagByteCount >= 2) {
186 int shortValue = reader.getUInt16();
187 reader.skip(tagByteCount - 2);
188 directory.setInt(tagIdentifier, shortValue);
189 return;
190 }
191 break;
192 case IptcDirectory.TAG_URGENCY:
193 // byte
194 directory.setInt(tagIdentifier, reader.getUInt8());
195 reader.skip(tagByteCount - 1);
196 return;
197 default:
198 // fall through
199 }
200
201 // If we haven't returned yet, treat it as a string
202 // NOTE that there's a chance we've already loaded the value as a string above, but failed to parse the value
203 if (string == null) {
204 String encoding = directory.getString(IptcDirectory.TAG_CODED_CHARACTER_SET);
205 if (encoding != null) {
206 string = reader.getString(tagByteCount, encoding);
207 } else {
208 byte[] bytes = reader.getBytes(tagByteCount);
209 encoding = Iso2022Converter.guessEncoding(bytes);
210 string = encoding != null ? new String(bytes, encoding) : new String(bytes);
211 }
212 }
213
214 if (directory.containsTag(tagIdentifier)) {
215 // this fancy string[] business avoids using an ArrayList for performance reasons
216 String[] oldStrings = directory.getStringArray(tagIdentifier);
217 String[] newStrings;
218 if (oldStrings == null) {
219 // TODO hitting this block means any prior value(s) are discarded
220 newStrings = new String[1];
221 } else {
222 newStrings = new String[oldStrings.length + 1];
223 System.arraycopy(oldStrings, 0, newStrings, 0, oldStrings.length);
224 }
225 newStrings[newStrings.length - 1] = string;
226 directory.setStringArray(tagIdentifier, newStrings);
227 } else {
228 directory.setString(tagIdentifier, string);
229 }
230 }
231}
Note: See TracBrowser for help on using the repository browser.