Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Blame
Revision Log

source: josm/trunk/src/com/drew/metadata/iptc/Iso2022Converter.java@ 10746

Last change on this file since 10746 was 8132, checked in by Don-vip, 10 years ago
fix #11162 - update to metadata-extractor 2.7.2
File size: 2.9 KB

Line
1	package com.drew.metadata.iptc;
2
3	import com.drew.lang.annotations.NotNull;
4	import com.drew.lang.annotations.Nullable;
5
6	import java.nio.ByteBuffer;
7	import java.nio.charset.CharacterCodingException;
8	import java.nio.charset.Charset;
9	import java.nio.charset.CharsetDecoder;
10
11	public final class Iso2022Converter
12	{
13	private static final String ISO_8859_1 = "ISO-8859-1";
14	private static final String UTF_8 = "UTF-8";
15
16	private static final byte LATIN_CAPITAL_A = 0x41;
17	private static final int DOT = 0xe280a2;
18	private static final byte LATIN_CAPITAL_G = 0x47;
19	private static final byte PERCENT_SIGN = 0x25;
20	private static final byte ESC = 0x1B;
21
22	/**
23	* Converts the given ISO2022 char set to a Java charset name.
24	*
25	* @param bytes string data encoded using ISO2022
26	* @return the Java charset name as a string, or <code>null</code> if the conversion was not possible
27	*/
28	@Nullable
29	public static String convertISO2022CharsetToJavaCharset(@NotNull final byte[] bytes)
30	{
31	if (bytes.length > 2 && bytes[0] == ESC && bytes[1] == PERCENT_SIGN && bytes[2] == LATIN_CAPITAL_G)
32	return UTF_8;
33
34	if (bytes.length > 3 && bytes[0] == ESC && (bytes[3] & 0xFF \| ((bytes[2] & 0xFF) << 8) \| ((bytes[1] & 0xFF) << 16)) == DOT && bytes[4] == LATIN_CAPITAL_A)
35	return ISO_8859_1;
36
37	return null;
38	}
39
40	/**
41	* Attempts to guess the encoding of a string provided as a byte array.
42	* <p/>
43	* Encodings trialled are, in order:
44	* <ul>
45	* <li>UTF-8</li>
46	* <li><code>System.getProperty("file.encoding")</code></li>
47	* <li>ISO-8859-1</li>
48	* </ul>
49	* <p/>
50	* Its only purpose is to guess the encoding if and only if iptc tag coded character set is not set. If the
51	* encoding is not UTF-8, the tag should be set. Otherwise it is bad practice. This method tries to
52	* workaround this issue since some metadata manipulating tools do not prevent such bad practice.
53	* <p/>
54	* About the reliability of this method: The check if some bytes are UTF-8 or not has a very high reliability.
55	* The two other checks are less reliable.
56	*
57	* @param bytes some text as bytes
58	* @return the name of the encoding or null if none could be guessed
59	*/
60	@Nullable
61	static String guessEncoding(@NotNull final byte[] bytes)
62	{
63	String[] encodings = { UTF_8, System.getProperty("file.encoding"), ISO_8859_1 };
64
65	for (String encoding : encodings)
66	{
67	CharsetDecoder cs = Charset.forName(encoding).newDecoder();
68
69	try {
70	cs.decode(ByteBuffer.wrap(bytes));
71	return encoding;
72	} catch (CharacterCodingException e) {
73	// fall through...
74	}
75	}
76
77	// No encodings succeeded. Return null.
78	return null;
79	}
80
81	private Iso2022Converter()
82	{}
83	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: