Context Navigation

← Previous Revision
Latest Revision
Next Revision →
Normal
Revision Log

source: josm/trunk/src/com/drew/metadata/iptc/Iso2022Converter.java@ 10746

Last change on this file since 10746 was 8132, checked in by Don-vip, 10 years ago
fix #11162 - update to metadata-extractor 2.7.2
File size: 2.9 KB

Rev	Line
[8132]	1	package com.drew.metadata.iptc;
	2
	3	import com.drew.lang.annotations.NotNull;
	4	import com.drew.lang.annotations.Nullable;
	5
	6	import java.nio.ByteBuffer;
	7	import java.nio.charset.CharacterCodingException;
	8	import java.nio.charset.Charset;
	9	import java.nio.charset.CharsetDecoder;
	10
	11	public final class Iso2022Converter
	12	{
	13	private static final String ISO_8859_1 = "ISO-8859-1";
	14	private static final String UTF_8 = "UTF-8";
	15
	16	private static final byte LATIN_CAPITAL_A = 0x41;
	17	private static final int DOT = 0xe280a2;
	18	private static final byte LATIN_CAPITAL_G = 0x47;
	19	private static final byte PERCENT_SIGN = 0x25;
	20	private static final byte ESC = 0x1B;
	21
	22	/**
	23	* Converts the given ISO2022 char set to a Java charset name.
	24	*
	25	* @param bytes string data encoded using ISO2022
	26	* @return the Java charset name as a string, or <code>null</code> if the conversion was not possible
	27	*/
	28	@Nullable
	29	public static String convertISO2022CharsetToJavaCharset(@NotNull final byte[] bytes)
	30	{
	31	if (bytes.length > 2 && bytes[0] == ESC && bytes[1] == PERCENT_SIGN && bytes[2] == LATIN_CAPITAL_G)
	32	return UTF_8;
	33
	34	if (bytes.length > 3 && bytes[0] == ESC && (bytes[3] & 0xFF \| ((bytes[2] & 0xFF) << 8) \| ((bytes[1] & 0xFF) << 16)) == DOT && bytes[4] == LATIN_CAPITAL_A)
	35	return ISO_8859_1;
	36
	37	return null;
	38	}
	39
	40	/**
	41	* Attempts to guess the encoding of a string provided as a byte array.
	42	* <p/>
	43	* Encodings trialled are, in order:
	44	* <ul>
	45	* <li>UTF-8</li>
	46	* <li><code>System.getProperty("file.encoding")</code></li>
	47	* <li>ISO-8859-1</li>
	48	* </ul>
	49	* <p/>
	50	* Its only purpose is to guess the encoding if and only if iptc tag coded character set is not set. If the
	51	* encoding is not UTF-8, the tag should be set. Otherwise it is bad practice. This method tries to
	52	* workaround this issue since some metadata manipulating tools do not prevent such bad practice.
	53	* <p/>
	54	* About the reliability of this method: The check if some bytes are UTF-8 or not has a very high reliability.
	55	* The two other checks are less reliable.
	56	*
	57	* @param bytes some text as bytes
	58	* @return the name of the encoding or null if none could be guessed
	59	*/
	60	@Nullable
	61	static String guessEncoding(@NotNull final byte[] bytes)
	62	{
	63	String[] encodings = { UTF_8, System.getProperty("file.encoding"), ISO_8859_1 };
	64
	65	for (String encoding : encodings)
	66	{
	67	CharsetDecoder cs = Charset.forName(encoding).newDecoder();
	68
	69	try {
	70	cs.decode(ByteBuffer.wrap(bytes));
	71	return encoding;
	72	} catch (CharacterCodingException e) {
	73	// fall through...
	74	}
	75	}
	76
	77	// No encodings succeeded. Return null.
	78	return null;
	79	}
	80
	81	private Iso2022Converter()
	82	{}
	83	}

Note: See TracBrowser for help on using the repository browser.

Download in other formats: