001 // License: GPL. See LICENSE file for details.
002 package org.openstreetmap.josm.io;
003
004 import java.io.InputStream;
005 import java.io.InputStreamReader;
006 import java.io.IOException;
007 import java.io.PushbackInputStream;
008 import java.io.UnsupportedEncodingException;
009
/**
 * Reader that detects the UTF encoding of a stream from its byte order mark (BOM).
 *
 * Instances are obtained through {@link #create(InputStream, String)}.
 */
public class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest BOM that is recognized (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Converts an input stream to a reader, choosing the encoding from the BOM if one is present.
     *
     * Up to four bytes are read from {@code input} to look for a BOM. Bytes that do not
     * belong to a recognized BOM are pushed back, so the returned reader starts right
     * after the BOM (or at the very beginning of the stream when there is none).
     *
     * @param input The stream to read from.
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case
     *        the single-argument {@link InputStreamReader} constructor (default charset) is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws UnsupportedEncodingException if the selected encoding is not supported
     * @throws IOException if reading from {@code input} fails
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);

        // A single read() may deliver fewer bytes than requested even though more follow
        // (e.g. network streams), so keep reading until the buffer is full or the stream ends.
        int n = 0;
        while (n < MAX_BOM_LENGTH) {
            int count = pushbackStream.read(bom, n, MAX_BOM_LENGTH - n);
            if (count <= 0) {
                break;
            }
            n += count;
        }

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32LE check must come before UTF-16LE because its BOM starts with the same two bytes.
        if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back everything that was read beyond the BOM.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        } else {
            return new UTFInputStreamReader(pushbackStream, encoding);
        }
    }

    /**
     * Checks whether the first bytes of a buffer equal the given prefix.
     *
     * @param buffer The buffer holding the bytes read so far.
     * @param available Number of valid bytes in {@code buffer}; bytes beyond it are never compared.
     * @param prefix The expected leading bytes, given as unsigned values (0-255).
     * @return true if at least {@code prefix.length} bytes are available and all of them match
     */
    private static boolean startsWith(byte[] buffer, int available, int... prefix) {
        if (available < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (buffer[i] != (byte) prefix[i]) {
                return false;
            }
        }
        return true;
    }

    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}