001// License: GPL. See LICENSE file for details.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.InputStreamReader;
007import java.io.PushbackInputStream;
008import java.io.UnsupportedEncodingException;
009
/**
 * An {@link InputStreamReader} that picks its character encoding from the byte order mark (BOM)
 * at the start of the stream.
 * <p>
 * Recognized marks are UTF-8 ({@code EF BB BF}), UTF-16BE ({@code FE FF}), UTF-16LE ({@code FF FE}),
 * UTF-32BE ({@code 00 00 FE FF}) and UTF-32LE ({@code FF FE 00 00}). A recognized BOM is consumed;
 * every other byte is left in the stream for the reader to decode.
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest recognized BOM (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input The stream to read from
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if reading the first bytes of the stream fails
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * @param input The stream to read from
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case the
     *        default charset of {@link InputStreamReader#InputStreamReader(InputStream)} is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if reading the first bytes of the stream fails, or (as
     *         {@link UnsupportedEncodingException}) if the chosen encoding is not supported
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        byte[] bom = new byte[MAX_BOM_LENGTH];
        int n = readBomCandidate(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            // Must be tested before UTF-16LE, whose BOM is a prefix of this one.
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back everything that was read but is not part of the BOM.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        } else {
            return new UTFInputStreamReader(pushbackStream, encoding);
        }
    }

    /**
     * Fills {@code buffer} from {@code in}, repeating the read until the buffer is full or the
     * stream ends. A single {@code read} call may legally return fewer bytes than requested.
     * @return the number of bytes actually stored in {@code buffer} (0 at end of stream)
     */
    private static int readBomCandidate(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count <= 0) {
                // -1 is end of stream; 0 would violate the InputStream contract, stop rather than spin.
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Tells whether the first {@code length} valid bytes of {@code buffer} begin with {@code prefix}.
     * Bytes beyond {@code length} were never read and are not considered.
     */
    private static boolean startsWith(byte[] buffer, int length, int... prefix) {
        if (length < prefix.length) {
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if (buffer[i] != (byte) prefix[i]) {
                return false;
            }
        }
        return true;
    }

    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}