001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.InputStreamReader;
007import java.io.PushbackInputStream;
008import java.io.UnsupportedEncodingException;
009
/**
 * Detects the different UTF encodings from byte order mark.
 * <p>
 * Instances are obtained through the {@code create} factory methods, which inspect the
 * first bytes of the stream for a UTF-8, UTF-16 or UTF-32 byte order mark (BOM), select the
 * matching encoding and position the reader right after the BOM.
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest recognized byte order mark (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input The stream to read from
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * <p>
     * Up to four leading bytes are read to look for a BOM. Bytes that do not belong to a
     * recognized BOM are pushed back, so no content is lost. The sequence {@code FF FE 00 00}
     * is treated as a UTF-32LE BOM rather than a UTF-16LE BOM followed by U+0000.
     * @param input The stream to read from
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case the
     *        single-argument {@link InputStreamReader} constructor (platform default charset) is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        // A single read() may deliver fewer bytes than requested even when more data follows,
        // so keep reading until the buffer is full or the stream ends.
        int n = readFully(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32LE check must precede the UTF-16LE one because its BOM starts with the UTF-16LE BOM.
        if (startsWith(bom, n, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, n, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, n, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, n, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Hand back every byte that was read but is not part of the BOM.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        } else {
            return new UTFInputStreamReader(pushbackStream, encoding);
        }
    }

    /**
     * Reads from the stream until the buffer is full or the end of the stream is reached.
     * @param in The stream to read from
     * @param buffer The buffer to fill
     * @return The number of bytes actually stored in {@code buffer} (0 if the stream is empty)
     * @throws IOException if any I/O error occurs
     */
    private static int readFully(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count < 0) {
                break;
            }
            total += count;
        }
        return total;
    }

    /**
     * Checks whether the valid part of the buffer starts with the given byte sequence.
     * @param data The buffer holding the bytes read so far
     * @param length The number of valid bytes in {@code data}
     * @param prefix The expected leading bytes, given as unsigned values (0-255)
     * @return {@code true} if at least {@code prefix.length} bytes are valid and they all match
     */
    private static boolean startsWith(byte[] data, int length, int... prefix) {
        if (length < prefix.length) {
            // Never compare against buffer slots that were not filled from the stream.
            return false;
        }
        for (int i = 0; i < prefix.length; i++) {
            if ((data[i] & 0xFF) != prefix[i]) {
                return false;
            }
        }
        return true;
    }

    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}