001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.InputStreamReader;
007import java.io.PushbackInputStream;
008import java.io.UnsupportedEncodingException;
009
/**
 * An {@link InputStreamReader} that selects its character encoding from a leading
 * Unicode byte order mark (BOM), if the stream starts with one.
 * <p>
 * Recognized BOMs are those of UTF-8, UTF-16 (BE/LE) and UTF-32 (BE/LE). The BOM itself
 * is consumed, so the first character read is the first character of the actual content.
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest recognized BOM (UTF-32); also the look-ahead size. */
    private static final int MAX_BOM_LENGTH = 4;

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input input stream
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * <p>
     * Up to four bytes are read ahead to look for a BOM. Bytes that turn out not to belong
     * to a BOM are pushed back, so no content is lost.
     * @param input input stream
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case
     *        the single-argument {@link InputStreamReader} constructor is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        byte[] bom = new byte[MAX_BOM_LENGTH];
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);

        // A single read() may return fewer bytes than requested even though more data
        // follows, so keep reading until the look-ahead buffer is full or the stream ends.
        int available = 0;
        while (available < MAX_BOM_LENGTH) {
            int count = pushbackStream.read(bom, available, MAX_BOM_LENGTH - available);
            if (count <= 0) {
                break;
            }
            available += count;
        }

        String encoding = defaultEncoding;
        int bomLength = 0;
        // The UTF-32 checks must precede the UTF-16 ones: the UTF-32LE BOM (FF FE 00 00)
        // starts with the UTF-16LE BOM (FF FE).
        if (startsWith(bom, available, 0xEF, 0xBB, 0xBF)) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (startsWith(bom, available, 0x00, 0x00, 0xFE, 0xFF)) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (startsWith(bom, available, 0xFF, 0xFE, 0x00, 0x00)) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (startsWith(bom, available, 0xFE, 0xFF)) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (startsWith(bom, available, 0xFF, 0xFE)) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Hand back everything that was read ahead but is not part of the BOM.
        int surplus = available - bomLength;
        if (surplus > 0) {
            pushbackStream.unread(bom, bomLength, surplus);
        }

        if (encoding == null) {
            return new UTFInputStreamReader(pushbackStream);
        } else {
            return new UTFInputStreamReader(pushbackStream, encoding);
        }
    }

    /**
     * Tells whether the first bytes of {@code buffer} equal {@code signature}.
     * @param buffer look-ahead buffer
     * @param available number of bytes of {@code buffer} that were actually read from the stream
     * @param signature expected byte values, given as ints in the range 0x00-0xFF
     * @return {@code true} if at least {@code signature.length} bytes were read and they all match
     */
    private static boolean startsWith(byte[] buffer, int available, int... signature) {
        if (available < signature.length) {
            // Never compare against zero-filled slots that were not read from the stream.
            return false;
        }
        for (int i = 0; i < signature.length; i++) {
            if (buffer[i] != (byte) signature[i]) {
                return false;
            }
        }
        return true;
    }

    private UTFInputStreamReader(InputStream in) {
        super(in);
    }

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }
}