001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.InputStream;
006import java.io.InputStreamReader;
007import java.io.PushbackInputStream;
008import java.io.UnsupportedEncodingException;
009import java.util.Optional;
010
/**
 * Detects the different UTF encodings from byte order mark.
 * <p>
 * Recognized byte order marks (BOM) are UTF-8 ({@code EF BB BF}), UTF-16BE ({@code FE FF}),
 * UTF-16LE ({@code FF FE}), UTF-32BE ({@code 00 00 FE FF}) and UTF-32LE ({@code FF FE 00 00}).
 * The returned reader starts reading right after the BOM, if one was found.
 * @since 3372
 */
public final class UTFInputStreamReader extends InputStreamReader {

    /** Length in bytes of the longest supported BOM (UTF-32). */
    private static final int MAX_BOM_LENGTH = 4;

    private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException {
        super(in, cs);
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding.
     * @param input input stream
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     * @see #create(java.io.InputStream, String)
     */
    public static UTFInputStreamReader create(InputStream input) throws IOException {
        return create(input, "UTF-8");
    }

    /**
     * Creates a new {@link InputStreamReader} from the {@link InputStream}.
     * @param input input stream
     * @param defaultEncoding Used, when no BOM was recognized. Can be null, in which case UTF-8 is used.
     * @return A reader with the correct encoding. Starts to read after the BOM.
     * @throws IOException if any I/O error occurs
     */
    public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException {
        PushbackInputStream pushbackStream = new PushbackInputStream(input, MAX_BOM_LENGTH);
        byte[] bom = new byte[MAX_BOM_LENGTH];
        // Number of valid bytes in "bom" (0 to 4). A single read() call may return fewer bytes
        // than are actually available, so keep reading until the buffer is full or EOF is hit.
        int n = readUpTo(pushbackStream, bom);

        String encoding = defaultEncoding;
        int bomLength = 0;
        // Every signature is guarded by the number of bytes really read, otherwise the
        // zero-initialized tail of the buffer could be mistaken for BOM bytes
        // (e.g. a two byte stream "FF FE" would look like the UTF-32LE mark "FF FE 00 00").
        // The UTF-32LE test must come before UTF-16LE because they share the same prefix.
        if (n >= 3 && bom[0] == (byte) 0xEF && bom[1] == (byte) 0xBB && bom[2] == (byte) 0xBF) {
            encoding = "UTF-8";
            bomLength = 3;
        } else if (n >= 4 && bom[0] == (byte) 0x00 && bom[1] == (byte) 0x00 && bom[2] == (byte) 0xFE && bom[3] == (byte) 0xFF) {
            encoding = "UTF-32BE";
            bomLength = 4;
        } else if (n >= 4 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE && bom[2] == (byte) 0x00 && bom[3] == (byte) 0x00) {
            encoding = "UTF-32LE";
            bomLength = 4;
        } else if (n >= 2 && bom[0] == (byte) 0xFE && bom[1] == (byte) 0xFF) {
            encoding = "UTF-16BE";
            bomLength = 2;
        } else if (n >= 2 && bom[0] == (byte) 0xFF && bom[1] == (byte) 0xFE) {
            encoding = "UTF-16LE";
            bomLength = 2;
        }

        // Give back everything that was read beyond the BOM so that the reader sees it.
        if (n > bomLength) {
            pushbackStream.unread(bom, bomLength, n - bomLength);
        }
        return new UTFInputStreamReader(pushbackStream, Optional.ofNullable(encoding).orElse("UTF-8"));
    }

    /**
     * Reads from the stream until the buffer is full or the end of the stream is reached.
     * @param in stream to read from
     * @param buffer buffer to fill, starting at index 0
     * @return the number of bytes stored in the buffer, between 0 and {@code buffer.length}
     * @throws IOException if any I/O error occurs
     */
    private static int readUpTo(InputStream in, byte[] buffer) throws IOException {
        int total = 0;
        while (total < buffer.length) {
            int count = in.read(buffer, total, buffer.length - total);
            if (count < 0) {
                break; // end of stream
            }
            total += count;
        }
        return total;
    }
}