001// License: GPL. For details, see LICENSE file. 002package org.openstreetmap.josm.io; 003 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.InputStreamReader; 007import java.io.PushbackInputStream; 008import java.io.UnsupportedEncodingException; 009 010/** 011 * Detects the different UTF encodings from byte order mark 012 */ 013public final class UTFInputStreamReader extends InputStreamReader { 014 015 /** 016 * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding. 017 * @return A reader with the correct encoding. Starts to read after the BOM. 018 * @throws IOException if any I/O error occurs 019 * @see #create(java.io.InputStream, String) 020 */ 021 public static UTFInputStreamReader create(InputStream input) throws IOException { 022 return create(input, "UTF-8"); 023 } 024 025 /** 026 * Creates a new {@link InputStreamReader} from the {@link InputStream}. 027 * @param defaultEncoding Used, when no BOM was recognized. Can be null. 028 * @return A reader with the correct encoding. Starts to read after the BOM. 029 * @throws IOException if any I/O error occurs 030 */ 031 public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException { 032 byte[] bom = new byte[4]; 033 String encoding = defaultEncoding; 034 int unread; 035 PushbackInputStream pushbackStream = new PushbackInputStream(input, 4); 036 int n = pushbackStream.read(bom, 0, 4); 037 038 if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { 039 encoding = "UTF-8"; 040 unread = n - 3; 041 } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { 042 encoding = "UTF-32BE"; 043 unread = n - 4; 044 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { 045 encoding = "UTF-32LE"; 046 unread = n - 4; 047 } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { 048 encoding = "UTF-16BE"; 049 unread = n - 2; 050 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { 051 encoding = "UTF-16LE"; 052 unread = n - 2; 053 } else { 054 unread = n; 055 } 056 057 if (unread > 0) { 058 pushbackStream.unread(bom, n - unread, unread); 059 } else if (unread < -1) { 060 pushbackStream.unread(bom, 0, 0); 061 } 062 063 if (encoding == null) { 064 return new UTFInputStreamReader(pushbackStream); 065 } else { 066 return new UTFInputStreamReader(pushbackStream, encoding); 067 } 068 } 069 070 private UTFInputStreamReader(InputStream in) { 071 super(in); 072 } 073 074 private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException { 075 super(in, cs); 076 } 077}