001// License: GPL. See LICENSE file for details. 002package org.openstreetmap.josm.io; 003 004import java.io.IOException; 005import java.io.InputStream; 006import java.io.InputStreamReader; 007import java.io.PushbackInputStream; 008import java.io.UnsupportedEncodingException; 009 010/** 011 * Detects the different UTF encodings from byte order mark 012 */ 013public final class UTFInputStreamReader extends InputStreamReader { 014 015 /** 016 * Creates a new {@link InputStreamReader} from the {@link InputStream} with UTF-8 as default encoding. 017 * @return A reader with the correct encoding. Starts to read after the BOM. 018 * @see #create(java.io.InputStream, String) 019 */ 020 public static UTFInputStreamReader create(InputStream input) throws IOException { 021 return create(input, "UTF-8"); 022 } 023 024 /** 025 * Creates a new {@link InputStreamReader} from the {@link InputStream}. 026 * @param defaultEncoding Used, when no BOM was recognized. Can be null. 027 * @return A reader with the correct encoding. Starts to read after the BOM. 028 */ 029 public static UTFInputStreamReader create(InputStream input, String defaultEncoding) throws IOException { 030 byte[] bom = new byte[4]; 031 String encoding = defaultEncoding; 032 int unread; 033 PushbackInputStream pushbackStream = new PushbackInputStream(input, 4); 034 int n = pushbackStream.read(bom, 0, 4); 035 036 if ((bom[0] == (byte) 0xEF) && (bom[1] == (byte) 0xBB) && (bom[2] == (byte) 0xBF)) { 037 encoding = "UTF-8"; 038 unread = n - 3; 039 } else if ((bom[0] == (byte) 0x00) && (bom[1] == (byte) 0x00) && (bom[2] == (byte) 0xFE) && (bom[3] == (byte) 0xFF)) { 040 encoding = "UTF-32BE"; 041 unread = n - 4; 042 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE) && (bom[2] == (byte) 0x00) && (bom[3] == (byte) 0x00)) { 043 encoding = "UTF-32LE"; 044 unread = n - 4; 045 } else if ((bom[0] == (byte) 0xFE) && (bom[1] == (byte) 0xFF)) { 046 encoding = "UTF-16BE"; 047 unread = n - 2; 048 } else if ((bom[0] == (byte) 0xFF) && (bom[1] == (byte) 0xFE)) { 049 encoding = "UTF-16LE"; 050 unread = n - 2; 051 } else { 052 unread = n; 053 } 054 055 if (unread > 0) { 056 pushbackStream.unread(bom, (n - unread), unread); 057 } else if (unread < -1) { 058 pushbackStream.unread(bom, 0, 0); 059 } 060 061 if (encoding == null) { 062 return new UTFInputStreamReader(pushbackStream); 063 } else { 064 return new UTFInputStreamReader(pushbackStream, encoding); 065 } 066 } 067 068 private UTFInputStreamReader(InputStream in) { 069 super(in); 070 } 071 private UTFInputStreamReader(InputStream in, String cs) throws UnsupportedEncodingException { 072 super(in, cs); 073 } 074}