001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import java.io.IOException;
005import java.io.Reader;
006
007import org.openstreetmap.josm.Main;
008
009/**
010 * FilterInputStream that gets rid of characters that are invalid in an XML 1.0
011 * document.
012 *
013 * Although these characters are forbidden, in the real wold they still appear
014 * in XML files. Java's SAX parser throws an exception, so we have to filter
015 * at a lower level.
016 *
017 * Only handles control characters (<0x20). Invalid characters are replaced
018 * by space (0x20).
019 */
020public class InvalidXmlCharacterFilter extends Reader {
021
022    private Reader reader;
023
024    private static boolean firstWarning = true;
025
026    private static final boolean[] INVALID_CHARS;
027
028    static {
029        INVALID_CHARS = new boolean[0x20];
030        for (int i = 0; i < INVALID_CHARS.length; ++i) {
031            INVALID_CHARS[i] = true;
032        }
033        INVALID_CHARS[0x9] = false; // tab
034        INVALID_CHARS[0xA] = false; // LF
035        INVALID_CHARS[0xD] = false; // CR
036    }
037
038    /**
039     * Constructs a new {@code InvalidXmlCharacterFilter} for the given Reader.
040     * @param reader The reader to filter
041     */
042    public InvalidXmlCharacterFilter(Reader reader) {
043        this.reader = reader;
044    }
045
046    @Override
047    public int read(char[] b, int off, int len) throws IOException {
048        int n = reader.read(b, off, len);
049        if (n == -1) {
050            return -1;
051        }
052        for (int i = off; i < off + n; ++i) {
053            b[i] = filter(b[i]);
054        }
055        return n;
056    }
057
058    @Override
059    public void close() throws IOException {
060        reader.close();
061    }
062
063    private static char filter(char in) {
064        if (in < 0x20 && INVALID_CHARS[in]) {
065            if (firstWarning) {
066                Main.warn("Invalid xml character encountered: '"+in+"'.");
067                firstWarning = false;
068            }
069            return 0x20;
070        }
071        return in;
072    }
073}