// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.tools;

import java.io.IOException;
import java.io.InputStream;
import java.net.URL;
import java.util.List;
import java.util.Optional;
import java.util.stream.Collectors;

import javax.xml.parsers.ParserConfigurationException;
import javax.xml.xpath.XPath;
import javax.xml.xpath.XPathConstants;
import javax.xml.xpath.XPathExpressionException;
import javax.xml.xpath.XPathFactory;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.xml.sax.SAXException;

/**
 * Interaction with Mediawiki instances, such as the OSM wiki.
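 * <p>
 * Typical usage (an illustrative sketch; the base URL and page titles below are examples only,
 * and exception handling is omitted):
 * <pre>{@code
 * Mediawiki wiki = new Mediawiki("https://wiki.openstreetmap.org");
 * Optional<String> page = wiki.findExistingPage(Arrays.asList("Tag:amenity=cafe", "Key:amenity"));
 * page.ifPresent(title -> System.out.println("Found: " + title));
 * }</pre>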
 * @since 14641
 */
public class Mediawiki {

    private final String baseUrl;

    /**
     * Constructs a new {@code Mediawiki} for the given base URL.
     * @param baseUrl The wiki base URL
     */
    public Mediawiki(String baseUrl) {
        this.baseUrl = baseUrl;
    }

    /**
     * Determines the first of the given pages that exists on the Mediawiki instance.
     * @param pages the pages to check
     * @return the first existing page, or an empty {@code Optional} if none of the pages exists
     * @throws IOException if any I/O error occurs
     * @throws ParserConfigurationException if a parser cannot be created
     * @throws SAXException if any XML error occurs
     * @throws XPathExpressionException if any error in an XPath expression occurs
     */
    public Optional<String> findExistingPage(List<String> pages)
            throws IOException, ParserConfigurationException, SAXException, XPathExpressionException {
        List<String> distinctPages = pages.stream().distinct().collect(Collectors.toList());
        // find a page that actually exists in the wiki
        // API documentation: https://wiki.openstreetmap.org/w/api.php?action=help&modules=query
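        // page titles are URL-encoded individually and joined with an encoded pipe character,
        // the separator the API expects for the "titles" parameter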
        final URL url = new URL(baseUrl + "/w/api.php?action=query&format=xml&titles=" + distinctPages.stream()
                .map(Utils::encodeUrl)
                .collect(Collectors.joining(Utils.encodeUrl("|")))
        );
        final HttpClient.Response conn = HttpClient.create(url).connect();
        final Document document;
        try (InputStream content = conn.getContent()) {
            // parse the API response using the hardened DOM parser
            document = XmlUtils.parseSafeDOM(content);
        } finally {
            // release the connection even if parsing fails
            conn.disconnect();
        }
        final XPath xPath = XPathFactory.newInstance().newXPath();
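        // check each requested page in order; the API may normalize a requested title
        // (e.g. capitalization or underscore fixes), so resolve the normalized title first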
        for (String page : distinctPages) {
            String normalized = xPath.evaluate("/api/query/normalized/n[@from='" + page + "']/@to", document);
            if (normalized == null || normalized.isEmpty()) {
                normalized = page;
            }
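            // a <page> element without "missing" and "invalid" attributes denotes an existing page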
            final Node node = (Node) xPath.evaluate("/api/query/pages/page[@title='" + normalized + "']", document, XPathConstants.NODE);
            if (node != null
                    && node.getAttributes().getNamedItem("missing") == null
                    && node.getAttributes().getNamedItem("invalid") == null) {
                return Optional.of(page);
            }
        }
        return Optional.empty();
    }
}