001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import static org.openstreetmap.josm.tools.I18n.tr;
005
006import java.io.BufferedReader;
007import java.io.Closeable;
008import java.io.File;
009import java.io.IOException;
010import java.io.InputStream;
011import java.math.BigInteger;
012import java.net.HttpURLConnection;
013import java.net.MalformedURLException;
014import java.net.URL;
015import java.nio.charset.StandardCharsets;
016import java.nio.file.Files;
017import java.nio.file.InvalidPathException;
018import java.nio.file.StandardCopyOption;
019import java.security.MessageDigest;
020import java.security.NoSuchAlgorithmException;
021import java.util.ArrayList;
022import java.util.Arrays;
023import java.util.Enumeration;
024import java.util.List;
025import java.util.Map;
026import java.util.Optional;
027import java.util.concurrent.ConcurrentHashMap;
028import java.util.concurrent.TimeUnit;
029import java.util.zip.ZipEntry;
030import java.util.zip.ZipFile;
031
032import org.openstreetmap.josm.data.Preferences;
033import org.openstreetmap.josm.spi.preferences.Config;
034import org.openstreetmap.josm.tools.HttpClient;
035import org.openstreetmap.josm.tools.Logging;
036import org.openstreetmap.josm.tools.Pair;
037import org.openstreetmap.josm.tools.PlatformManager;
038import org.openstreetmap.josm.tools.ResourceProvider;
039import org.openstreetmap.josm.tools.Utils;
040
041/**
042 * Downloads a file and caches it on disk in order to reduce network load.
043 *
044 * Supports URLs, local files, and a custom scheme (<code>resource:</code>) to get
045 * resources from the current JOSM *.jar file as well as plugins *.jar files.
046 * (Local caching is only done for URLs.)
047 * <p>
048 * The mirrored file is only downloaded if it has been more than 7 days since
049 * last download. (Time can be configured.)
050 * <p>
051 * The file content is normally accessed with {@link #getInputStream()}, but
052 * you can also get the mirrored copy with {@link #getFile()}.
053 */
054public class CachedFile implements Closeable {
055
056    /**
057     * Caching strategy.
058     */
059    public enum CachingStrategy {
060        /**
061         * If cached file on disk is older than a certain time (7 days by default),
062         * consider the cache stale and try to download the file again.
063         */
064        MaxAge,
065        /**
066         * Similar to MaxAge, considers the cache stale when a certain age is
067         * exceeded. In addition, a If-Modified-Since HTTP header is added.
068         * When the server replies "304 Not Modified", this is considered the same
069         * as a full download.
070         */
071        IfModifiedSince
072    }
073
074    protected String name;
075    protected long maxAge;
076    protected String destDir;
077    protected String httpAccept;
078    protected CachingStrategy cachingStrategy;
079
080    private boolean fastFail;
081    private HttpClient activeConnection;
082    protected File cacheFile;
083    protected boolean initialized;
084    protected String parameter;
085
086    public static final long DEFAULT_MAXTIME = -1L;
087    public static final long DAYS = TimeUnit.DAYS.toSeconds(1); // factor to get caching time in days
088
089    private final Map<String, String> httpHeaders = new ConcurrentHashMap<>();
090
091    /**
092     * Constructs a CachedFile object from a given filename, URL or internal resource.
093     *
094     * @param name can be:<ul>
095     *  <li>relative or absolute file name</li>
096     *  <li>{@code file:///SOME/FILE} the same as above</li>
097     *  <li>{@code http://...} a URL. It will be cached on disk.</li>
098     *  <li>{@code resource://SOME/FILE} file from the classpath (usually in the current *.jar)</li>
099     *  <li>{@code josmdir://SOME/FILE} file inside josm user data directory (since r7058)</li>
100     *  <li>{@code josmplugindir://SOME/FILE} file inside josm plugin directory (since r7834)</li></ul>
101     */
102    public CachedFile(String name) {
103        this.name = name;
104    }
105
106    /**
107     * Set the name of the resource.
108     * @param name can be:<ul>
109     *  <li>relative or absolute file name</li>
110     *  <li>{@code file:///SOME/FILE} the same as above</li>
111     *  <li>{@code http://...} a URL. It will be cached on disk.</li>
112     *  <li>{@code resource://SOME/FILE} file from the classpath (usually in the current *.jar)</li>
113     *  <li>{@code josmdir://SOME/FILE} file inside josm user data directory (since r7058)</li>
114     *  <li>{@code josmplugindir://SOME/FILE} file inside josm plugin directory (since r7834)</li></ul>
115     * @return this object
116     */
117    public CachedFile setName(String name) {
118        this.name = name;
119        return this;
120    }
121
122    /**
123     * Set maximum age of cache file. Only applies to URLs.
124     * When this time has passed after the last download of the file, the
125     * cache is considered stale and a new download will be attempted.
126     * @param maxAge the maximum cache age in seconds
127     * @return this object
128     */
129    public CachedFile setMaxAge(long maxAge) {
130        this.maxAge = maxAge;
131        return this;
132    }
133
134    /**
135     * Set the destination directory for the cache file. Only applies to URLs.
136     * @param destDir the destination directory
137     * @return this object
138     */
139    public CachedFile setDestDir(String destDir) {
140        this.destDir = destDir;
141        return this;
142    }
143
144    /**
145     * Set the accepted MIME types sent in the HTTP Accept header. Only applies to URLs.
146     * @param httpAccept the accepted MIME types
147     * @return this object
148     */
149    public CachedFile setHttpAccept(String httpAccept) {
150        this.httpAccept = httpAccept;
151        return this;
152    }
153
154    /**
155     * Set the caching strategy. Only applies to URLs.
156     * @param cachingStrategy caching strategy
157     * @return this object
158     */
159    public CachedFile setCachingStrategy(CachingStrategy cachingStrategy) {
160        this.cachingStrategy = cachingStrategy;
161        return this;
162    }
163
164    /**
165     * Sets the http headers. Only applies to URL pointing to http or https resources
166     * @param headers that should be sent together with request
167     * @return this object
168     */
169    public CachedFile setHttpHeaders(Map<String, String> headers) {
170        this.httpHeaders.putAll(headers);
171        return this;
172    }
173
174    /**
175     * Sets whether opening HTTP connections should fail fast, i.e., whether a
176     * {@link HttpClient#setConnectTimeout(int) low connect timeout} should be used.
177     * @param fastFail whether opening HTTP connections should fail fast
178     */
179    public void setFastFail(boolean fastFail) {
180        this.fastFail = fastFail;
181    }
182
183    /**
184     * Sets additional URL parameter (used e.g. for maps)
185     * @param parameter the URL parameter
186     * @since 13536
187     */
188    public void setParam(String parameter) {
189        this.parameter = parameter;
190    }
191
192    public String getName() {
193        if (parameter != null)
194            return name.replaceAll("%<(.*)>", "");
195        return name;
196    }
197
198    /**
199     * Returns maximum age of cache file. Only applies to URLs.
200     * When this time has passed after the last download of the file, the
201     * cache is considered stale and a new download will be attempted.
202     * @return the maximum cache age in seconds
203     */
204    public long getMaxAge() {
205        return maxAge;
206    }
207
208    public String getDestDir() {
209        return destDir;
210    }
211
212    public String getHttpAccept() {
213        return httpAccept;
214    }
215
216    public CachingStrategy getCachingStrategy() {
217        return cachingStrategy;
218    }
219
220    /**
221     * Get InputStream to the requested resource.
222     * @return the InputStream
223     * @throws IOException when the resource with the given name could not be retrieved
224     * @throws InvalidPathException if a Path object cannot be constructed from the inner file path
225     */
226    public InputStream getInputStream() throws IOException {
227        File file = getFile();
228        if (file == null) {
229            if (name != null && name.startsWith("resource://")) {
230                return Optional.ofNullable(ResourceProvider.getResourceAsStream(name.substring("resource:/".length())))
231                        .orElseThrow(() -> new IOException(tr("Failed to open input stream for resource ''{0}''", name)));
232            } else {
233                throw new IOException("No file found for: "+name);
234            }
235        }
236        return Files.newInputStream(file.toPath());
237    }
238
239    /**
240     * Get the full content of the requested resource as a byte array.
241     * @return the full content of the requested resource as byte array
242     * @throws IOException in case of an I/O error
243     */
244    public byte[] getByteContent() throws IOException {
245        return Utils.readBytesFromStream(getInputStream());
246    }
247
248    /**
249     * Returns {@link #getInputStream()} wrapped in a buffered reader.
250     * <p>
251     * Detects Unicode charset in use utilizing {@link UTFInputStreamReader}.
252     *
253     * @return buffered reader
254     * @throws IOException if any I/O error occurs
255     * @since 9411
256     */
257    public BufferedReader getContentReader() throws IOException {
258        return new BufferedReader(UTFInputStreamReader.create(getInputStream()));
259    }
260
261    /**
262     * Get local file for the requested resource.
263     * @return The local cache file for URLs. If the resource is a local file,
264     * returns just that file.
265     * @throws IOException when the resource with the given name could not be retrieved
266     */
267    public synchronized File getFile() throws IOException {
268        if (initialized)
269            return cacheFile;
270        initialized = true;
271        if (name == null || name.startsWith("resource://")) {
272            return null;
273        }
274        URL url;
275        try {
276            url = new URL(name);
277            if ("file".equals(url.getProtocol())) {
278                cacheFile = new File(name.substring("file:/".length() - 1));
279                if (!cacheFile.exists()) {
280                    cacheFile = new File(name.substring("file://".length() - 1));
281                }
282            } else {
283                try {
284                    cacheFile = checkLocal(url);
285                } catch (SecurityException e) {
286                    throw new IOException(e);
287                }
288            }
289        } catch (MalformedURLException e) {
290            if (name.startsWith("josmdir://")) {
291                cacheFile = new File(Config.getDirs().getUserDataDirectory(false), name.substring("josmdir://".length()));
292            } else if (name.startsWith("josmplugindir://")) {
293                cacheFile = new File(Preferences.main().getPluginsDirectory(), name.substring("josmplugindir://".length()));
294            } else {
295                cacheFile = new File(name);
296            }
297        }
298        if (cacheFile == null)
299            throw new IOException("Unable to get cache file for "+getName());
300        return cacheFile;
301    }
302
303    /**
304     * Looks for a certain entry inside a zip file and returns the entry path.
305     *
306     * Replies a file in the top level directory of the ZIP file which has an
307     * extension <code>extension</code>. If more than one files have this
308     * extension, the last file whose name includes <code>namepart</code>
309     * is opened.
310     *
311     * @param extension  the extension of the file we're looking for
312     * @param namepart the name part
313     * @return The zip entry path of the matching file. <code>null</code> if this cached file
314     * doesn't represent a zip file or if there was no matching
315     * file in the ZIP file.
316     */
317    public String findZipEntryPath(String extension, String namepart) {
318        Pair<String, InputStream> ze = findZipEntryImpl(extension, namepart);
319        if (ze == null) return null;
320        return ze.a;
321    }
322
323    /**
324     * Like {@link #findZipEntryPath}, but returns the corresponding InputStream.
325     * @param extension  the extension of the file we're looking for
326     * @param namepart the name part
327     * @return InputStream to the matching file. <code>null</code> if this cached file
328     * doesn't represent a zip file or if there was no matching
329     * file in the ZIP file.
330     * @since 6148
331     */
332    public InputStream findZipEntryInputStream(String extension, String namepart) {
333        Pair<String, InputStream> ze = findZipEntryImpl(extension, namepart);
334        if (ze == null) return null;
335        return ze.b;
336    }
337
338    private Pair<String, InputStream> findZipEntryImpl(String extension, String namepart) {
339        File file = null;
340        try {
341            file = getFile();
342        } catch (IOException ex) {
343            Logging.log(Logging.LEVEL_WARN, ex);
344        }
345        if (file == null)
346            return null;
347        Pair<String, InputStream> res = null;
348        try {
349            ZipFile zipFile = new ZipFile(file, StandardCharsets.UTF_8); // NOPMD
350            ZipEntry resentry = null;
351            Enumeration<? extends ZipEntry> entries = zipFile.entries();
352            while (entries.hasMoreElements()) {
353                ZipEntry entry = entries.nextElement();
354                // choose any file with correct extension. When more than one file, prefer the one which matches namepart
355                if (entry.getName().endsWith('.' + extension) && (resentry == null || entry.getName().indexOf(namepart) >= 0)) {
356                    resentry = entry;
357                }
358            }
359            if (resentry != null) {
360                InputStream is = zipFile.getInputStream(resentry); // NOPMD
361                res = Pair.create(resentry.getName(), is);
362            } else {
363                Utils.close(zipFile);
364            }
365        } catch (IOException e) {
366            if (file.getName().endsWith(".zip")) {
367                Logging.log(Logging.LEVEL_WARN,
368                        tr("Failed to open file with extension ''{2}'' and namepart ''{3}'' in zip file ''{0}''. Exception was: {1}",
369                        file.getName(), e.toString(), extension, namepart), e);
370            }
371        }
372        return res;
373    }
374
375    /**
376     * Clear the cache for the given resource.
377     * This forces a fresh download.
378     * @param name the URL
379     */
380    public static void cleanup(String name) {
381        cleanup(name, null);
382    }
383
384    /**
385     * Clear the cache for the given resource.
386     * This forces a fresh download.
387     * @param name the URL
388     * @param destDir the destination directory (see {@link #setDestDir(java.lang.String)})
389     */
390    public static void cleanup(String name, String destDir) {
391        URL url;
392        try {
393            url = new URL(name);
394            if (!"file".equals(url.getProtocol())) {
395                String prefKey = getPrefKey(url, destDir);
396                List<String> localPath = new ArrayList<>(Config.getPref().getList(prefKey));
397                if (localPath.size() == 2) {
398                    File lfile = new File(localPath.get(1));
399                    if (lfile.exists()) {
400                        Utils.deleteFile(lfile);
401                    }
402                }
403                Config.getPref().putList(prefKey, null);
404            }
405        } catch (MalformedURLException e) {
406            Logging.warn(e);
407        }
408    }
409
410    /**
411     * Get preference key to store the location and age of the cached file.
412     * 2 resources that point to the same url, but that are to be stored in different
413     * directories will not share a cache file.
414     * @param url URL
415     * @param destDir destination directory
416     * @return Preference key
417     */
418    private static String getPrefKey(URL url, String destDir) {
419        StringBuilder prefKey = new StringBuilder("mirror.");
420        if (destDir != null) {
421            prefKey.append(destDir).append('.');
422        }
423        prefKey.append(url.toString().replaceAll("%<(.*)>", ""));
424        return prefKey.toString().replace("=", "_");
425    }
426
427    private File checkLocal(URL url) throws IOException {
428        String prefKey = getPrefKey(url, destDir);
429        String urlStr = url.toExternalForm();
430        if (parameter != null)
431            urlStr = urlStr.replaceAll("%<(.*)>", "");
432        long age = 0L;
433        long maxAgeMillis = TimeUnit.SECONDS.toMillis(maxAge);
434        Long ifModifiedSince = null;
435        File localFile = null;
436        List<String> localPathEntry = new ArrayList<>(Config.getPref().getList(prefKey));
437        boolean offline = false;
438        try {
439            checkOfflineAccess(urlStr);
440        } catch (OfflineAccessException e) {
441            Logging.trace(e);
442            offline = true;
443        }
444        if (localPathEntry.size() == 2) {
445            localFile = new File(localPathEntry.get(1));
446            if (!localFile.exists()) {
447                localFile = null;
448            } else {
449                if (maxAge == DEFAULT_MAXTIME
450                        || maxAge <= 0 // arbitrary value <= 0 is deprecated
451                ) {
452                    maxAgeMillis = TimeUnit.SECONDS.toMillis(Config.getPref().getLong("mirror.maxtime", TimeUnit.DAYS.toSeconds(7)));
453                }
454                age = System.currentTimeMillis() - Long.parseLong(localPathEntry.get(0));
455                if (offline || age < maxAgeMillis) {
456                    return localFile;
457                }
458                if (cachingStrategy == CachingStrategy.IfModifiedSince) {
459                    ifModifiedSince = Long.valueOf(localPathEntry.get(0));
460                }
461            }
462        }
463        if (destDir == null) {
464            destDir = Config.getDirs().getCacheDirectory(true).getPath();
465        }
466
467        File destDirFile = new File(destDir);
468        if (!destDirFile.exists()) {
469            Utils.mkDirs(destDirFile);
470        }
471
472        // No local file + offline => nothing to do
473        if (offline) {
474            return null;
475        }
476
477        if (parameter != null) {
478            String u = url.toExternalForm();
479            String uc;
480            if (parameter.isEmpty()) {
481                uc = u.replaceAll("%<(.*)>", "");
482            } else {
483                uc = u.replaceAll("%<(.*)>", "$1" + Utils.encodeUrl(parameter));
484            }
485            if (!uc.equals(u))
486                url = new URL(uc);
487        }
488
489        String a = urlStr.replaceAll("[^A-Za-z0-9_.-]", "_");
490        String localPath = "mirror_" + a;
491        localPath = truncatePath(destDir, localPath);
492        destDirFile = new File(destDir, localPath + ".tmp");
493        try {
494            activeConnection = HttpClient.create(url)
495                    .setAccept(httpAccept)
496                    .setIfModifiedSince(ifModifiedSince == null ? 0L : ifModifiedSince)
497                    .setHeaders(httpHeaders);
498            if (fastFail) {
499                activeConnection.setReadTimeout(1000);
500            }
501            final HttpClient.Response con = activeConnection.connect();
502            if (ifModifiedSince != null && con.getResponseCode() == HttpURLConnection.HTTP_NOT_MODIFIED) {
503                Logging.debug("304 Not Modified ({0})", urlStr);
504                if (localFile == null)
505                    throw new AssertionError();
506                Config.getPref().putList(prefKey,
507                        Arrays.asList(Long.toString(System.currentTimeMillis()), localPathEntry.get(1)));
508                return localFile;
509            } else if (con.getResponseCode() == HttpURLConnection.HTTP_NOT_FOUND) {
510                throw new IOException(tr("The requested URL {0} was not found", urlStr));
511            }
512            try (InputStream is = con.getContent()) {
513                Files.copy(is, destDirFile.toPath(), StandardCopyOption.REPLACE_EXISTING);
514            }
515            activeConnection = null;
516            localFile = new File(destDir, localPath);
517            if (PlatformManager.getPlatform().rename(destDirFile, localFile)) {
518                Config.getPref().putList(prefKey,
519                        Arrays.asList(Long.toString(System.currentTimeMillis()), localFile.toString()));
520            } else {
521                Logging.warn(tr("Failed to rename file {0} to {1}.",
522                destDirFile.getPath(), localFile.getPath()));
523            }
524        } catch (IOException e) {
525            if (age >= maxAgeMillis && age < maxAgeMillis*2) {
526                Logging.warn(tr("Failed to load {0}, use cached file and retry next time: {1}", urlStr, e));
527                return localFile;
528            } else {
529                throw e;
530            }
531        }
532
533        return localFile;
534    }
535
536    private static void checkOfflineAccess(String urlString) {
537        OnlineResource.JOSM_WEBSITE.checkOfflineAccess(urlString, Config.getUrls().getJOSMWebsite());
538        OnlineResource.OSM_API.checkOfflineAccess(urlString, OsmApi.getOsmApi().getServerUrl());
539    }
540
541    private static String truncatePath(String directory, String fileName) {
542        if (directory.length() + fileName.length() > 255) {
543            // Windows doesn't support paths longer than 260, leave 5 chars as safe buffer, 4 will be used by ".tmp"
544            // TODO: what about filename size on other systems? 255?
545            if (directory.length() > 191 && PlatformManager.isPlatformWindows()) {
546                // digest length + name prefix == 64
547                // 255 - 64 = 191
548                // TODO: use this check only on Windows?
549                throw new IllegalArgumentException("Path " + directory + " too long to cached files");
550            }
551
552            MessageDigest md;
553            try {
554                md = MessageDigest.getInstance("SHA-256");
555                md.update(fileName.getBytes(StandardCharsets.UTF_8));
556                String digest = String.format("%064x", new BigInteger(1, md.digest()));
557                return fileName.substring(0, Math.min(fileName.length(), 32)) + digest.substring(0, 32);
558            } catch (NoSuchAlgorithmException e) {
559                Logging.error(e);
560                // TODO: what better can we do here?
561                throw new IllegalArgumentException("Missing digest algorithm SHA-256", e);
562            }
563        }
564        return fileName;
565    }
566
567    /**
568     * Attempts to disconnect an URL connection.
569     * @see HttpClient#disconnect()
570     * @since 9411
571     */
572    @Override
573    public void close() {
574        if (activeConnection != null) {
575            activeConnection.disconnect();
576        }
577    }
578
579    /**
580     * Clears the cached file
581     * @throws IOException if any I/O error occurs
582     * @since 10993
583     */
584    public void clear() throws IOException {
585        URL url;
586        try {
587            url = new URL(name);
588            if ("file".equals(url.getProtocol())) {
589                return; // this is local file - do not delete it
590            }
591        } catch (MalformedURLException e) {
592            return; // if it's not a URL, then it still might be a local file - better not to delete
593        }
594        File f = getFile();
595        if (f != null && f.exists()) {
596            Utils.deleteFile(f);
597        }
598    }
599}