001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.io;
003
004import static org.openstreetmap.josm.tools.I18n.tr;
005
006import java.io.BufferedInputStream;
007import java.io.BufferedOutputStream;
008import java.io.File;
009import java.io.FileInputStream;
010import java.io.FileOutputStream;
011import java.io.IOException;
012import java.io.InputStream;
013import java.io.OutputStream;
014import java.net.HttpURLConnection;
015import java.net.MalformedURLException;
016import java.net.URL;
017import java.nio.charset.StandardCharsets;
018import java.util.ArrayList;
019import java.util.Arrays;
020import java.util.Enumeration;
021import java.util.List;
022import java.util.zip.ZipEntry;
023import java.util.zip.ZipFile;
024
025import org.openstreetmap.josm.Main;
026import org.openstreetmap.josm.tools.Pair;
027import org.openstreetmap.josm.tools.Utils;
028
029/**
030 * Downloads a file and caches it on disk in order to reduce network load.
031 * 
 * Supports URLs, local files, and two custom schemes: <code>resource:</code> to get
 * resources from the current *.jar file and <code>josmdir:</code> to get files from
 * the JOSM preferences directory. (Local caching is only done for URLs.)
034 * <p>
035 * The mirrored file is only downloaded if it has been more than 7 days since
036 * last download. (Time can be configured.)
037 * <p>
038 * The file content is normally accessed with {@link #getInputStream()}, but
039 * you can also get the mirrored copy with {@link #getFile()}.
040 */
041public class CachedFile {
042
    /**
     * Caching strategy. Selects how a stale cache file of a URL resource is refreshed.
     */
    public enum CachingStrategy {
        /**
         * If cached file on disk is older than a certain time (7 days by default),
         * consider the cache stale and try to download the file again.
         */
        MaxAge,
        /**
         * Similar to MaxAge, considers the cache stale when a certain age is
         * exceeded. In addition, an If-Modified-Since HTTP header is added.
         * When the server replies "304 Not Modified", this is considered the same
         * as a full download: the existing cache file is kept and its download
         * time is reset to the current time.
         */
        IfModifiedSince
    }
    /** Resource name as passed to the constructor or {@link #setName}: a file name, a URL or a custom-scheme name. */
    protected String name;
    /** Maximum cache age in seconds; values &lt;= 0 make {@code checkLocal} fall back to the "mirror.maxtime" preference. */
    protected long maxAge;
    /** Directory receiving the cache file; when null, {@code checkLocal} replaces it with the preferences cache directory. */
    protected String destDir;
    /** Value for the HTTP Accept header, or null to send none. */
    protected String httpAccept;
    /** Caching strategy; only {@link CachingStrategy#IfModifiedSince} is tested explicitly, any other value (including null) adds no header. */
    protected CachingStrategy cachingStrategy;

    /** Local file resolved by {@link #getFile()}; stays null for {@code resource://} names. */
    protected File cacheFile = null;
    /** Set on the first call of {@link #getFile()}; later calls return {@link #cacheFile} without resolving again. */
    boolean initialized = false;

    /** Sentinel for {@link #setMaxAge}: use the default maximum age taken from the preferences. */
    public static final long DEFAULT_MAXTIME = -1L;
    /** Number of seconds in one day; multiply by it to express a maximum age in days. */
    public static final long DAYS = 24*60*60; // factor to get caching time in days
071
    /**
     * Constructs a CachedFile object from a given filename, URL or internal resource.
     * Nothing is resolved or downloaded here; the name is only stored.
     *
     * @param name can be:<ul>
     *  <li>relative or absolute file name</li>
     *  <li>{@code file:///SOME/FILE} the same as above</li>
     *  <li>{@code http://...} a URL. It will be cached on disk.</li>
     *  <li>{@code resource://SOME/FILE} file from the classpath (usually in the current *.jar)</li>
     *  <li>{@code josmdir://SOME/FILE} file inside josm config directory (since r7058)</li></ul>
     */
    public CachedFile(String name) {
        this.name = name;
    }
085
    /**
     * Set the name of the resource.
     * <p>
     * Only the name is replaced: a file that was already resolved by
     * {@link #getFile()} is not reset by this method.
     * @param name can be:<ul>
     *  <li>relative or absolute file name</li>
     *  <li>{@code file:///SOME/FILE} the same as above</li>
     *  <li>{@code http://...} a URL. It will be cached on disk.</li>
     *  <li>{@code resource://SOME/FILE} file from the classpath (usually in the current *.jar)</li>
     *  <li>{@code josmdir://SOME/FILE} file inside josm config directory (since r7058)</li></ul>
     * @return this object
     */
    public CachedFile setName(String name) {
        this.name = name;
        return this;
    }
100    
    /**
     * Set maximum age of cache file. Only applies to URLs.
     * When this time has passed after the last download of the file, the
     * cache is considered stale and a new download will be attempted.
     * @param maxAge the maximum cache age in seconds. {@link #DEFAULT_MAXTIME}
     * (or any other value &lt;= 0) selects the default taken from the
     * "mirror.maxtime" preference (one week if unset)
     * @return this object
     */
    public CachedFile setMaxAge(long maxAge) {
        this.maxAge = maxAge;
        return this;
    }
112
    /**
     * Set the destination directory for the cache file. Only applies to URLs.
     * The directory is also part of the preference key under which the cache
     * entry is stored.
     * @param destDir the destination directory; null selects the cache
     * directory configured in the preferences
     * @return this object
     */
    public CachedFile setDestDir(String destDir) {
        this.destDir = destDir;
        return this;
    }
122
    /**
     * Set the accepted MIME types sent in the HTTP Accept header. Only applies to URLs.
     * @param httpAccept the accepted MIME types; null sends no Accept header
     * @return this object
     */
    public CachedFile setHttpAccept(String httpAccept) {
        this.httpAccept = httpAccept;
        return this;
    }
132
    /**
     * Set the caching strategy. Only applies to URLs.
     * @param cachingStrategy the strategy used to refresh a stale cache file
     * @return this object
     */
    public CachedFile setCachingStrategy(CachingStrategy cachingStrategy) {
        this.cachingStrategy = cachingStrategy;
        return this;
    }
142
    /**
     * Returns the name of the resource.
     * @return the name as given to the constructor or to {@link #setName}
     */
    public String getName() {
        return name;
    }
146
    /**
     * Returns the maximum age of the cache file.
     * @return the maximum cache age in seconds, as set with {@link #setMaxAge}
     */
    public long getMaxAge() {
        return maxAge;
    }
150
    /**
     * Returns the destination directory for the cache file.
     * @return the destination directory, possibly null
     */
    public String getDestDir() {
        return destDir;
    }
154
    /**
     * Returns the accepted MIME types sent in the HTTP Accept header.
     * @return the accepted MIME types, possibly null
     */
    public String getHttpAccept() {
        return httpAccept;
    }
158
    /**
     * Returns the caching strategy.
     * @return the caching strategy, possibly null if none was set
     */
    public CachingStrategy getCachingStrategy() {
        return cachingStrategy;
    }
162
163    /**
164     * Get InputStream to the requested resource.
165     * @return the InputStream
166     * @throws IOException when the resource with the given name could not be retrieved
167     */
168    public InputStream getInputStream() throws IOException {
169        File file = getFile();
170        if (file == null) {
171            if (name.startsWith("resource://")) {
172                InputStream is = getClass().getResourceAsStream(
173                        name.substring("resource:/".length()));
174                if (is == null)
175                    throw new IOException(tr("Failed to open input stream for resource ''{0}''", name));
176                return is;
177            } else throw new IOException();
178        }
179        return new FileInputStream(file);
180    }
181
182    /**
183     * Get local file for the requested resource.
184     * @return The local cache file for URLs. If the resource is a local file,
185     * returns just that file.
186     * @throws IOException when the resource with the given name could not be retrieved
187     */
188    public File getFile() throws IOException {
189        if (initialized)
190            return cacheFile;
191        initialized = true;
192        URL url;
193        try {
194            url = new URL(name);
195            if ("file".equals(url.getProtocol())) {
196                cacheFile = new File(name.substring("file:/".length()));
197                if (!cacheFile.exists()) {
198                    cacheFile = new File(name.substring("file://".length()));
199                }
200            } else {
201                cacheFile = checkLocal(url);
202            }
203        } catch (java.net.MalformedURLException e) {
204            if (name.startsWith("resource://")) {
205                return null;
206            } else if (name.startsWith("josmdir://")) {
207                cacheFile = new File(Main.pref.getPreferencesDir(), name.substring("josmdir://".length()));
208            } else {
209                cacheFile = new File(name);
210            }
211        }
212        if (cacheFile == null)
213            throw new IOException();
214        return cacheFile;
215    }
216    
217    /**
218     * Looks for a certain entry inside a zip file and returns the entry path.
219     *
220     * Replies a file in the top level directory of the ZIP file which has an
221     * extension <code>extension</code>. If more than one files have this
222     * extension, the last file whose name includes <code>namepart</code>
223     * is opened.
224     *
225     * @param extension  the extension of the file we're looking for
226     * @param namepart the name part
227     * @return The zip entry path of the matching file. Null if this cached file
228     * doesn't represent a zip file or if there was no matching
229     * file in the ZIP file.
230     */
231    public String findZipEntryPath(String extension, String namepart) {
232        Pair<String, InputStream> ze = findZipEntryImpl(extension, namepart);
233        if (ze == null) return null;
234        return ze.a;
235    }
236
237    /**
238     * Like {@link #findZipEntryPath}, but returns the corresponding InputStream.
239     * @param extension  the extension of the file we're looking for
240     * @param namepart the name part
241     * @return InputStream to the matching file. Null if this cached file
242     * doesn't represent a zip file or if there was no matching
243     * file in the ZIP file.
244     * @since 6148
245     */
246    public InputStream findZipEntryInputStream(String extension, String namepart) {
247        Pair<String, InputStream> ze = findZipEntryImpl(extension, namepart);
248        if (ze == null) return null;
249        return ze.b;
250    }
251
252    @SuppressWarnings("resource")
253    private Pair<String, InputStream> findZipEntryImpl(String extension, String namepart) {
254        File file = null;
255        try {
256            file = getFile();
257        } catch (IOException ex) {
258        }
259        if (file == null)
260            return null;
261        Pair<String, InputStream> res = null;
262        try {
263            ZipFile zipFile = new ZipFile(file, StandardCharsets.UTF_8);
264            ZipEntry resentry = null;
265            Enumeration<? extends ZipEntry> entries = zipFile.entries();
266            while (entries.hasMoreElements()) {
267                ZipEntry entry = entries.nextElement();
268                if (entry.getName().endsWith("." + extension)) {
269                    /* choose any file with correct extension. When more than
270                        one file, prefer the one which matches namepart */
271                    if (resentry == null || entry.getName().indexOf(namepart) >= 0) {
272                        resentry = entry;
273                    }
274                }
275            }
276            if (resentry != null) {
277                InputStream is = zipFile.getInputStream(resentry);
278                res = Pair.create(resentry.getName(), is);
279            } else {
280                Utils.close(zipFile);
281            }
282        } catch (Exception e) {
283            if (file.getName().endsWith(".zip")) {
284                Main.warn(tr("Failed to open file with extension ''{2}'' and namepart ''{3}'' in zip file ''{0}''. Exception was: {1}",
285                        file.getName(), e.toString(), extension, namepart));
286            }
287        }
288        return res;
289    }
290
    /**
     * Clear the cache for the given resource.
     * This forces a fresh download.
     * Equivalent to {@link #cleanup(String, String)} with a null destination directory.
     * @param name the URL
     */
    public static void cleanup(String name) {
        cleanup(name, null);
    }
299
300    /**
301     * Clear the cache for the given resource.
302     * This forces a fresh download.
303     * @param name the URL
304     * @param destDir the destination directory (see {@link #setDestDir(java.lang.String)})
305     */
306    public static void cleanup(String name, String destDir) {
307        URL url;
308        try {
309            url = new URL(name);
310            if (!"file".equals(url.getProtocol())) {
311                String prefKey = getPrefKey(url, destDir);
312                List<String> localPath = new ArrayList<>(Main.pref.getCollection(prefKey));
313                if (localPath.size() == 2) {
314                    File lfile = new File(localPath.get(1));
315                    if(lfile.exists()) {
316                        lfile.delete();
317                    }
318                }
319                Main.pref.putCollection(prefKey, null);
320            }
321        } catch (MalformedURLException e) {
322            Main.warn(e);
323        }
324    }
325
326    /**
327     * Get preference key to store the location and age of the cached file.
328     * 2 resources that point to the same url, but that are to be stored in different
329     * directories will not share a cache file.
330     */
331    private static String getPrefKey(URL url, String destDir) {
332        StringBuilder prefKey = new StringBuilder("mirror.");
333        if (destDir != null) {
334            prefKey.append(destDir);
335            prefKey.append(".");
336        }
337        prefKey.append(url.toString());
338        return prefKey.toString().replaceAll("=","_");
339    }
340
341    private File checkLocal(URL url) throws IOException {
342        String prefKey = getPrefKey(url, destDir);
343        long age = 0L;
344        long lMaxAge = maxAge;
345        Long ifModifiedSince = null;
346        File localFile = null;
347        List<String> localPathEntry = new ArrayList<>(Main.pref.getCollection(prefKey));
348        if (localPathEntry.size() == 2) {
349            localFile = new File(localPathEntry.get(1));
350            if(!localFile.exists())
351                localFile = null;
352            else {
353                if ( maxAge == DEFAULT_MAXTIME
354                        || maxAge <= 0 // arbitrary value <= 0 is deprecated
355                ) {
356                    lMaxAge = Main.pref.getInteger("mirror.maxtime", 7*24*60*60); // one week
357                }
358                age = System.currentTimeMillis() - Long.parseLong(localPathEntry.get(0));
359                if (age < lMaxAge*1000) {
360                    return localFile;
361                }
362                if (cachingStrategy == CachingStrategy.IfModifiedSince) {
363                    ifModifiedSince = Long.parseLong(localPathEntry.get(0));
364                }
365            }
366        }
367        if (destDir == null) {
368            destDir = Main.pref.getCacheDirectory().getPath();
369        }
370
371        File destDirFile = new File(destDir);
372        if (!destDirFile.exists()) {
373            destDirFile.mkdirs();
374        }
375        
376        String a = url.toString().replaceAll("[^A-Za-z0-9_.-]", "_");
377        String localPath = "mirror_" + a;
378        destDirFile = new File(destDir, localPath + ".tmp");
379        try {
380            HttpURLConnection con = connectFollowingRedirect(url, httpAccept, ifModifiedSince);
381            if (ifModifiedSince != null && con.getResponseCode() == HttpURLConnection.HTTP_NOT_MODIFIED) {
382                Main.debug("304 Not Modified ("+url+")");
383                if (localFile == null) throw new AssertionError();
384                Main.pref.putCollection(prefKey, 
385                        Arrays.asList(Long.toString(System.currentTimeMillis()), localPathEntry.get(1)));
386                return localFile;
387            } 
388            try (
389                InputStream bis = new BufferedInputStream(con.getInputStream());
390                OutputStream fos = new FileOutputStream(destDirFile);
391                OutputStream bos = new BufferedOutputStream(fos)
392            ) {
393                byte[] buffer = new byte[4096];
394                int length;
395                while ((length = bis.read(buffer)) > -1) {
396                    bos.write(buffer, 0, length);
397                }
398            }
399            localFile = new File(destDir, localPath);
400            if(Main.platform.rename(destDirFile, localFile)) {
401                Main.pref.putCollection(prefKey, 
402                        Arrays.asList(Long.toString(System.currentTimeMillis()), localFile.toString()));
403            } else {
404                Main.warn(tr("Failed to rename file {0} to {1}.",
405                destDirFile.getPath(), localFile.getPath()));
406            }
407        } catch (IOException e) {
408            if (age >= lMaxAge*1000 && age < lMaxAge*1000*2) {
409                Main.warn(tr("Failed to load {0}, use cached file and retry next time: {1}", url, e));
410                return localFile;
411            } else {
412                throw e;
413            }
414        }
415
416        return localFile;
417    }
418
419    /**
420     * Opens a connection for downloading a resource.
421     * <p>
422     * Manually follows redirects because
423     * {@link HttpURLConnection#setFollowRedirects(boolean)} fails if the redirect
424     * is going from a http to a https URL, see <a href="https://bugs.openjdk.java.net/browse/JDK-4620571">bug report</a>.
425     * <p>
426     * This can causes problems when downloading from certain GitHub URLs.
427     *
428     * @param downloadUrl The resource URL to download
429     * @param httpAccept The accepted MIME types sent in the HTTP Accept header. Can be {@code null}
430     * @param ifModifiedSince The download time of the cache file, optional
431     * @return The HTTP connection effectively linked to the resource, after all potential redirections
432     * @throws MalformedURLException If a redirected URL is wrong
433     * @throws IOException If any I/O operation goes wrong
434     * @since 6867
435     */
436    public static HttpURLConnection connectFollowingRedirect(URL downloadUrl, String httpAccept, Long ifModifiedSince) throws MalformedURLException, IOException {
437        HttpURLConnection con = null;
438        int numRedirects = 0;
439        while(true) {
440            con = Utils.openHttpConnection(downloadUrl);
441            if (ifModifiedSince != null) {
442                con.setIfModifiedSince(ifModifiedSince);
443            }
444            con.setInstanceFollowRedirects(false);
445            con.setConnectTimeout(Main.pref.getInteger("socket.timeout.connect",15)*1000);
446            con.setReadTimeout(Main.pref.getInteger("socket.timeout.read",30)*1000);
447            Main.debug("GET "+downloadUrl);
448            if (httpAccept != null) {
449                Main.debug("Accept: "+httpAccept);
450                con.setRequestProperty("Accept", httpAccept);
451            }
452            try {
453                con.connect();
454            } catch (IOException e) {
455                Main.addNetworkError(downloadUrl, Utils.getRootCause(e));
456                throw e;
457            }
458            switch(con.getResponseCode()) {
459            case HttpURLConnection.HTTP_OK:
460                return con;
461            case HttpURLConnection.HTTP_NOT_MODIFIED:
462                if (ifModifiedSince != null)
463                    return con;
464            case HttpURLConnection.HTTP_MOVED_PERM:
465            case HttpURLConnection.HTTP_MOVED_TEMP:
466            case HttpURLConnection.HTTP_SEE_OTHER:
467                String redirectLocation = con.getHeaderField("Location");
468                if (downloadUrl == null) {
469                    /* I18n: argument is HTTP response code */ String msg = tr("Unexpected response from HTTP server. Got {0} response without ''Location'' header. Can''t redirect. Aborting.", con.getResponseCode());
470                    throw new IOException(msg);
471                }
472                downloadUrl = new URL(redirectLocation);
473                // keep track of redirect attempts to break a redirect loops if it happens
474                // to occur for whatever reason
475                numRedirects++;
476                if (numRedirects >= Main.pref.getInteger("socket.maxredirects", 5)) {
477                    String msg = tr("Too many redirects to the download URL detected. Aborting.");
478                    throw new IOException(msg);
479                }
480                Main.info(tr("Download redirected to ''{0}''", downloadUrl));
481                break;
482            default:
483                String msg = tr("Failed to read from ''{0}''. Server responded with status code {1}.", downloadUrl, con.getResponseCode());
484                throw new IOException(msg);
485            }
486        }
487    }
488
489}