001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.cache;
003
004import java.io.FileNotFoundException;
005import java.io.IOException;
006import java.net.HttpURLConnection;
007import java.net.URL;
008import java.security.SecureRandom;
009import java.util.List;
010import java.util.Map;
011import java.util.Set;
012import java.util.concurrent.ConcurrentHashMap;
013import java.util.concurrent.ConcurrentMap;
014import java.util.concurrent.LinkedBlockingDeque;
015import java.util.concurrent.ThreadPoolExecutor;
016import java.util.concurrent.TimeUnit;
017import java.util.regex.Matcher;
018
019import org.apache.commons.jcs.access.behavior.ICacheAccess;
020import org.apache.commons.jcs.engine.behavior.ICacheElement;
021import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult;
022import org.openstreetmap.josm.data.imagery.TileJobOptions;
023import org.openstreetmap.josm.data.preferences.IntegerProperty;
024import org.openstreetmap.josm.tools.CheckParameterUtil;
025import org.openstreetmap.josm.tools.HttpClient;
026import org.openstreetmap.josm.tools.Logging;
027import org.openstreetmap.josm.tools.Utils;
028
029/**
 * Generic loader for HTTP based tiles. Uses a custom attribute to check whether an entry has expired
 * according to the HTTP headers sent with the tile. If so, it tries to verify the entry using ETags
 * or If-Modified-Since / Last-Modified.
 *
 * If the tile is not valid, it tries to download it from the remote service and puts it
 * into the cache. If the remote server fails, it tries to use the stale entry.
 *
 * This class keeps only one job running for a given tile. All other jobs just finish, but their
 * listeners are gathered and notified once the download job has finished.
039 *
040 * @author Wiktor Niesiobędzki
041 * @param <K> cache entry key type
042 * @param <V> cache value type
043 * @since 8168
044 */
045public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> {
    /**
     * Default lifetime of a cache entry, in milliseconds (7 days). Used when the server response
     * carries no usable expiration information and as the maximum age in {@link #isCacheElementValid()}.
     */
    protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7);
    // Limit (in milliseconds, 28 days) for the expiration time sent by the server.
    protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28);
    // Absolute expire time limit (in milliseconds, 365 days). In this class it decides whether
    // If-Modified-Since is still sent for a cached object: entries whose last modification is older
    // than this limit are requested without that header.
    protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365);

    /**
     * Maximum number of download threads that will be started (preference key {@code cache.jcs.max_threads}).
     */
    public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10);
057
    /*
     * Default executor for download jobs; used by the two-argument constructor when the caller supplies no executor.
     *
     * The work queue is a LinkedBlockingDeque handed to the executor as a plain BlockingQueue, so it works FIFO;
     * one needs to take care about the ordering of the entries submitted.
     *
     * There is no point in canceling tasks that are already taken by worker threads (if we made so much effort, we can at least
     * cache the response, so later it could be used). Tasks still waiting in the queue can be canceled, but this is a tradeoff
     * between simplicity and performance (we do want to have something to offer to worker threads before tasks will be
     * resubmitted by the class consumer).
     *
     * NOTE(review): the deque is created without a capacity bound. According to the ThreadPoolExecutor documentation, threads
     * above the core pool size are only started when the queue refuses a task, so with this configuration presumably only the
     * single core thread ever runs and THREAD_LIMIT has no effect on this executor - confirm, and consider raising the core size.
     */

    private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor(
            1, // core pool size; further threads are started only when the queue is full
            THREAD_LIMIT.get(), // maximum pool size: do not exceed this number of threads
            30, // keep-alive time for idle threads above the core pool size
            TimeUnit.SECONDS,
            // NOTE(review): this was described as a LIFO queue (recently requested tiles first), but a LinkedBlockingDeque
            // used as the executor's work queue is served in FIFO order
            new LinkedBlockingDeque<Runnable>(),
            Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY)
            );
077
    /** Listeners waiting for each download currently in progress, keyed by the URL string of the job. */
    private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>();
    /** Server keys (see getServerKey()) found not to answer conditional GET requests with 304; HEAD is used for them. */
    private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>();

    protected final long now; // when the job started (System.currentTimeMillis() at construction)

    // cache access and the state lazily read from it by ensureCacheElement()
    private final ICacheAccess<K, V> cache;
    private ICacheElement<K, V> cacheElement;
    protected V cacheData;
    protected CacheEntryAttributes attributes;

    // HTTP connection parameters
    private final int connectTimeout;
    private final int readTimeout;
    private final Map<String, String> headers;
    // executor that runs this job once it is submitted
    private final ThreadPoolExecutor downloadJobExecutor;
    // optional callback run at the end of run(), see setFinishedTask()
    private Runnable finishTask;
    // when true, the cached object is not returned directly and the HTTP cache is bypassed
    private boolean force;
    // minimum expiry time in milliseconds (converted from the seconds value given in the options)
    private long minimumExpiryTime;
096
097    /**
098     * @param cache cache instance that we will work on
099     * @param options options of the request
100     * @param downloadJobExecutor that will be executing the jobs
101     */
102    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
103            TileJobOptions options,
104            ThreadPoolExecutor downloadJobExecutor) {
105        CheckParameterUtil.ensureParameterNotNull(cache, "cache");
106        this.cache = cache;
107        this.now = System.currentTimeMillis();
108        this.connectTimeout = options.getConnectionTimeout();
109        this.readTimeout = options.getReadTimeout();
110        this.headers = options.getHeaders();
111        this.downloadJobExecutor = downloadJobExecutor;
112        this.minimumExpiryTime = TimeUnit.SECONDS.toMillis(options.getMinimumExpiryTime());
113    }
114
115    /**
116     * @param cache cache instance that we will work on
117     * @param options of the request
118     */
119    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
120            TileJobOptions options) {
121        this(cache, options, DEFAULT_DOWNLOAD_JOB_DISPATCHER);
122    }
123
124    private void ensureCacheElement() {
125        if (cacheElement == null && getCacheKey() != null) {
126            cacheElement = cache.getCacheElement(getCacheKey());
127            if (cacheElement != null) {
128                attributes = (CacheEntryAttributes) cacheElement.getElementAttributes();
129                cacheData = cacheElement.getVal();
130            }
131        }
132    }
133
134    @Override
135    public V get() {
136        ensureCacheElement();
137        return cacheData;
138    }
139
140    @Override
141    public void submit(ICachedLoaderListener listener, boolean force) throws IOException {
142        this.force = force;
143        boolean first = false;
144        URL url = getUrl();
145        String deduplicationKey = null;
146        if (url != null) {
147            // url might be null, for example when Bing Attribution is not loaded yet
148            deduplicationKey = url.toString();
149        }
150        if (deduplicationKey == null) {
151            Logging.warn("No url returned for: {0}, skipping", getCacheKey());
152            throw new IllegalArgumentException("No url returned");
153        }
154        synchronized (this) {
155            first = !inProgress.containsKey(deduplicationKey);
156        }
157        inProgress.computeIfAbsent(deduplicationKey, k -> ConcurrentHashMap.newKeySet()).add(listener);
158
159        if (first || force) {
160            // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk
161            Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException());
162            downloadJobExecutor.execute(this);
163        }
164    }
165
166    /**
167     * This method is run when job has finished
168     */
169    protected void executionFinished() {
170        if (finishTask != null) {
171            finishTask.run();
172        }
173    }
174
175    /**
176     *
177     * @return checks if object from cache has sufficient data to be returned
178     */
179    protected boolean isObjectLoadable() {
180        if (cacheData == null) {
181            return false;
182        }
183        return cacheData.getContent().length > 0;
184    }
185
186    /**
187     * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example)
188     * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException
189     *
190     * @return true if we should put empty object into cache, regardless of what remote resource has returned
191     */
192    protected boolean cacheAsEmpty() {
193        return attributes.getResponseCode() < 500;
194    }
195
196    /**
197     * @return key under which discovered server settings will be kept
198     */
199    protected String getServerKey() {
200        try {
201            return getUrl().getHost();
202        } catch (IOException e) {
203            Logging.trace(e);
204            return null;
205        }
206    }
207
208    @Override
209    public void run() {
210        final Thread currentThread = Thread.currentThread();
211        final String oldName = currentThread.getName();
212        currentThread.setName("JCS Downloading: " + getUrlNoException());
213        Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException());
214        ensureCacheElement();
215        try {
216            // try to fetch from cache
217            if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) {
218                // we got something in cache, and it's valid, so lets return it
219                Logging.debug("JCS - Returning object from cache: {0}", getCacheKey());
220                finishLoading(LoadResult.SUCCESS);
221                return;
222            }
223
224            // try to load object from remote resource
225            if (loadObject()) {
226                finishLoading(LoadResult.SUCCESS);
227            } else {
228                // if loading failed - check if we can return stale entry
229                if (isObjectLoadable()) {
230                    // try to get stale entry in cache
231                    finishLoading(LoadResult.SUCCESS);
232                    Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException());
233                } else {
234                    // failed completely
235                    finishLoading(LoadResult.FAILURE);
236                }
237            }
238        } finally {
239            executionFinished();
240            currentThread.setName(oldName);
241        }
242    }
243
244    private void finishLoading(LoadResult result) {
245        Set<ICachedLoaderListener> listeners;
246        try {
247            listeners = inProgress.remove(getUrl().toString());
248        } catch (IOException e) {
249            listeners = null;
250            Logging.trace(e);
251        }
252        if (listeners == null) {
253            Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException());
254            return;
255        }
256        for (ICachedLoaderListener l: listeners) {
257            l.loadingFinished(cacheData, attributes, result);
258        }
259    }
260
261    protected boolean isCacheElementValid() {
262        long expires = attributes.getExpirationTime();
263
264        // check by expire date set by server
265        if (expires != 0L) {
266            // put a limit to the expire time (some servers send a value
267            // that is too large)
268            expires = Math.min(expires, attributes.getCreateTime() + Math.max(EXPIRE_TIME_SERVER_LIMIT, minimumExpiryTime));
269            if (now > expires) {
270                Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}",
271                        getUrlNoException(), Long.toString(expires), Long.toString(now));
272                return false;
273            }
274        } else if (attributes.getLastModification() > 0 &&
275                now - attributes.getLastModification() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
276            // check by file modification date
277            Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException());
278            return false;
279        } else if (now - attributes.getCreateTime() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
280            Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException());
281            return false;
282        }
283        return true;
284    }
285
286    /**
287     * @return true if object was successfully downloaded, false, if there was a loading failure
288     */
289    private boolean loadObject() {
290        if (attributes == null) {
291            attributes = new CacheEntryAttributes();
292        }
293        try {
294            // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match
295            // then just use HEAD request and check returned values
296            if (isObjectLoadable() &&
297                    Boolean.TRUE.equals(useHead.get(getServerKey())) &&
298                    isCacheValidUsingHead()) {
299                Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl());
300                return true;
301            }
302
303            Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl());
304            final HttpClient request = getRequest("GET");
305
306            if (isObjectLoadable() &&
307                    (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) {
308                request.setIfModifiedSince(attributes.getLastModification());
309            }
310            if (isObjectLoadable() && attributes.getEtag() != null) {
311                request.setHeader("If-None-Match", attributes.getEtag());
312            }
313
314            final HttpClient.Response urlConn = request.connect();
315
316            if (urlConn.getResponseCode() == 304) {
317                // If isModifiedSince or If-None-Match has been set
318                // and the server answers with a HTTP 304 = "Not Modified"
319                Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl());
320                // update cache attributes
321                attributes = parseHeaders(urlConn);
322                cache.put(getCacheKey(), cacheData, attributes);
323                return true;
324            } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code
325                    && (
326                            (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
327                            attributes.getLastModification() == urlConn.getLastModified())
328                    ) {
329                // we sent ETag or If-Modified-Since, but didn't get 304 response code
330                // for further requests - use HEAD
331                String serverKey = getServerKey();
332                Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers",
333                        serverKey);
334                useHead.put(serverKey, Boolean.TRUE);
335            }
336
337            attributes = parseHeaders(urlConn);
338
339            for (int i = 0; i < 5; ++i) {
340                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
341                    Thread.sleep(5000L+new SecureRandom().nextInt(5000));
342                    continue;
343                }
344
345                attributes.setResponseCode(urlConn.getResponseCode());
346                byte[] raw;
347                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) {
348                    raw = Utils.readBytesFromStream(urlConn.getContent());
349                } else {
350                    raw = new byte[]{};
351                    try {
352                        String data = urlConn.fetchContent();
353                        if (!data.isEmpty()) {
354                            String detectErrorMessage = detectErrorMessage(data);
355                            if (detectErrorMessage != null) {
356                                attributes.setErrorMessage(detectErrorMessage);
357                            }
358                        }
359                    } catch (IOException e) {
360                        Logging.warn(e);
361                    }
362                }
363
364                if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) {
365                    // we need to check cacheEmpty, so for cases, when data is returned, but we want to store
366                    // as empty (eg. empty tile images) to save some space
367                    cacheData = createCacheEntry(raw);
368                    cache.put(getCacheKey(), cacheData, attributes);
369                    Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}",
370                            getCacheKey(), raw.length, getUrl());
371                    return true;
372                } else if (cacheAsEmpty()) {
373                    cacheData = createCacheEntry(new byte[]{});
374                    cache.put(getCacheKey(), cacheData, attributes);
375                    Logging.debug("JCS - Caching empty object {0}", getUrl());
376                    return true;
377                } else {
378                    Logging.debug("JCS - failure during load - response is not loadable nor cached as empty");
379                    return false;
380                }
381            }
382        } catch (FileNotFoundException e) {
383            Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException());
384            attributes.setResponseCode(404);
385            attributes.setError(e);
386            attributes.setException(e);
387            boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty();
388            if (doCache) {
389                cacheData = createCacheEntry(new byte[]{});
390                cache.put(getCacheKey(), cacheData, attributes);
391            }
392            return doCache;
393        } catch (IOException e) {
394            Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException());
395            if (isObjectLoadable()) {
396                return true;
397            } else {
398                attributes.setError(e);
399                attributes.setException(e);
400                attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached
401                return false;
402            }
403
404        } catch (InterruptedException e) {
405            attributes.setError(e);
406            attributes.setException(e);
407            Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException());
408            Thread.currentThread().interrupt();
409        }
410        Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException());
411        return false;
412    }
413
414    /**
415     * Tries do detect an error message from given string.
416     * @param data string to analyze
417     * @return error message if detected, or null
418     * @since 14535
419     */
420    public String detectErrorMessage(String data) {
421        Matcher m = HttpClient.getTomcatErrorMatcher(data);
422        return m.matches() ? m.group(1).replace("'", "''") : null;
423    }
424
425    /**
426     * Check if the object is loadable. This means, if the data will be parsed, and if this response
427     * will finish as successful retrieve.
428     *
429     * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors
430     *
431     * @param headerFields headers sent by server
432     * @param responseCode http status code
433     * @param raw data read from server
434     * @return true if object should be cached and returned to listener
435     */
436    protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) {
437        return raw != null && raw.length != 0 && responseCode < 400;
438    }
439
    /**
     * Creates the cache entry that will be stored for the given content. This class calls it with
     * the bytes read from the server, or with an empty array when the response is cached as empty.
     *
     * @param content raw content of the entry
     * @return cache entry for the content
     */
    protected abstract V createCacheEntry(byte[] content);
441
442    protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) {
443        CacheEntryAttributes ret = new CacheEntryAttributes();
444
445        /*
446         * according to https://www.ietf.org/rfc/rfc2616.txt Cache-Control takes precedence over max-age
447         * max-age is for private caches, s-max-age is for shared caches. We take any value that is larger
448         */
449        Long expiration = 0L;
450        String cacheControl = urlConn.getHeaderField("Cache-Control");
451        if (cacheControl != null) {
452            for (String token: cacheControl.split(",")) {
453                try {
454                    if (token.startsWith("max-age=")) {
455                        expiration = Math.max(expiration,
456                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("max-age=".length())))
457                                + System.currentTimeMillis()
458                                );
459                    }
460                    if (token.startsWith("s-max-age=")) {
461                        expiration = Math.max(expiration,
462                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("s-max-age=".length())))
463                                + System.currentTimeMillis()
464                                );
465                    }
466                } catch (NumberFormatException e) {
467                    // ignore malformed Cache-Control headers
468                    Logging.trace(e);
469                }
470            }
471        }
472
473        if (expiration.equals(0L)) {
474            expiration = urlConn.getExpiration();
475        }
476
477        // if nothing is found - set default
478        if (expiration.equals(0L)) {
479            expiration = System.currentTimeMillis() + DEFAULT_EXPIRE_TIME;
480        }
481
482        ret.setExpirationTime(Math.max(minimumExpiryTime + System.currentTimeMillis(), expiration));
483        ret.setLastModification(now);
484        ret.setEtag(urlConn.getHeaderField("ETag"));
485
486        return ret;
487    }
488
489    private HttpClient getRequest(String requestMethod) throws IOException {
490        final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod);
491        urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*");
492        urlConn.setReadTimeout(readTimeout); // 30 seconds read timeout
493        urlConn.setConnectTimeout(connectTimeout);
494        if (headers != null) {
495            urlConn.setHeaders(headers);
496        }
497
498        final boolean noCache = force
499                // To remove when switching to Java 11
500                // Workaround for https://bugs.openjdk.java.net/browse/JDK-8146450
501                || (Utils.getJavaVersion() == 8 && Utils.isRunningJavaWebStart());
502        urlConn.useCache(!noCache);
503
504        return urlConn;
505    }
506
507    private boolean isCacheValidUsingHead() throws IOException {
508        final HttpClient.Response urlConn = getRequest("HEAD").connect();
509        long lastModified = urlConn.getLastModified();
510        boolean ret = (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
511                (lastModified != 0 && lastModified <= attributes.getLastModification());
512        if (ret) {
513            // update attributes
514            attributes = parseHeaders(urlConn);
515            cache.put(getCacheKey(), cacheData, attributes);
516        }
517        return ret;
518    }
519
520    /**
521     * TODO: move to JobFactory
522     * cancels all outstanding tasks in the queue.
523     */
524    public void cancelOutstandingTasks() {
525        for (Runnable r: downloadJobExecutor.getQueue()) {
526            if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) {
527                ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation();
528            }
529        }
530    }
531
532    /**
533     * Sets a job, that will be run, when job will finish execution
534     * @param runnable that will be executed
535     */
536    public void setFinishedTask(Runnable runnable) {
537        this.finishTask = runnable;
538
539    }
540
541    /**
542     * Marks this job as canceled
543     */
544    public void handleJobCancellation() {
545        finishLoading(LoadResult.CANCELED);
546    }
547
548    private URL getUrlNoException() {
549        try {
550            return getUrl();
551        } catch (IOException e) {
552            return null;
553        }
554    }
555}