001// License: GPL. For details, see LICENSE file.
002package org.openstreetmap.josm.data.cache;
003
004import java.io.FileNotFoundException;
005import java.io.IOException;
006import java.net.HttpURLConnection;
007import java.net.URL;
008import java.security.SecureRandom;
009import java.util.HashSet;
010import java.util.List;
011import java.util.Map;
012import java.util.Set;
013import java.util.concurrent.ConcurrentHashMap;
014import java.util.concurrent.ConcurrentMap;
015import java.util.concurrent.LinkedBlockingDeque;
016import java.util.concurrent.ThreadPoolExecutor;
017import java.util.concurrent.TimeUnit;
018import java.util.regex.Matcher;
019
020import org.apache.commons.jcs.access.behavior.ICacheAccess;
021import org.apache.commons.jcs.engine.behavior.ICacheElement;
022import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult;
023import org.openstreetmap.josm.data.imagery.TileJobOptions;
024import org.openstreetmap.josm.data.preferences.IntegerProperty;
025import org.openstreetmap.josm.tools.CheckParameterUtil;
026import org.openstreetmap.josm.tools.HttpClient;
027import org.openstreetmap.josm.tools.Logging;
028import org.openstreetmap.josm.tools.Utils;
029
/**
 * Generic loader for HTTP based tiles. Uses custom attributes to check whether an entry has expired
 * according to the HTTP headers sent with the tile. If so, it tries to verify the entry using ETags
 * or If-Modified-Since / Last-Modified.
 *
 * If the tile is not valid, it will try to download it from the remote service and put it
 * into the cache. If the remote server fails, it will try to use the stale entry.
 *
 * This class keeps only one job running for a given tile. All other jobs will just finish, but
 * their listeners will be gathered and notified once the download job has finished.
 *
 * @author Wiktor Niesiobędzki
 * @param <K> cache entry key type
 * @param <V> cache value type
 * @since 8168
 */
046public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> {
047    protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7);
048    // Limit for the max-age value send by the server.
049    protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28);
050    // Absolute expire time limit. Cached tiles that are older will not be used,
051    // even if the refresh from the server fails.
052    protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365);
053
054    /**
055     * maximum download threads that will be started
056     */
057    public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10);
058
059    /*
060     * ThreadPoolExecutor starts new threads, until THREAD_LIMIT is reached. Then it puts tasks into LinkedBlockingDeque.
061     *
062     * The queue works FIFO, so one needs to take care about ordering of the entries submitted
063     *
064     * There is no point in canceling tasks, that are already taken by worker threads (if we made so much effort, we can at least cache
065     * the response, so later it could be used). We could actually cancel what is in LIFOQueue, but this is a tradeoff between simplicity
066     * and performance (we do want to have something to offer to worker threads before tasks will be resubmitted by class consumer)
067     */
068
069    private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor(
070            1, // we have a small queue, so threads will be quickly started (threads are started only, when queue is full)
071            THREAD_LIMIT.get(), // do not this number of threads
072            30, // keepalive for thread
073            TimeUnit.SECONDS,
074            // make queue of LIFO type - so recently requested tiles will be loaded first (assuming that these are which user is waiting to see)
075            new LinkedBlockingDeque<Runnable>(),
076            Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY)
077            );
078
079    private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>();
080    private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>();
081
082    protected final long now; // when the job started
083
084    private final ICacheAccess<K, V> cache;
085    private ICacheElement<K, V> cacheElement;
086    protected V cacheData;
087    protected CacheEntryAttributes attributes;
088
089    // HTTP connection parameters
090    private final int connectTimeout;
091    private final int readTimeout;
092    private final Map<String, String> headers;
093    private final ThreadPoolExecutor downloadJobExecutor;
094    private Runnable finishTask;
095    private boolean force;
096    private long minimumExpiryTime;
097
098    /**
099     * @param cache cache instance that we will work on
100     * @param options options of the request
101     * @param downloadJobExecutor that will be executing the jobs
102     */
103    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
104            TileJobOptions options,
105            ThreadPoolExecutor downloadJobExecutor) {
106        CheckParameterUtil.ensureParameterNotNull(cache, "cache");
107        this.cache = cache;
108        this.now = System.currentTimeMillis();
109        this.connectTimeout = options.getConnectionTimeout();
110        this.readTimeout = options.getReadTimeout();
111        this.headers = options.getHeaders();
112        this.downloadJobExecutor = downloadJobExecutor;
113        this.minimumExpiryTime = TimeUnit.SECONDS.toMillis(options.getMinimumExpiryTime());
114    }
115
116    /**
117     * @param cache cache instance that we will work on
118     * @param options of the request
119     */
120    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
121            TileJobOptions options) {
122        this(cache, options, DEFAULT_DOWNLOAD_JOB_DISPATCHER);
123    }
124
125    private void ensureCacheElement() {
126        if (cacheElement == null && getCacheKey() != null) {
127            cacheElement = cache.getCacheElement(getCacheKey());
128            if (cacheElement != null) {
129                attributes = (CacheEntryAttributes) cacheElement.getElementAttributes();
130                cacheData = cacheElement.getVal();
131            }
132        }
133    }
134
135    @Override
136    public V get() {
137        ensureCacheElement();
138        return cacheData;
139    }
140
141    @Override
142    public void submit(ICachedLoaderListener listener, boolean force) throws IOException {
143        this.force = force;
144        boolean first = false;
145        URL url = getUrl();
146        String deduplicationKey = null;
147        if (url != null) {
148            // url might be null, for example when Bing Attribution is not loaded yet
149            deduplicationKey = url.toString();
150        }
151        if (deduplicationKey == null) {
152            Logging.warn("No url returned for: {0}, skipping", getCacheKey());
153            throw new IllegalArgumentException("No url returned");
154        }
155        synchronized (this) {
156            first = !inProgress.containsKey(deduplicationKey);
157        }
158        inProgress.computeIfAbsent(deduplicationKey, k -> new HashSet<>()).add(listener);
159
160        if (first || force) {
161            // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk
162            Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException());
163            downloadJobExecutor.execute(this);
164        }
165    }
166
167    /**
168     * This method is run when job has finished
169     */
170    protected void executionFinished() {
171        if (finishTask != null) {
172            finishTask.run();
173        }
174    }
175
176    /**
177     *
178     * @return checks if object from cache has sufficient data to be returned
179     */
180    protected boolean isObjectLoadable() {
181        if (cacheData == null) {
182            return false;
183        }
184        return cacheData.getContent().length > 0;
185    }
186
187    /**
188     * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example)
189     * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException
190     *
191     * @return true if we should put empty object into cache, regardless of what remote resource has returned
192     */
193    protected boolean cacheAsEmpty() {
194        return attributes.getResponseCode() < 500;
195    }
196
197    /**
198     * @return key under which discovered server settings will be kept
199     */
200    protected String getServerKey() {
201        try {
202            return getUrl().getHost();
203        } catch (IOException e) {
204            Logging.trace(e);
205            return null;
206        }
207    }
208
209    @Override
210    public void run() {
211        final Thread currentThread = Thread.currentThread();
212        final String oldName = currentThread.getName();
213        currentThread.setName("JCS Downloading: " + getUrlNoException());
214        Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException());
215        ensureCacheElement();
216        try {
217            // try to fetch from cache
218            if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) {
219                // we got something in cache, and it's valid, so lets return it
220                Logging.debug("JCS - Returning object from cache: {0}", getCacheKey());
221                finishLoading(LoadResult.SUCCESS);
222                return;
223            }
224
225            // try to load object from remote resource
226            if (loadObject()) {
227                finishLoading(LoadResult.SUCCESS);
228            } else {
229                // if loading failed - check if we can return stale entry
230                if (isObjectLoadable()) {
231                    // try to get stale entry in cache
232                    finishLoading(LoadResult.SUCCESS);
233                    Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException());
234                } else {
235                    // failed completely
236                    finishLoading(LoadResult.FAILURE);
237                }
238            }
239        } finally {
240            executionFinished();
241            currentThread.setName(oldName);
242        }
243    }
244
245    private void finishLoading(LoadResult result) {
246        Set<ICachedLoaderListener> listeners;
247        try {
248            listeners = inProgress.remove(getUrl().toString());
249        } catch (IOException e) {
250            listeners = null;
251            Logging.trace(e);
252        }
253        if (listeners == null) {
254            Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException());
255            return;
256        }
257        for (ICachedLoaderListener l: listeners) {
258            l.loadingFinished(cacheData, attributes, result);
259        }
260    }
261
262    protected boolean isCacheElementValid() {
263        long expires = attributes.getExpirationTime();
264
265        // check by expire date set by server
266        if (expires != 0L) {
267            // put a limit to the expire time (some servers send a value
268            // that is too large)
269            expires = Math.min(expires, attributes.getCreateTime() + Math.max(EXPIRE_TIME_SERVER_LIMIT, minimumExpiryTime));
270            if (now > expires) {
271                Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}",
272                        getUrlNoException(), Long.toString(expires), Long.toString(now));
273                return false;
274            }
275        } else if (attributes.getLastModification() > 0 &&
276                now - attributes.getLastModification() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
277            // check by file modification date
278            Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException());
279            return false;
280        } else if (now - attributes.getCreateTime() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
281            Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException());
282            return false;
283        }
284        return true;
285    }
286
287    /**
288     * @return true if object was successfully downloaded, false, if there was a loading failure
289     */
290    private boolean loadObject() {
291        if (attributes == null) {
292            attributes = new CacheEntryAttributes();
293        }
294        try {
295            // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match
296            // then just use HEAD request and check returned values
297            if (isObjectLoadable() &&
298                    Boolean.TRUE.equals(useHead.get(getServerKey())) &&
299                    isCacheValidUsingHead()) {
300                Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl());
301                return true;
302            }
303
304            Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl());
305            final HttpClient request = getRequest("GET", true);
306
307            if (isObjectLoadable() &&
308                    (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) {
309                request.setIfModifiedSince(attributes.getLastModification());
310            }
311            if (isObjectLoadable() && attributes.getEtag() != null) {
312                request.setHeader("If-None-Match", attributes.getEtag());
313            }
314
315            final HttpClient.Response urlConn = request.connect();
316
317            if (urlConn.getResponseCode() == 304) {
318                // If isModifiedSince or If-None-Match has been set
319                // and the server answers with a HTTP 304 = "Not Modified"
320                Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl());
321                // update cache attributes
322                attributes = parseHeaders(urlConn);
323                cache.put(getCacheKey(), cacheData, attributes);
324                return true;
325            } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code
326                    && (
327                            (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
328                            attributes.getLastModification() == urlConn.getLastModified())
329                    ) {
330                // we sent ETag or If-Modified-Since, but didn't get 304 response code
331                // for further requests - use HEAD
332                String serverKey = getServerKey();
333                Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers",
334                        serverKey);
335                useHead.put(serverKey, Boolean.TRUE);
336            }
337
338            attributes = parseHeaders(urlConn);
339
340            for (int i = 0; i < 5; ++i) {
341                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
342                    Thread.sleep(5000L+new SecureRandom().nextInt(5000));
343                    continue;
344                }
345
346                attributes.setResponseCode(urlConn.getResponseCode());
347                byte[] raw;
348                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) {
349                    raw = Utils.readBytesFromStream(urlConn.getContent());
350                } else {
351                    raw = new byte[]{};
352                    try {
353                        String data = urlConn.fetchContent();
354                        if (!data.isEmpty()) {
355                            String detectErrorMessage = detectErrorMessage(data);
356                            if (detectErrorMessage != null) {
357                                attributes.setErrorMessage(detectErrorMessage);
358                            }
359                        }
360                    } catch (IOException e) {
361                        Logging.warn(e);
362                    }
363                }
364
365                if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) {
366                    // we need to check cacheEmpty, so for cases, when data is returned, but we want to store
367                    // as empty (eg. empty tile images) to save some space
368                    cacheData = createCacheEntry(raw);
369                    cache.put(getCacheKey(), cacheData, attributes);
370                    Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}",
371                            getCacheKey(), raw.length, getUrl());
372                    return true;
373                } else if (cacheAsEmpty()) {
374                    cacheData = createCacheEntry(new byte[]{});
375                    cache.put(getCacheKey(), cacheData, attributes);
376                    Logging.debug("JCS - Caching empty object {0}", getUrl());
377                    return true;
378                } else {
379                    Logging.debug("JCS - failure during load - response is not loadable nor cached as empty");
380                    return false;
381                }
382            }
383        } catch (FileNotFoundException e) {
384            Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException());
385            attributes.setResponseCode(404);
386            attributes.setError(e);
387            attributes.setException(e);
388            boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty();
389            if (doCache) {
390                cacheData = createCacheEntry(new byte[]{});
391                cache.put(getCacheKey(), cacheData, attributes);
392            }
393            return doCache;
394        } catch (IOException e) {
395            Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException());
396            if (isObjectLoadable()) {
397                return true;
398            } else {
399                attributes.setError(e);
400                attributes.setException(e);
401                attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached
402                return false;
403            }
404
405        } catch (InterruptedException e) {
406            attributes.setError(e);
407            attributes.setException(e);
408            Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException());
409            Thread.currentThread().interrupt();
410        }
411        Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException());
412        return false;
413    }
414
415    /**
416     * Tries do detect an error message from given string.
417     * @param data string to analyze
418     * @return error message if detected, or null
419     * @since 14535
420     */
421    public String detectErrorMessage(String data) {
422        Matcher m = HttpClient.getTomcatErrorMatcher(data);
423        return m.matches() ? m.group(1).replace("'", "''") : null;
424    }
425
426    /**
427     * Check if the object is loadable. This means, if the data will be parsed, and if this response
428     * will finish as successful retrieve.
429     *
430     * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors
431     *
432     * @param headerFields headers sent by server
433     * @param responseCode http status code
434     * @param raw data read from server
435     * @return true if object should be cached and returned to listener
436     */
437    protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) {
438        return raw != null && raw.length != 0 && responseCode < 400;
439    }
440
441    protected abstract V createCacheEntry(byte[] content);
442
443    protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) {
444        CacheEntryAttributes ret = new CacheEntryAttributes();
445
446        /*
447         * according to https://www.ietf.org/rfc/rfc2616.txt Cache-Control takes precedence over max-age
448         * max-age is for private caches, s-max-age is for shared caches. We take any value that is larger
449         */
450        Long expiration = 0L;
451        String cacheControl = urlConn.getHeaderField("Cache-Control");
452        if (cacheControl != null) {
453            for (String token: cacheControl.split(",")) {
454                try {
455                    if (token.startsWith("max-age=")) {
456                        expiration = Math.max(expiration,
457                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("max-age=".length())))
458                                + System.currentTimeMillis()
459                                );
460                    }
461                    if (token.startsWith("s-max-age=")) {
462                        expiration = Math.max(expiration,
463                                TimeUnit.SECONDS.toMillis(Long.parseLong(token.substring("s-max-age=".length())))
464                                + System.currentTimeMillis()
465                                );
466                    }
467                } catch (NumberFormatException e) {
468                    // ignore malformed Cache-Control headers
469                    Logging.trace(e);
470                }
471            }
472        }
473
474        if (expiration.equals(0L)) {
475            expiration = urlConn.getExpiration();
476        }
477
478        // if nothing is found - set default
479        if (expiration.equals(0L)) {
480            expiration = System.currentTimeMillis() + DEFAULT_EXPIRE_TIME;
481        }
482
483        ret.setExpirationTime(Math.max(minimumExpiryTime + System.currentTimeMillis(), expiration));
484        ret.setLastModification(now);
485        ret.setEtag(urlConn.getHeaderField("ETag"));
486
487        return ret;
488    }
489
490    private HttpClient getRequest(String requestMethod, boolean noCache) throws IOException {
491        final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod);
492        urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*");
493        urlConn.setReadTimeout(readTimeout); // 30 seconds read timeout
494        urlConn.setConnectTimeout(connectTimeout);
495        if (headers != null) {
496            urlConn.setHeaders(headers);
497        }
498
499        if (force || noCache) {
500            urlConn.useCache(false);
501        }
502        return urlConn;
503    }
504
505    private boolean isCacheValidUsingHead() throws IOException {
506        final HttpClient.Response urlConn = getRequest("HEAD", false).connect();
507        long lastModified = urlConn.getLastModified();
508        boolean ret = (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
509                (lastModified != 0 && lastModified <= attributes.getLastModification());
510        if (ret) {
511            // update attributes
512            attributes = parseHeaders(urlConn);
513            cache.put(getCacheKey(), cacheData, attributes);
514        }
515        return ret;
516    }
517
518    /**
519     * TODO: move to JobFactory
520     * cancels all outstanding tasks in the queue.
521     */
522    public void cancelOutstandingTasks() {
523        for (Runnable r: downloadJobExecutor.getQueue()) {
524            if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) {
525                ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation();
526            }
527        }
528    }
529
530    /**
531     * Sets a job, that will be run, when job will finish execution
532     * @param runnable that will be executed
533     */
534    public void setFinishedTask(Runnable runnable) {
535        this.finishTask = runnable;
536
537    }
538
539    /**
540     * Marks this job as canceled
541     */
542    public void handleJobCancellation() {
543        finishLoading(LoadResult.CANCELED);
544    }
545
546    private URL getUrlNoException() {
547        try {
548            return getUrl();
549        } catch (IOException e) {
550            return null;
551        }
552    }
553}