// License: GPL. For details, see LICENSE file.
package org.openstreetmap.josm.data.cache;

import java.io.FileNotFoundException;
import java.io.IOException;
import java.net.HttpURLConnection;
import java.net.URL;
import java.security.SecureRandom;
import java.util.HashSet;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.ConcurrentMap;
import java.util.concurrent.LinkedBlockingDeque;
import java.util.concurrent.ThreadPoolExecutor;
import java.util.concurrent.TimeUnit;
import java.util.regex.Matcher;

import org.apache.commons.jcs.access.behavior.ICacheAccess;
import org.apache.commons.jcs.engine.behavior.ICacheElement;
import org.openstreetmap.josm.data.cache.ICachedLoaderListener.LoadResult;
import org.openstreetmap.josm.data.imagery.TileJobOptions;
import org.openstreetmap.josm.data.preferences.IntegerProperty;
import org.openstreetmap.josm.tools.CheckParameterUtil;
import org.openstreetmap.josm.tools.HttpClient;
import org.openstreetmap.josm.tools.Logging;
import org.openstreetmap.josm.tools.Utils;

/**
 * Generic loader for HTTP based tiles. Uses custom attribute, to check, if entry has expired
 * according to HTTP headers sent with tile. If so, it tries to verify using Etags
 * or If-Modified-Since / Last-Modified.
 *
 * If the tile is not valid, it will try to download it from remote service and put it
 * to cache. If remote server will fail it will try to use stale entry.
 *
 * This class will keep only one Job running for specified tile. All others will just finish, but
 * listeners will be gathered and notified, once download job will be finished
 *
 * @author Wiktor Niesiobędzki
 * @param <K> cache entry key type
 * @param <V> cache value type
 * @since 8168
 */
public abstract class JCSCachedTileLoaderJob<K, V extends CacheEntry> implements ICachedLoaderJob<K> {
    /** Expiry applied when the server sends no usable expiration information. */
    protected static final long DEFAULT_EXPIRE_TIME = TimeUnit.DAYS.toMillis(7);
    // Limit for the max-age value sent by the server.
    protected static final long EXPIRE_TIME_SERVER_LIMIT = TimeUnit.DAYS.toMillis(28);
    // Absolute expire time limit. Cached tiles that are older will not be used,
    // even if the refresh from the server fails.
    protected static final long ABSOLUTE_EXPIRE_TIME_LIMIT = TimeUnit.DAYS.toMillis(365);

    /**
     * maximum download threads that will be started
     */
    public static final IntegerProperty THREAD_LIMIT = new IntegerProperty("cache.jcs.max_threads", 10);

    /**
     * Cache-Control directive prefixes (lower case) that carry a freshness lifetime in seconds.
     * "s-maxage" is the standard spelling; "s-max-age" is kept because earlier versions of this class accepted it.
     */
    private static final String[] MAX_AGE_PREFIXES = {"max-age=", "s-maxage=", "s-max-age="};

    /*
     * ThreadPoolExecutor starts new threads, until THREAD_LIMIT is reached. Then it puts tasks into LinkedBlockingDeque.
     *
     * The queue works FIFO, so one needs to take care about ordering of the entries submitted
     *
     * There is no point in canceling tasks, that are already taken by worker threads (if we made so much effort, we can at least cache
     * the response, so later it could be used). We could actually cancel what is in the queue, but this is a tradeoff between simplicity
     * and performance (we do want to have something to offer to worker threads before tasks will be resubmitted by class consumer)
     *
     * NOTE(review): the work queue below is unbounded. According to the ThreadPoolExecutor documentation, threads beyond the
     * core pool size are only created when the queue rejects a task, which an unbounded queue never does - so this default
     * dispatcher effectively runs with a single worker thread. Left unchanged here; confirm the intended behaviour.
     */

    private static final ThreadPoolExecutor DEFAULT_DOWNLOAD_JOB_DISPATCHER = new ThreadPoolExecutor(
            1, // core pool size (additional threads are started only when the queue is full)
            THREAD_LIMIT.get(), // do not exceed this number of threads
            30, // keepalive for thread
            TimeUnit.SECONDS,
            // a plain LinkedBlockingDeque is consumed by the executor in FIFO order (offer at the tail, take from the head)
            new LinkedBlockingDeque<Runnable>(),
            Utils.newThreadFactory("JCS-downloader-%d", Thread.NORM_PRIORITY)
            );

    /** Listeners waiting for a download, keyed by the URL string used for de-duplication. Guarded by {@code synchronized (inProgress)}. */
    private static final ConcurrentMap<String, Set<ICachedLoaderListener>> inProgress = new ConcurrentHashMap<>();
    /** Server keys (see {@link #getServerKey()}) for which cache validation falls back to HEAD requests. */
    private static final ConcurrentMap<String, Boolean> useHead = new ConcurrentHashMap<>();

    protected final long now; // when the job started

    private final ICacheAccess<K, V> cache;
    private ICacheElement<K, V> cacheElement;
    protected V cacheData;
    protected CacheEntryAttributes attributes;

    // HTTP connection parameters
    private final int connectTimeout;
    private final int readTimeout;
    private final Map<String, String> headers;
    private final ThreadPoolExecutor downloadJobExecutor;
    private Runnable finishTask;
    private boolean force;
    private long minimumExpiryTime;

    /**
     * Creates a job that will be executed on the given executor.
     * @param cache cache instance that we will work on
     * @param options options of the request
     * @param downloadJobExecutor that will be executing the jobs
     */
    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
            TileJobOptions options,
            ThreadPoolExecutor downloadJobExecutor) {
        CheckParameterUtil.ensureParameterNotNull(cache, "cache");
        this.cache = cache;
        this.now = System.currentTimeMillis();
        this.connectTimeout = options.getConnectionTimeout();
        this.readTimeout = options.getReadTimeout();
        this.headers = options.getHeaders();
        this.downloadJobExecutor = downloadJobExecutor;
        // options carry the minimum expiry in seconds; everything in this class works in milliseconds
        this.minimumExpiryTime = TimeUnit.SECONDS.toMillis(options.getMinimumExpiryTime());
    }

    /**
     * Creates a job that will be executed on the default download dispatcher.
     * @param cache cache instance that we will work on
     * @param options of the request
     */
    public JCSCachedTileLoaderJob(ICacheAccess<K, V> cache,
            TileJobOptions options) {
        this(cache, options, DEFAULT_DOWNLOAD_JOB_DISPATCHER);
    }

    /**
     * Lazily reads the cache element for {@link #getCacheKey()} and, when found,
     * populates {@link #attributes} and {@link #cacheData} from it.
     */
    private void ensureCacheElement() {
        if (cacheElement == null && getCacheKey() != null) {
            cacheElement = cache.getCacheElement(getCacheKey());
            if (cacheElement != null) {
                attributes = (CacheEntryAttributes) cacheElement.getElementAttributes();
                cacheData = cacheElement.getVal();
            }
        }
    }

    @Override
    public V get() {
        ensureCacheElement();
        return cacheData;
    }

    @Override
    public void submit(ICachedLoaderListener listener, boolean force) throws IOException {
        this.force = force;
        boolean first = false;
        URL url = getUrl();
        String deduplicationKey = null;
        if (url != null) {
            // url might be null, for example when Bing Attribution is not loaded yet
            deduplicationKey = url.toString();
        }
        if (deduplicationKey == null) {
            Logging.warn("No url returned for: {0}, skipping", getCacheKey());
            throw new IllegalArgumentException("No url returned");
        }
        // register the listener; only the first job for a given URL (or a forced one) is actually executed
        synchronized (inProgress) {
            Set<ICachedLoaderListener> newListeners = inProgress.get(deduplicationKey);
            if (newListeners == null) {
                newListeners = new HashSet<>();
                inProgress.put(deduplicationKey, newListeners);
                first = true;
            }
            newListeners.add(listener);
        }

        if (first || force) {
            // submit all jobs to separate thread, so calling thread is not blocked with IO when loading from disk
            Logging.debug("JCS - Submitting job for execution for url: {0}", getUrlNoException());
            downloadJobExecutor.execute(this);
        }
    }

    /**
     * This method is run when job has finished
     */
    protected void executionFinished() {
        if (finishTask != null) {
            finishTask.run();
        }
    }

    /**
     *
     * @return checks if object from cache has sufficient data to be returned
     */
    protected boolean isObjectLoadable() {
        if (cacheData == null) {
            return false;
        }
        return cacheData.getContent().length > 0;
    }

    /**
     * Simple implementation. All errors should be cached as empty. Though some JDK (JDK8 on Windows for example)
     * doesn't return 4xx error codes, instead they do throw an FileNotFoundException or IOException
     *
     * @return true if we should put empty object into cache, regardless of what remote resource has returned
     */
    protected boolean cacheAsEmpty() {
        return attributes.getResponseCode() < 500;
    }

    /**
     * @return key under which discovered server settings will be kept,
     *         or {@code null} when the URL is not available
     */
    protected String getServerKey() {
        try {
            URL url = getUrl();
            // url might be null (see submit()), in which case there is no host to key on
            return url == null ? null : url.getHost();
        } catch (IOException e) {
            Logging.trace(e);
            return null;
        }
    }

    @Override
    public void run() {
        final Thread currentThread = Thread.currentThread();
        final String oldName = currentThread.getName();
        currentThread.setName("JCS Downloading: " + getUrlNoException());
        Logging.debug("JCS - starting fetch of url: {0} ", getUrlNoException());
        ensureCacheElement();
        try {
            // try to fetch from cache
            if (!force && cacheElement != null && isCacheElementValid() && isObjectLoadable()) {
                // we got something in cache, and it's valid, so lets return it
                Logging.debug("JCS - Returning object from cache: {0}", getCacheKey());
                finishLoading(LoadResult.SUCCESS);
                return;
            }

            // try to load object from remote resource
            if (loadObject()) {
                finishLoading(LoadResult.SUCCESS);
            } else {
                // if loading failed - check if we can return stale entry
                if (isObjectLoadable()) {
                    // try to get stale entry in cache
                    finishLoading(LoadResult.SUCCESS);
                    Logging.debug("JCS - found stale object in cache: {0}", getUrlNoException());
                } else {
                    // failed completely
                    finishLoading(LoadResult.FAILURE);
                }
            }
        } finally {
            executionFinished();
            currentThread.setName(oldName);
        }
    }

    /**
     * Removes the listeners registered for this job's URL and notifies each of them with the given result.
     * If no listeners can be found (URL unavailable, or already removed by another job) a warning is logged.
     * @param result result to report to the listeners
     */
    private void finishLoading(LoadResult result) {
        Set<ICachedLoaderListener> listeners = null;
        try {
            // resolve the URL outside of the lock; a null URL simply means there is nothing to look up
            URL url = getUrl();
            if (url != null) {
                synchronized (inProgress) {
                    listeners = inProgress.remove(url.toString());
                }
            }
        } catch (IOException e) {
            Logging.trace(e);
        }
        if (listeners == null) {
            Logging.warn("Listener not found for URL: {0}. Listener not notified!", getUrlNoException());
            return;
        }
        for (ICachedLoaderListener l: listeners) {
            l.loadingFinished(cacheData, attributes, result);
        }
    }

    /**
     * Checks whether the cached element is still fresh, based on the expiration time stored in
     * {@link #attributes}, or - when none is stored - on its last modification and creation times.
     * @return true if the cached element may be used without contacting the server
     */
    protected boolean isCacheElementValid() {
        long expires = attributes.getExpirationTime();

        // check by expire date set by server
        if (expires != 0L) {
            // put a limit to the expire time (some servers send a value
            // that is too large)
            expires = Math.min(expires, attributes.getCreateTime() + Math.max(EXPIRE_TIME_SERVER_LIMIT, minimumExpiryTime));
            if (now > expires) {
                Logging.debug("JCS - Object {0} has expired -> valid to {1}, now is: {2}",
                        getUrlNoException(), Long.toString(expires), Long.toString(now));
                return false;
            }
        } else if (attributes.getLastModification() > 0 &&
                now - attributes.getLastModification() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
            // check by file modification date
            Logging.debug("JCS - Object has expired, maximum file age reached {0}", getUrlNoException());
            return false;
        } else if (now - attributes.getCreateTime() > Math.max(DEFAULT_EXPIRE_TIME, minimumExpiryTime)) {
            Logging.debug("JCS - Object has expired, maximum time since object creation reached {0}", getUrlNoException());
            return false;
        }
        return true;
    }

    /**
     * Checks whether the server of this job was previously recorded as needing HEAD requests for validation.
     * @return true if a server key is available and is flagged in {@link #useHead}
     */
    private boolean isHeadVerificationEnabled() {
        // ConcurrentHashMap does not accept null keys, so an unavailable server key means "not flagged"
        String serverKey = getServerKey();
        return serverKey != null && Boolean.TRUE.equals(useHead.get(serverKey));
    }

    /**
     * @return true if object was successfully downloaded, false, if there was a loading failure
     */
    private boolean loadObject() {
        if (attributes == null) {
            attributes = new CacheEntryAttributes();
        }
        try {
            // if we have object in cache, and host doesn't support If-Modified-Since nor If-None-Match
            // then just use HEAD request and check returned values
            if (isObjectLoadable() &&
                    isHeadVerificationEnabled() &&
                    isCacheValidUsingHead()) {
                Logging.debug("JCS - cache entry verified using HEAD request: {0}", getUrl());
                return true;
            }

            Logging.debug("JCS - starting HttpClient GET request for URL: {0}", getUrl());
            final HttpClient request = getRequest("GET", true);

            if (isObjectLoadable() &&
                    (now - attributes.getLastModification()) <= ABSOLUTE_EXPIRE_TIME_LIMIT) {
                request.setIfModifiedSince(attributes.getLastModification());
            }
            if (isObjectLoadable() && attributes.getEtag() != null) {
                request.setHeader("If-None-Match", attributes.getEtag());
            }

            final HttpClient.Response urlConn = request.connect();

            if (urlConn.getResponseCode() == 304) {
                // If isModifiedSince or If-None-Match has been set
                // and the server answers with a HTTP 304 = "Not Modified"
                Logging.debug("JCS - If-Modified-Since/ETag test: local version is up to date: {0}", getUrl());
                // update cache attributes
                attributes = parseHeaders(urlConn);
                cache.put(getCacheKey(), cacheData, attributes);
                return true;
            } else if (isObjectLoadable() // we have an object in cache, but we haven't received 304 response code
                    && (
                            (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
                            attributes.getLastModification() == urlConn.getLastModified())
                    ) {
                // we sent ETag or If-Modified-Since, but didn't get 304 response code
                // for further requests - use HEAD
                String serverKey = getServerKey();
                Logging.info("JCS - Host: {0} found not to return 304 codes for If-Modified-Since or If-None-Match headers",
                        serverKey);
                if (serverKey != null) {
                    // null keys are rejected by ConcurrentHashMap
                    useHead.put(serverKey, Boolean.TRUE);
                }
            }

            attributes = parseHeaders(urlConn);

            // NOTE(review): the request is not re-issued inside this loop, so for a 503 response the loop only
            // sleeps five times and then falls through to the "Silent failure" path below. Behaviour kept as is;
            // confirm whether a real retry (new connection per iteration) was intended.
            for (int i = 0; i < 5; ++i) {
                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_UNAVAILABLE) {
                    Thread.sleep(5000L+new SecureRandom().nextInt(5000));
                    continue;
                }

                attributes.setResponseCode(urlConn.getResponseCode());
                byte[] raw;
                if (urlConn.getResponseCode() == HttpURLConnection.HTTP_OK) {
                    raw = Utils.readBytesFromStream(urlConn.getContent());
                } else {
                    raw = new byte[]{};
                    try {
                        String data = urlConn.fetchContent();
                        if (!data.isEmpty()) {
                            String detectErrorMessage = detectErrorMessage(data);
                            if (detectErrorMessage != null) {
                                attributes.setErrorMessage(detectErrorMessage);
                            }
                        }
                    } catch (IOException e) {
                        Logging.warn(e);
                    }
                }

                if (isResponseLoadable(urlConn.getHeaderFields(), urlConn.getResponseCode(), raw)) {
                    // we need to check cacheEmpty, so for cases, when data is returned, but we want to store
                    // as empty (eg. empty tile images) to save some space
                    cacheData = createCacheEntry(raw);
                    cache.put(getCacheKey(), cacheData, attributes);
                    Logging.debug("JCS - downloaded key: {0}, length: {1}, url: {2}",
                            getCacheKey(), raw.length, getUrl());
                    return true;
                } else if (cacheAsEmpty()) {
                    cacheData = createCacheEntry(new byte[]{});
                    cache.put(getCacheKey(), cacheData, attributes);
                    Logging.debug("JCS - Caching empty object {0}", getUrl());
                    return true;
                } else {
                    Logging.debug("JCS - failure during load - response is not loadable nor cached as empty");
                    return false;
                }
            }
        } catch (FileNotFoundException e) {
            Logging.debug("JCS - Caching empty object as server returned 404 for: {0}", getUrlNoException());
            attributes.setResponseCode(404);
            attributes.setError(e);
            attributes.setException(e);
            boolean doCache = isResponseLoadable(null, 404, null) || cacheAsEmpty();
            if (doCache) {
                cacheData = createCacheEntry(new byte[]{});
                cache.put(getCacheKey(), cacheData, attributes);
            }
            return doCache;
        } catch (IOException e) {
            Logging.debug("JCS - IOException during communication with server for: {0}", getUrlNoException());
            if (isObjectLoadable()) {
                return true;
            } else {
                attributes.setError(e);
                attributes.setException(e);
                attributes.setResponseCode(599); // set dummy error code, greater than 500 so it will be not cached
                return false;
            }

        } catch (InterruptedException e) {
            attributes.setError(e);
            attributes.setException(e);
            Logging.logWithStackTrace(Logging.LEVEL_WARN, e, "JCS - Exception during download {0}", getUrlNoException());
            // restore the interrupt flag for whoever owns this thread
            Thread.currentThread().interrupt();
        }
        Logging.warn("JCS - Silent failure during download: {0}", getUrlNoException());
        return false;
    }

    /**
     * Tries to detect an error message from given string.
     * @param data string to analyze
     * @return error message if detected (with single quotes doubled), or null
     * @since 14535
     */
    public String detectErrorMessage(String data) {
        Matcher m = HttpClient.getTomcatErrorMatcher(data);
        // NOTE(review): quotes are doubled presumably so the text survives MessageFormat-style formatting - confirm
        return m.matches() ? m.group(1).replace("'", "''") : null;
    }

    /**
     * Check if the object is loadable. This means, if the data will be parsed, and if this response
     * will finish as successful retrieve.
     *
     * This simple implementation doesn't load empty response, nor client (4xx) and server (5xx) errors
     *
     * @param headerFields headers sent by server
     * @param responseCode http status code
     * @param raw data read from server
     * @return true if object should be cached and returned to listener
     */
    protected boolean isResponseLoadable(Map<String, List<String>> headerFields, int responseCode, byte[] raw) {
        return raw != null && raw.length != 0 && responseCode < 400;
    }

    /**
     * Creates the cache value for the given raw content.
     * @param content raw bytes (may be empty when an empty object is cached)
     * @return cache entry wrapping the content
     */
    protected abstract V createCacheEntry(byte[] content);

    /**
     * Builds fresh cache attributes (expiration time, last modification, ETag) from the response headers.
     * @param urlConn response to read the headers from
     * @return newly created attributes
     */
    protected CacheEntryAttributes parseHeaders(HttpClient.Response urlConn) {
        CacheEntryAttributes ret = new CacheEntryAttributes();

        /*
         * according to https://www.ietf.org/rfc/rfc2616.txt Cache-Control max-age takes precedence over Expires
         * max-age is for private caches, s-maxage is for shared caches. We take any value that is larger
         */
        long expiration = 0L;
        String cacheControl = urlConn.getHeaderField("Cache-Control");
        if (cacheControl != null) {
            for (String token: cacheControl.split(",")) {
                // directives are usually separated by ", " and are case-insensitive, so normalize before matching
                String directive = token.trim().toLowerCase(Locale.ROOT);
                try {
                    for (String prefix : MAX_AGE_PREFIXES) {
                        if (directive.startsWith(prefix)) {
                            long maxAgeMillis = TimeUnit.SECONDS.toMillis(Long.parseLong(directive.substring(prefix.length())));
                            expiration = Math.max(expiration, maxAgeMillis + System.currentTimeMillis());
                        }
                    }
                } catch (NumberFormatException e) {
                    // ignore malformed Cache-Control headers
                    Logging.trace(e);
                }
            }
        }

        // no usable Cache-Control lifetime - fall back to the expiration reported by the response
        if (expiration == 0L) {
            expiration = urlConn.getExpiration();
        }

        // if nothing is found - set default
        if (expiration == 0L) {
            expiration = System.currentTimeMillis() + DEFAULT_EXPIRE_TIME;
        }

        ret.setExpirationTime(Math.max(minimumExpiryTime + System.currentTimeMillis(), expiration));
        ret.setLastModification(now);
        ret.setEtag(urlConn.getHeaderField("ETag"));

        return ret;
    }

    /**
     * Creates a request for this job's URL with the configured timeouts and headers.
     * @param requestMethod HTTP method to use
     * @param noCache if true, HTTP caching is disabled for the request (also disabled when the job is forced)
     * @return configured, not yet connected client
     * @throws IOException if the URL cannot be obtained
     */
    private HttpClient getRequest(String requestMethod, boolean noCache) throws IOException {
        final HttpClient urlConn = HttpClient.create(getUrl(), requestMethod);
        urlConn.setAccept("text/html, image/png, image/jpeg, image/gif, */*");
        urlConn.setReadTimeout(readTimeout); // read timeout as configured in the job options
        urlConn.setConnectTimeout(connectTimeout);
        if (headers != null) {
            urlConn.setHeaders(headers);
        }

        if (force || noCache) {
            urlConn.useCache(false);
        }
        return urlConn;
    }

    /**
     * Validates the cached object with a HEAD request, comparing ETag and Last-Modified with the stored attributes.
     * On success the attributes are refreshed from the response and written back to the cache.
     * @return true if the cached object is still current according to the server
     * @throws IOException on communication failure
     */
    private boolean isCacheValidUsingHead() throws IOException {
        final HttpClient.Response urlConn = getRequest("HEAD", false).connect();
        long lastModified = urlConn.getLastModified();
        boolean ret = (attributes.getEtag() != null && attributes.getEtag().equals(urlConn.getHeaderField("ETag"))) ||
                (lastModified != 0 && lastModified <= attributes.getLastModification());
        if (ret) {
            // update attributes
            attributes = parseHeaders(urlConn);
            cache.put(getCacheKey(), cacheData, attributes);
        }
        return ret;
    }

    /**
     * TODO: move to JobFactory
     * cancels all outstanding tasks in the queue.
     */
    public void cancelOutstandingTasks() {
        for (Runnable r: downloadJobExecutor.getQueue()) {
            // only jobs that were actually removed from the queue (not yet picked up by a worker) are cancelled
            if (downloadJobExecutor.remove(r) && r instanceof JCSCachedTileLoaderJob) {
                ((JCSCachedTileLoaderJob<?, ?>) r).handleJobCancellation();
            }
        }
    }

    /**
     * Sets a job, that will be run, when job will finish execution
     * @param runnable that will be executed
     */
    public void setFinishedTask(Runnable runnable) {
        this.finishTask = runnable;

    }

    /**
     * Marks this job as canceled
     */
    public void handleJobCancellation() {
        finishLoading(LoadResult.CANCELED);
    }

    /**
     * @return the job URL, or {@code null} if it cannot be obtained
     */
    private URL getUrlNoException() {
        try {
            return getUrl();
        } catch (IOException e) {
            return null;
        }
    }
}