bes  Updated for version 3.20.10
AggMemberDatasetDimensionCache.cc
1 /*
2  * AggMemberDatasetDimensionCache.cc
3  *
4  * Created on: Sep 25, 2015
5  * Author: ndp
6  */
7 
8 #include "config.h"
9 
10 #include "AggMemberDatasetDimensionCache.h"
11 #include "AggMemberDataset.h"
12 #include <string>
13 #include <fstream>
14 #include <sstream>
15 #include <sys/stat.h>
16 
17 #include <libdap/util.h>
18 #include "BESInternalError.h"
19 #include "BESUtil.h"
20 #include "BESDebug.h"
21 #include "TheBESKeys.h"
22 
23 
24 static const string BES_DATA_ROOT("BES.Data.RootDirectory");
25 static const string BES_CATALOG_ROOT("BES.Catalog.catalog.RootDirectory");
26 
27 
28 namespace agg_util
29 {
30 
31 AggMemberDatasetDimensionCache *AggMemberDatasetDimensionCache::d_instance = 0;
32 bool AggMemberDatasetDimensionCache::d_enabled = true;
33 
34 const string AggMemberDatasetDimensionCache::CACHE_DIR_KEY = "NCML.DimensionCache.directory";
35 const string AggMemberDatasetDimensionCache::PREFIX_KEY = "NCML.DimensionCache.prefix";
36 const string AggMemberDatasetDimensionCache::SIZE_KEY = "NCML.DimensionCache.size";
37 // const string AggMemberDatasetDimensionCache::CACHE_CONTROL_FILE = "ncmlAggDimensions.cache.info";
38 
43 unsigned long AggMemberDatasetDimensionCache::getCacheSizeFromConfig(){
44 
45  bool found;
46  string size;
47  unsigned long size_in_megabytes = 0;
48  TheBESKeys::TheKeys()->get_value( SIZE_KEY, size, found ) ;
49  if( found ) {
50  std::istringstream iss(size);
51  iss >> size_in_megabytes;
52  }
53  else {
54  string msg = "[ERROR] AggMemberDatasetDimensionCache::getCacheSize() - The BES Key " + SIZE_KEY + " is not set! It MUST be set to utilize the NcML Dimension Cache. ";
55  BESDEBUG("cache", msg << endl);
56  throw BESInternalError(msg , __FILE__, __LINE__);
57  }
58  return size_in_megabytes;
59 }
60 
65 string AggMemberDatasetDimensionCache::getCacheDirFromConfig(){
66  bool found;
67  string subdir = "";
68  TheBESKeys::TheKeys()->get_value( CACHE_DIR_KEY, subdir, found ) ;
69 
70  if( !found ) {
71  string msg = "[ERROR] AggMemberDatasetDimensionCache::getSubDirFromConfig() - The BES Key " + CACHE_DIR_KEY + " is not set! It MUST be set to utilize the NcML Dimension Cache. ";
72  BESDEBUG("cache", msg << endl);
73  throw BESInternalError(msg , __FILE__, __LINE__);
74  }
75 
76  return subdir;
77 }
78 
79 
84 string AggMemberDatasetDimensionCache::getDimCachePrefixFromConfig(){
85  bool found;
86  string prefix = "";
87  TheBESKeys::TheKeys()->get_value( PREFIX_KEY, prefix, found ) ;
88  if( found ) {
89  prefix = BESUtil::lowercase( prefix ) ;
90  }
91  else {
92  string msg = "[ERROR] AggMemberDatasetDimensionCache::getResultPrefix() - The BES Key " + PREFIX_KEY + " is not set! It MUST be set to utilize the NcML Dimension Cache. ";
93  BESDEBUG("cache", msg << endl);
94  throw BESInternalError(msg , __FILE__, __LINE__);
95  }
96 
97  return prefix;
98 }
99 
100 
106 string AggMemberDatasetDimensionCache::getBesDataRootDirFromConfig(){
107  bool found;
108  string cacheDir = "";
109  TheBESKeys::TheKeys()->get_value( BES_CATALOG_ROOT, cacheDir, found ) ;
110  if( !found ) {
111  TheBESKeys::TheKeys()->get_value( BES_DATA_ROOT, cacheDir, found ) ;
112  if( !found ) {
113  string msg = ((string)"[ERROR] AggMemberDatasetDimensionCache::getStoredResultsDir() - Neither the BES Key ") + BES_CATALOG_ROOT +
114  "or the BES key " + BES_DATA_ROOT + " have been set! One MUST be set to utilize the NcML Dimension Cache. ";
115  BESDEBUG("cache", msg << endl);
116  throw BESInternalError(msg , __FILE__, __LINE__);
117  }
118  }
119  return cacheDir;
120 
121 }
122 
126 AggMemberDatasetDimensionCache::AggMemberDatasetDimensionCache()
127 {
128  BESDEBUG("cache", "AggMemberDatasetDimensionCache::AggMemberDatasetDimensionCache() - BEGIN" << endl);
129 
130  d_dimCacheDir = getCacheDirFromConfig();
131  d_dataRootDir = getBesDataRootDirFromConfig();
132 
133  d_dimCacheFilePrefix = getDimCachePrefixFromConfig();
134  d_maxCacheSize = getCacheSizeFromConfig();
135 
136  BESDEBUG("cache", "AggMemberDatasetDimensionCache() - Stored results cache configuration params: " << d_dimCacheDir << ", " << d_dimCacheFilePrefix << ", " << d_maxCacheSize << endl);
137 
138  // initialize(d_dimCacheDir, CACHE_CONTROL_FILE, d_dimCacheFilePrefix, d_maxCacheSize);
139  initialize(d_dimCacheDir, d_dimCacheFilePrefix, d_maxCacheSize);
140 
141  BESDEBUG("cache", "AggMemberDatasetDimensionCache::AggMemberDatasetDimensionCache() - END" << endl);
142 
143 }
144 
148 AggMemberDatasetDimensionCache::AggMemberDatasetDimensionCache(const string &data_root_dir, const string &cache_dir, const string &prefix, unsigned long long size){
149 
150  BESDEBUG("cache", "AggMemberDatasetDimensionCache::AggMemberDatasetDimensionCache() - BEGIN" << endl);
151 
152  d_dataRootDir = data_root_dir;
153  d_dimCacheDir = cache_dir;
154  d_dimCacheFilePrefix = prefix;
155  d_maxCacheSize = size;
156 
157 // initialize(d_dimCacheDir, CACHE_CONTROL_FILE, d_dimCacheFilePrefix, d_maxCacheSize);
158  initialize(d_dimCacheDir, d_dimCacheFilePrefix, d_maxCacheSize);
159 
160  BESDEBUG("cache", "AggMemberDatasetDimensionCache::AggMemberDatasetDimensionCache() - END" << endl);
161 }
162 
163 
168 AggMemberDatasetDimensionCache *
169 AggMemberDatasetDimensionCache::get_instance(const string &data_root_dir, const string &cache_dir, const string &result_file_prefix, unsigned long long max_cache_size)
170 {
171  if (d_enabled && d_instance == 0){
172  if (libdap::dir_exists(cache_dir)) {
173  d_instance = new AggMemberDatasetDimensionCache(data_root_dir, cache_dir, result_file_prefix, max_cache_size);
174  d_enabled = d_instance->cache_enabled();
175  if(!d_enabled){
176  delete d_instance;
177  d_instance = NULL;
178  BESDEBUG("cache", "AggMemberDatasetDimensionCache::"<<__func__ << "() - " <<
179  "Cache is DISABLED"<< endl);
180  }
181  else {
182  #ifdef HAVE_ATEXIT
183  atexit(delete_instance);
184  #endif
185  BESDEBUG("cache", "AggMemberDatasetDimensionCache::"<<__func__ << "() - " <<
186  "Cache is ENABLED"<< endl);
187  }
188  }
189  }
190  return d_instance;
191 }
192 
198 AggMemberDatasetDimensionCache *
200 {
201  if (d_enabled && d_instance == 0) {
202  d_instance = new AggMemberDatasetDimensionCache();
203  d_enabled = d_instance->cache_enabled();
204  if(!d_enabled){
205  delete d_instance;
206  d_instance = NULL;
207  BESDEBUG("cache", "AggMemberDatasetDimensionCache::"<<__func__ << "() - " <<
208  "Cache is DISABLED"<< endl);
209  }
210  else {
211 #ifdef HAVE_ATEXIT
212  atexit(delete_instance);
213 #endif
214  BESDEBUG("cache", "AggMemberDatasetDimensionCache::"<<__func__ << "() - " <<
215  "Cache is ENABLED"<< endl);
216  }
217 }
218 
219  return d_instance;
220 }
221 
222 
226 void AggMemberDatasetDimensionCache::delete_instance() {
227  BESDEBUG("cache","AggMemberDatasetDimensionCache::delete_instance() - Deleting singleton BESStoredDapResultCache instance." << endl);
228  delete d_instance;
229  d_instance = 0;
230 }
231 
232 
233 
234 AggMemberDatasetDimensionCache::~AggMemberDatasetDimensionCache()
235 {
236  // Nothing to do here....
237 }
238 
249 bool AggMemberDatasetDimensionCache::is_valid(const string &cache_file_name, const string &local_id)
250 {
251  // If the cached response is zero bytes in size, it's not valid.
252  // (hmmm...)
253  string datasetFileName = BESUtil::assemblePath(d_dataRootDir,local_id, true);
254 
255  off_t entry_size = 0;
256  time_t entry_time = 0;
257  struct stat buf;
258  if (stat(cache_file_name.c_str(), &buf) == 0) {
259  entry_size = buf.st_size;
260  entry_time = buf.st_mtime;
261  }
262  else {
263  return false;
264  }
265 
266  if (entry_size == 0)
267  return false;
268 
269  time_t dataset_time = entry_time;
270  if (stat(datasetFileName.c_str(), &buf) == 0) {
271  dataset_time = buf.st_mtime;
272  }
273 
274  // Trick: if the d_dataset is not a file, stat() returns error and
275  // the times stay equal and the code uses the cache entry.
276 
277  // TODO Fix this so that the code can get a LMT from the correct handler.
278  // TODO Consider adding a getLastModified() method to the libdap::DDS object to support this
279  // TODO The DDS may be expensive to instantiate - I think the handler may be a better location for an LMT method, if we can access the handler when/where needed.
280  if (dataset_time > entry_time)
281  return false;
282 
283  return true;
284 }
285 
286 
287 
295  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - BEGIN" << endl );
296 
297  // Get the cache filename for this thing, mangle name.
298  string local_id = amd->getLocation();
299  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - local resource id: "<< local_id << endl );
300  string cache_file_name = get_cache_file_name(local_id, true);
301  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - cache_file_name: "<< cache_file_name << endl );
302 
303  int fd;
304  try {
305  // If the object in the cache is not valid, remove it. The read_lock will
306  // then fail and the code will drop down to the create_and_lock() call.
307  // is_valid() tests for a non-zero length cache file (cache_file_name) and
308  // for the source data file (local_id) with a newer LMT than the cache file.
309  if (!is_valid(cache_file_name, local_id)){
310  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - File is not valid. Purging file from cache. filename: " << cache_file_name << endl);
311  purge_file(cache_file_name);
312  }
313 
314  if (get_read_lock(cache_file_name, fd)) {
315  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - Dimension cache file exists. Loading dimension cache from file: " << cache_file_name << endl);
316 
317  ifstream istrm(cache_file_name.c_str());
318  if (!istrm)
319  throw libdap::InternalErr(__FILE__, __LINE__, "Could not open '" + cache_file_name + "' to read cached dimensions.");
320 
321  amd->loadDimensionCache(istrm);
322 
323  istrm.close();
324 
325 
326  }
327  else {
328  // If here, the cache_file_name could not be locked for read access, or it was out of date.
329  // So we are going to (re)build the cache file.
330 
331  // We need to build the DDS object and extract the dimensions.
332  // We do not lock before this operation because it may take a _long_ time and
333  // we don't want to monopolize the cache while we do it.
335 
336  // Now, we try to make an empty cache file and get an exclusive lock on it.
337  if (create_and_lock(cache_file_name, fd)) {
338  // Woohoo! We got the exclusive lock on the new cache file.
339  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - Created and locked cache file: " << cache_file_name << endl);
340 
341  // Now we open it (again) using the more friendly ostream API.
342  ofstream ostrm(cache_file_name.c_str());
343  if (!ostrm)
344  throw libdap::InternalErr(__FILE__, __LINE__, "Could not open '" + cache_file_name + "' to write cached response.");
345 
346  // Save the dimensions to the cache file.
347  amd->saveDimensionCache(ostrm);
348 
349  // And close the cache file;s ostream.
350  ostrm.close();
351 
352  // Change the exclusive lock on the new file to a shared lock. This keeps
353  // other processes from purging the new file and ensures that the reading
354  // process can use it.
356 
357  // Now update the total cache size info and purge if needed. The new file's
358  // name is passed into the purge method because this process cannot detect its
359  // own lock on the file.
360  unsigned long long size = update_cache_info(cache_file_name);
361  if (cache_too_big(size))
362  update_and_purge(cache_file_name);
363  }
364  // get_read_lock() returns immediately if the file does not exist,
365  // but blocks waiting to get a shared lock if the file does exist.
366  else if (get_read_lock(cache_file_name, fd)) {
367  // If we got here then someone else rebuilt the cache file before we could do it.
368  // That's OK, and since we already built the DDS we have all of the cache info in memory
369  // from directly accessing the source dataset(s), so we need to do nothing more,
370  // Except send a debug statement so we can see that this happened.
371  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - Couldn't create and lock cache file, But I got a read lock. "
372  "Cache file may have been rebuilt by another process. "
373  "Cache file: " << cache_file_name << endl);
374  }
375  else {
376  throw libdap::InternalErr(__FILE__, __LINE__, "AggMemberDatasetDimensionCache::loadDimensionCache() - Cache error during function invocation.");
377  }
378  }
379 
380  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - unlocking and closing cache file "<< cache_file_name << endl );
381  unlock_and_close(cache_file_name);
382  }
383  catch (...) {
384  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - caught exception, unlocking cache and re-throw." << endl );
385  unlock_cache();
386  throw;
387  }
388 
389  BESDEBUG("cache", "AggMemberDatasetDimensionCache::loadDimensionCache() - END (local_id=`"<< local_id << "')" << endl );
390 
391 }
392 
393 
394 
395 
396 
397 
398 } /* namespace agg_util */
void initialize(const std::string &cache_dir, const std::string &prefix, unsigned long long size)
Initialize an instance of FileLockingCache.
virtual void unlock_and_close(const std::string &target)
virtual unsigned long long update_cache_info(const std::string &target)
Update the cache info file to include 'target'.
virtual bool create_and_lock(const std::string &target, int &fd)
Create a file in the cache and lock it for write access.
virtual void exclusive_to_shared_lock(int fd)
Transfer from an exclusive lock to a shared lock.
virtual bool get_read_lock(const std::string &target, int &fd)
Get a read-only lock on the file if it exists.
virtual void purge_file(const std::string &file)
Purge a single file from the cache.
virtual bool cache_too_big(unsigned long long current_size) const
look at the cache size; is it too large? Look at the cache size and see if it is too big.
virtual void update_and_purge(const std::string &new_file)
Purge files from the cache.
virtual std::string get_cache_file_name(const std::string &src, bool mangle=true)
exception thrown if internal error encountered
static std::string lowercase(const std::string &s)
Definition: BESUtil.cc:206
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
void get_value(const std::string &s, std::string &val, bool &found)
Retrieve the value of a given key, if set.
Definition: TheBESKeys.cc:340
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
static AggMemberDatasetDimensionCache * get_instance()
virtual void loadDimensionCache(std::istream &istr)=0
const std::string & getLocation() const
virtual void saveDimensionCache(std::ostream &ostr)=0
virtual void fillDimensionCacheByUsingDDS()=0
Helper class for temporarily hijacking an existing dhi to load a DDX response for one particular file...