bes  Updated for version 3.20.10
history_utils.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the Hyrax data server.
4 
5 // Copyright (c) 2021 OPeNDAP, Inc.
6 // Author: Nathan Potter <ndp@opendap.org>
7 //
8 // This library is free software; you can redistribute it and/or
9 // modify it under the terms of the GNU Lesser General Public
10 // License as published by the Free Software Foundation; either
11 // version 2.1 of the License, or (at your option) any later version.
12 //
13 // This library is distributed in the hope that it will be useful,
14 // but WITHOUT ANY WARRANTY; without even the implied warranty of
15 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 // Lesser General Public License for more details.
17 //
18 // You should have received a copy of the GNU Lesser General Public
19 // License along with this library; if not, write to the Free Software
20 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
21 //
22 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
23 
24 #include "config.h"
25 
26 #include <stdio.h>
27 #include <stdlib.h>
28 #include <string.h>
29 
30 #ifdef HAVE_UNISTD_H
31 #include <unistd.h>
32 #endif
33 
34 #include <sys/stat.h>
35 
36 #include <fstream>
37 #include <sstream> // std::stringstream
38 #include <thread>
39 #include <future>
40 
41 // rapidjson
42 #include <stringbuffer.h>
43 #include <writer.h>
44 #include "document.h"
45 
46 #include <libdap/D4Group.h>
47 #include <libdap/D4Attributes.h>
48 #include <libdap/DataDDS.h>
49 
50 #include "BESContextManager.h"
51 #include "BESDapResponseBuilder.h"
52 #include "DapFunctionUtils.h"
53 #include "BESDebug.h"
54 #include "BESUtil.h"
55 #include "TempFile.h"
56 
57 #include "FONcBaseType.h"
58 #include "FONcTransmitter.h"
59 #include "FONcTransform.h"
60 
61 using namespace std;
62 using namespace rapidjson;
63 
64 #define NEW_LINE ((char)0x0a)
65 #define CF_HISTORY_KEY "history"
66 #define CF_HISTORY_CONTEXT "cf_history_entry"
67 #define HISTORY_JSON_KEY "history_json"
68 #define HISTORY_JSON_CONTEXT "history_json_entry"
69 
70 #define MODULE "fonc"
71 #define prolog string("history_utils::").append(__func__).append("() - ")
72 
73 #if 0
74 void appendHistoryJson(vector<string> *global_attr, vector<string> jsonNew)
75 {
76 
77  const char *oldJson = global_attr->at(0).c_str();
78  const char *newJson = jsonNew.at(0).c_str();
79  Document docNew, docOld;
80  Document::AllocatorType &allocator = docOld.GetAllocator();
81  docNew.SetArray();
82  docNew.Parse(newJson);
83  docOld.SetArray();
84  docOld.Parse(oldJson);
85  docNew.PushBack(docOld, allocator);
86 
87  // Stringify JSON
88  StringBuffer buffer;
89  Writer<StringBuffer> writer(buffer);
90  docNew.Accept(writer);
91  global_attr->clear();
92  global_attr->push_back(buffer.GetString());
93 }
94 #endif
95 
96 
97 
105 string create_cf_history_txt(const string &request_url)
106 {
107  // This code will be used only when the 'cf_history_context' is not set,
108  // which should be never in an operating server. However, when we are
109  // testing, often only the besstandalone code is running and the existing
110  // baselines don't set the context, so we have this. It must do something
111  // so the tests are not hopelessly obscure and filter out junk that varies
112  // by host (e.g., the names of cached files that have been decompressed).
113  // jhrg 6/3/16
114 
115  string cf_history_entry;
116  std::stringstream ss;
117  time_t raw_now;
118  struct tm *timeinfo;
119  time(&raw_now); /* get current time; same as: timer = time(NULL) */
120  timeinfo = localtime(&raw_now);
121 
122  char time_str[100];
123  strftime(time_str, 100, "%Y-%m-%d %H:%M:%S", timeinfo);
124 
125  ss << time_str << " " << "Hyrax" << " " << request_url << '\n';
126  cf_history_entry = ss.str();
127  BESDEBUG(MODULE, prolog << "New cf history entry: '" << cf_history_entry << "'" << endl);
128  return cf_history_entry;
129 }
130 
/**
 * @brief Emit one history_json entry for @p request_url through @p writer.
 *
 * The emitted object has the members "$schema", "date_time" (current local time
 * formatted as YYYY-MM-DDTHH:MM:SS), "program", "version" and "parameters", where
 * "parameters" is an array holding a single object with a "request_url" member.
 * If the current time cannot be converted to local time, "date_time" is an
 * empty string.
 *
 * @tparam Writer Any type offering StartObject(), EndObject(), StartArray(),
 *         EndArray(), Key(const char *) and String(const char *).
 * @param request_url The request text stored under "request_url".
 * @param writer Receives the sequence of JSON events.
 */
template <typename Writer>
void create_json_history_obj(const std::string &request_url, Writer& writer)
{
    // This code is used only when the history_json context is not set, which
    // should be never in an operating server. However, when we are testing,
    // often only the besstandalone code is running and the existing baselines
    // don't set the context, so we have this. It must do something so the tests
    // are not hopelessly obscure and filter out junk that varies by host (e.g.,
    // the names of cached files that have been decompressed).
    // jhrg 6/3/16
    // sk 6/17/21

    // "$schema"
    const std::string schema = "https://harmony.earthdata.nasa.gov/schemas/history/0.1.0/history-0.1.0.json";

    // "date_time": localtime_r() fills a caller-supplied struct instead of the
    // shared static buffer that localtime() returns, so concurrent callers
    // cannot clobber each other's result.
    const time_t raw_now = time(nullptr);
    struct tm timeinfo{};
    char time_str[100] = "";
    if (localtime_r(&raw_now, &timeinfo) != nullptr) {
        strftime(time_str, sizeof(time_str), "%Y-%m-%dT%H:%M:%S", &timeinfo);
    }

    writer.StartObject();
    writer.Key("$schema");
    writer.String(schema.c_str());
    writer.Key("date_time");
    writer.String(time_str);
    writer.Key("program");
    writer.String("hyrax");
    writer.Key("version");
    // NOTE(review): the version is a fixed literal here - confirm whether it
    // should track the running server's version.
    writer.String("1.16.3");
    writer.Key("parameters");
    writer.StartArray();
    writer.StartObject();
    writer.Key("request_url");
    writer.String(request_url.c_str());
    writer.EndObject();
    writer.EndArray();
    writer.EndObject();
}
178 
184 string get_cf_history_entry (const string &request_url)
185 {
186  bool foundIt = false;
187  string cf_history_entry = BESContextManager::TheManager()->get_context(CF_HISTORY_CONTEXT, foundIt);
188  if (!foundIt) {
189  // If the cf_history_entry context was not set by the incoming command then
190  // we compute and the value of the history string here.
191  cf_history_entry = create_cf_history_txt(request_url);
192  }
193  return cf_history_entry;
194 }
195 
196 #if 0
202 vector<string> get_history_json_entry (const string &request_url)
203 {
204  vector<string> history_json_entry_vec;
205  bool foundIt = false;
206  string history_json_entry = BESContextManager::TheManager()->get_context("history_json_entry", foundIt);
207 
208  if (!foundIt) {
209  // If the history_json_entry context was not set by the incoming command then
210  // we compute and the value of the history string here.
211  Document history_json_doc;
212  history_json_doc.SetObject();
213  StringBuffer buffer;
214  Writer<StringBuffer> writer(buffer);
215  create_json_history_obj(request_url, writer);
216  history_json_entry = buffer.GetString();
217  }
218 
219  BESDEBUG(MODULE,prolog << "Using history_json_entry: " << history_json_entry << endl);
220  // And here we add to the returned vector.
221  history_json_entry_vec.push_back(history_json_entry);
222  return history_json_entry_vec;
223 }
224 #endif
225 
226 
227 
233 string get_history_json_entry (const string &request_url)
234 {
235  bool foundIt = false;
236  string history_json_entry = BESContextManager::TheManager()->get_context(HISTORY_JSON_CONTEXT, foundIt);
237  if (!foundIt) {
238  // If the history_json_entry context was not set as a context key on BESContextManager
239  // we compute and the value of the history string here.
240  Document history_json_doc;
241  history_json_doc.SetObject();
242  StringBuffer buffer;
243  Writer<StringBuffer> writer(buffer);
244  create_json_history_obj(request_url, writer);
245  history_json_entry = buffer.GetString();
246  }
247 
248  BESDEBUG(MODULE,prolog << "Using history_json_entry: " << history_json_entry << endl);
249  return history_json_entry;
250 }
251 
260 string json_append_entry_to_array(const string& source_array_str, const string& new_entry_str)
261 {
262  Document target_array;
263  target_array.SetArray();
264  Document::AllocatorType &allocator = target_array.GetAllocator();
265  target_array.Parse(source_array_str.c_str()); // Parse json array
266 
267  Document entry;
268  entry.Parse(new_entry_str.c_str()); // Parse new entry
269 
270  target_array.PushBack(entry, allocator);
271 
272  // Stringify JSON
273  StringBuffer buffer;
274  Writer<StringBuffer> writer(buffer);
275  target_array.Accept(writer);
276  return buffer.GetString();
277 }
278 
284 void update_history_json_attr(D4Attribute *global_attribute, const string &request_url)
285 {
286  BESDEBUG(MODULE,prolog << "Updating history_json entry for global DAP4 attribute: " << global_attribute->name() << endl);
287 
288  string hj_entry_str = get_history_json_entry(request_url);
289  BESDEBUG(MODULE,prolog << "hj_entry_str: " << hj_entry_str << endl);
290 
291  string history_json;
292 
293  D4Attribute *history_json_attr = nullptr;
294  if(global_attribute->type() == D4AttributeType::attr_container_c){
295  history_json_attr = global_attribute->attributes()->find(HISTORY_JSON_KEY);
296  }
297  else if( global_attribute->name() == HISTORY_JSON_KEY){
298  history_json_attr = global_attribute;
299  }
300 
301  if (!history_json_attr) {
302  // If there is no source history_json attribute then we make one from scratch
303  // and add it to the global_attribute
304  BESDEBUG(MODULE, prolog << "Adding history_json entry to global_attribute " << global_attribute->name() << endl);
305  history_json_attr = new D4Attribute(HISTORY_JSON_KEY, attr_str_c);
306  global_attribute->attributes()->add_attribute_nocopy(history_json_attr);
307 
308  // Promote the entry to an json array, assigning it the value of the attribute
309  history_json = "[" + hj_entry_str +"]";
310  BESDEBUG(MODULE,prolog << "CREATED history_json: " << history_json << endl);
311 
312  } else {
313  // We found an existing history_jason attribute!
314  // We know the convention is that this should be a single valued DAP attribute
315  // We need to get the existing json document, parse it, insert the entry into
316  // the document using rapidjson, and then serialize it to a new string value that
317  // We will use to overwrite the current value in the existing history_json_attr.
318  history_json = *history_json_attr->value_begin();
319  history_json=R"([{"$schema":"https:\/\/harmony.earthdata.nasa.gov\/schemas\/history\/0.1.0\/history-0.1.0.json","date_time":"2021-06-25T13:28:48.951+0000","program":"hyrax","version":"@HyraxVersion@","parameters":[{"request_url":"http:\/\/localhost:8080\/opendap\/hj\/coads_climatology.nc.dap.nc4?GEN1"}]}])";
320  BESDEBUG(MODULE,prolog << "FOUND history_json: " << history_json << endl);
321 
322  // Append the entry to the exisiting history_json array
323  history_json = json_append_entry_to_array(history_json, hj_entry_str);
324  BESDEBUG(MODULE,prolog << "NEW history_json: " << history_json << endl);
325 
326  }
327 
328  // Now the we have the update history_json element, serialized to a string, we use it to
329  // the value of the existing D4Attribute history_json_attr
330  vector<string> attr_vals;
331  attr_vals.push_back(history_json);
332  history_json_attr->add_value_vector(attr_vals); // This replaces the value
333 }
334 
341 string append_cf_history_entry(string cf_history, string cf_history_entry){
342 
343  stringstream cf_hist_new;
344  if(!cf_history.empty()){
345  cf_hist_new << cf_history;
346  if(cf_history.back() != NEW_LINE)
347  cf_hist_new << NEW_LINE;
348  }
349  cf_hist_new << cf_history_entry;
350  if(cf_history_entry.back() != NEW_LINE)
351  cf_hist_new << NEW_LINE;
352 
353  BESDEBUG(MODULE, prolog << "Updated cf history: '" << cf_hist_new.str() << "'" << endl);
354  return cf_hist_new.str();
355 }
356 
362 void update_cf_history_attr(D4Attribute *global_attribute, const string &request_url){
363  BESDEBUG(MODULE,prolog << "Updating cf history entry for global DAP4 attribute: " << global_attribute->name() << endl);
364 
365  string cf_hist_entry = get_cf_history_entry(request_url);
366  BESDEBUG(MODULE, prolog << "New cf history entry: " << cf_hist_entry << endl);
367 
368  string cf_history;
369  D4Attribute *history_attr = nullptr;
370  if(global_attribute->type() == D4AttributeType::attr_container_c){
371  history_attr = global_attribute->attributes()->find(CF_HISTORY_KEY);
372  }
373  else if( global_attribute->name() == CF_HISTORY_KEY){
374  history_attr = global_attribute;
375  }
376 
377  if (!history_attr) {
378  //if there is no source cf history attribute make one and add it to the global_attribute.
379  BESDEBUG(MODULE, prolog << "Adding history entry to " << global_attribute->name() << endl);
380  history_attr = new D4Attribute(CF_HISTORY_KEY, attr_str_c);
381  global_attribute->attributes()->add_attribute_nocopy(history_attr);
382  }
383  else {
384  cf_history = history_attr->value(0);
385  }
386  cf_history = append_cf_history_entry(cf_history,cf_hist_entry);
387 
388  std::vector<std::string> cf_hist_vec;
389  cf_hist_vec.push_back(cf_history);
390  history_attr->add_value_vector(cf_hist_vec);
391 }
392 
393 
399 void update_cf_history_attr(AttrTable *global_attr_tbl, const string &request_url) {
400 
401  BESDEBUG(MODULE,prolog << "Updating cf history entry for global DAP2 attribute: " << global_attr_tbl->get_name() << endl);
402 
403  string cf_hist_entry = get_cf_history_entry(request_url);
404  BESDEBUG(MODULE,prolog << "New cf history entry: '" << cf_hist_entry << "'" <<endl);
405 
406  string cf_history = global_attr_tbl->get_attr(CF_HISTORY_KEY); // returns empty string if not found
407  BESDEBUG(MODULE,prolog << "Previous cf history: '" << cf_history << "'" << endl);
408 
409  cf_history = append_cf_history_entry(cf_history,cf_hist_entry);
410  BESDEBUG(MODULE,prolog << "Updated cf history: '" << cf_history << "'" << endl);
411 
412  global_attr_tbl->del_attr(CF_HISTORY_KEY, -1);
413  int attr_count = global_attr_tbl->append_attr(CF_HISTORY_KEY, "string", cf_history);
414  BESDEBUG(MODULE,prolog << "Found " << attr_count << " value(s) for the cf history attribute." << endl);
415 }
416 
422 void update_history_json_attr(AttrTable *global_attr_tbl, const string &request_url) {
423 
424  BESDEBUG(MODULE,prolog << "Updating history_json entry for global DAP2 attribute: " << global_attr_tbl->get_name() << endl);
425 
426  string hj_entry_str = get_history_json_entry(request_url);
427  BESDEBUG(MODULE,prolog << "New history_json entry: " << hj_entry_str << endl);
428 
429  string history_json = global_attr_tbl->get_attr(HISTORY_JSON_KEY);
430  BESDEBUG(MODULE,prolog << "Previous history_json: " << history_json << endl);
431 
432  if (history_json.empty()) {
433  //if there is no source history_json attribute
434  BESDEBUG(MODULE, prolog << "Creating new history_json entry to global attribute: " << global_attr_tbl->get_name() << endl);
435  history_json = "[" + hj_entry_str +"]"; // Hack to make the entry into a json array.
436  } else {
437  history_json = json_append_entry_to_array(history_json,hj_entry_str);
438  global_attr_tbl->del_attr(HISTORY_JSON_KEY, -1);
439  }
440  BESDEBUG(MODULE,prolog << "New history_json: " << history_json << endl);
441  int attr_count = global_attr_tbl->append_attr(HISTORY_JSON_KEY, "string", history_json);
442  BESDEBUG(MODULE,prolog << "Found " << attr_count << " value(s) for the history_json attribute." << endl);
443 
444 }
445 
446 
453 void updateHistoryAttributes(DDS *dds, const string &ce)
454 {
455  string request_url = dds->filename();
456  // remove path info
457  request_url = request_url.substr(request_url.find_last_of('/')+1);
458  // remove 'uncompress' cache mangling
459  request_url = request_url.substr(request_url.find_last_of('#')+1);
460  if(!ce.empty()) request_url += "?" + ce;
461 
462  // Add the new entry to the "history" attribute
463  // Get the top level Attribute table.
464  AttrTable &globals = dds->get_attr_table();
465 
466  // Since many files support "CF" conventions the history tag may already exist in the source data
467  // and we should add an entry to it if possible.
468  bool added_history = false; // Used to indicate that we located a toplevel AttrTable whose name ends in "_GLOBAL" and that has an existing "history" attribute.
469 // unsigned int num_attrs = globals.get_size();
470  if (globals.is_global_attribute()) {
471  // Here we look for a top level AttrTable whose name ends with "_GLOBAL" which is where, by convention,
472  // data ingest handlers place global level attributes found in the source dataset.
473  auto i = globals.attr_begin();
474  auto e = globals.attr_end();
475  for (; i != e; i++) {
476  AttrType attrType = globals.get_attr_type(i);
477  string attr_name = globals.get_name(i);
478  // Test the entry...
479  if (attrType == Attr_container && BESUtil::endsWith(attr_name, "_GLOBAL")) {
480  // We are going to append to an existing history attribute if there is one
481  // Or just add a history attribute if there is not one. In a most
482  // handy API moment, append_attr() does just this.
483 
484  AttrTable *global_attr_tbl = globals.get_attr_table(i);
485  update_cf_history_attr(global_attr_tbl,request_url);
486  update_history_json_attr(global_attr_tbl,request_url);
487  added_history = true;
488  BESDEBUG(MODULE, prolog << "Added history entries to " << attr_name << endl);
489  }
490  }
491  if(!added_history){
492  auto dap_global_at = globals.append_container("DAP_GLOBAL");
493  dap_global_at->set_name("DAP_GLOBAL");
494  dap_global_at->set_is_global_attribute(true);
495 
496  update_cf_history_attr(dap_global_at,request_url);
497  update_history_json_attr(dap_global_at,request_url);
498  BESDEBUG(MODULE, prolog << "No top level AttributeTable name matched '*_GLOBAL'. "
499  "Created DAP_GLOBAL AttributeTable and added history attributes to it." << endl);
500  }
501  }
502 }
503 
504 
511 void updateHistoryAttributes(DMR *dmr, const string &ce)
512 {
513  string request_url = dmr->filename();
514  // remove path info
515  request_url = request_url.substr(request_url.find_last_of('/')+1);
516  // remove 'uncompress' cache mangling
517  request_url = request_url.substr(request_url.find_last_of('#')+1);
518  if(!ce.empty()) request_url += "?" + ce;
519 
520  bool added_cf_history = false;
521  bool added_json_history = false;
522  D4Group* root_grp = dmr->root();
523  D4Attributes *root_attrs = root_grp->attributes();
524  for (auto attrs = root_attrs->attribute_begin(); attrs != root_attrs->attribute_end(); ++attrs) {
525  string name = (*attrs)->name();
526  BESDEBUG(MODULE, prolog << "Attribute name is "<< name << endl);
527  if ((*attrs)->type() == D4AttributeType::attr_container_c && BESUtil::endsWith(name, "_GLOBAL")) {
528  // Update Climate Forecast history attribute.
529  update_cf_history_attr(*attrs, request_url);
530  added_cf_history = true;
531 
532  // Update NASA's history_json attribute
533  update_history_json_attr(*attrs, request_url);
534  added_json_history = true;
535  }
536  else if(name == CF_HISTORY_KEY){ // A top level cf history attribute
537  update_cf_history_attr(*attrs, request_url);
538  added_cf_history = true;
539  }
540  else if( name == HISTORY_JSON_KEY){ // A top level history_json attribute
541  update_cf_history_attr(*attrs, request_url);
542  added_json_history = true;
543  }
544  }
545  if(!added_cf_history || !added_json_history){
546  auto *dap_global = new D4Attribute("DAP_GLOBAL",attr_container_c);
547  root_attrs->add_attribute_nocopy(dap_global);
548  // CF history attribute
549  if(!added_cf_history){
550  update_cf_history_attr(dap_global, request_url);
551  }
552  // NASA's history_json attribute
553  if(!added_json_history){
554  update_history_json_attr(dap_global,request_url);
555  }
556  }
557 }
virtual std::string get_context(const std::string &name, bool &found)
retrieve the value of the specified context from the BES
static bool endsWith(std::string const &fullString, std::string const &ending)
Definition: BESUtil.cc:961
A document for parsing JSON text as DOM.
Definition: document.h:2203
Allocator & GetAllocator()
Get the allocator of this document.
Definition: document.h:2491
GenericDocument & Parse(const typename SourceEncoding::Ch *str)
Parse JSON text from a read-only string (with Encoding conversion)
Definition: document.h:2404
Represents an in-memory output stream.
Definition: stringbuffer.h:41
JSON writer.
Definition: writer.h:90
Concept for allocating, resizing and freeing memory block.
main RapidJSON namespace