bes  Updated for version 3.20.10
SuperChunky.cc
1 // -*- mode: c++; c-basic-offset:4 -*-
2 
3 // This file is part of the BES
4 // Copyright (c) 2020 OPeNDAP, Inc.
5 // Author: Nathan Potter<ndp@opendap.org>
6 //
7 // This library is free software; you can redistribute it and/or
8 // modify it under the terms of the GNU Lesser General Public
9 // License as published by the Free Software Foundation; either
10 // version 2.1 of the License, or (at your option) any later version.
11 //
12 // This library is distributed in the hope that it will be useful,
13 // but WITHOUT ANY WARRANTY; without even the implied warranty of
14 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
15 // Lesser General Public License for more details.
16 //
17 // You should have received a copy of the GNU Lesser General Public
18 // License along with this library; if not, write to the Free Software
19 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 //
21 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
22 //
23 // Created by ndp on 12/2/20.
24 //
25 
26 #include "config.h"
27 
28 #include <vector>
29 #include <string>
30 #include <sstream>
31 #include <iostream>
32 #include <fstream>
33 
34 #include <libdap/BaseType.h>
35 #include <libdap/D4Group.h>
36 
37 #include "BESInternalError.h"
38 #include "BESInternalFatalError.h"
39 #include "BESDebug.h"
40 #include "BESUtil.h"
41 #include "TheBESKeys.h"
42 #include "BESLog.h"
43 #include "BESStopWatch.h"
44 #include "BESIndent.h"
45 
46 #include "DmrppNames.h"
47 #include "DMRpp.h"
48 #include "DmrppD4Group.h"
49 #include "DmrppArray.h"
50 #include "DmrppParserSax2.h"
51 #include "DmrppTypeFactory.h"
52 
53 #include "SuperChunk.h"
54 
55 #define prolog std::string("superchunky::").append(__func__).append("() - ")
56 
57 namespace dmrpp {
58 
59 bool debug = true;
60 
61 void compute_super_chunks(dmrpp::DmrppArray *array, bool /*only_constrained*/, vector<SuperChunk *> &super_chunks){
62 
63  // Now we get the chunkyness
64  auto chunk_dim_sizes = array->get_chunk_dimension_sizes();
65  //unsigned int chunk_size_in_elements = array->get_chunk_size_in_elements();
66  auto const &chunks = array->get_immutable_chunks();
67 
68  unsigned long long sc_count=0;
69  stringstream sc_id;
70  sc_id << array->name() << "-" << sc_count++;
71 
72  //unsigned long long super_chunk_index = 0;
73  auto currentSuperChunk = new SuperChunk(sc_id.str(), array);
74  super_chunks.push_back(currentSuperChunk); // first super chunk...
75  if(debug) cout << "SuperChunking array: "<< array->name() << endl;
76 
77  for(const auto &chunk:chunks){
78  bool was_added = currentSuperChunk->add_chunk(chunk);
79  if(!was_added){
80  if(debug) {
81  unsigned long long next_contiguous_chunk_offset = currentSuperChunk->get_offset() + currentSuperChunk->get_size();
82  unsigned long long gap_size;
83  bool is_behind = false;
84  if(chunk->get_offset() > next_contiguous_chunk_offset){
85  gap_size = chunk->get_offset() - next_contiguous_chunk_offset;
86  }
87  else {
88  is_behind = true;
89  gap_size = next_contiguous_chunk_offset - chunk->get_offset();
90  }
91  stringstream msg;
92  msg << "FOUND GAP chunk(offset: " << chunk->get_offset();
93  msg << " size: " << chunk->get_size() << ")";
94  msg << " SuperChunk(ptr: " << (void *) currentSuperChunk;
95  msg << " offset: " << currentSuperChunk->get_offset();
96  msg << " size: " << currentSuperChunk->get_size();
97  msg << " next_contiguous_chunk_offset: " << next_contiguous_chunk_offset << ") ";
98  msg << " gap_size: " << gap_size;
99  msg << " bytes" << (is_behind?" behind":" beyond") << " target offset";
100  msg << endl;
101  cerr << msg.str();
102  }
103  // If we were working on a SuperChunk (i.e. the current SuperChunk contains chunks)
104  // then we need to start a new one.
105  if(!currentSuperChunk->empty()){
106  sc_id.str(std::string());
107  sc_id << array->name() << "-" << sc_count++;
108  currentSuperChunk = new SuperChunk(sc_id.str(), array);
109  super_chunks.push_back(currentSuperChunk); // next super chunk...
110  }
111  bool add_first_successful = currentSuperChunk->add_chunk(chunk);
112  if(!add_first_successful)
113  throw BESInternalError("ERROR: Failed to add first Chunk to a new SuperChunk."+
114  chunk->to_string() ,__FILE__,__LINE__);
115 
116  }
117  }
118  // Dump the currentSuperChunk if it doesn't have anything in it.
119  if(currentSuperChunk->empty()) {
120  super_chunks.pop_back();
121  delete currentSuperChunk;
122  }
123  if(false){
124  cout << "SuperChunk Inventory For Array: " << array->name() << endl;
125  for(auto super_chunk: super_chunks) {
126  cout << super_chunk->to_string(true) << endl;
127  }
128  }
129 }
130 
131 void compute_super_chunks(libdap::BaseType *var, bool only_constrained, vector<SuperChunk *> &super_chunks) {
132  if (var->is_simple_type())
133  return;
134  if (var->is_constructor_type())
135  return;
136  if (var->is_vector_type()) {
137  auto array = dynamic_cast<dmrpp::DmrppArray *>(var);
138  if (array) {
139  if(debug) cout << "Found DmrppArray: "<< array->name() << endl;
140  compute_super_chunks(array, only_constrained, super_chunks);
141  }
142  else {
143  BESDEBUG(MODULE, prolog << "The variable: "<< var->name()
144  << " is not an instance of DmrppArray. SKIPPING"<< endl);
145  }
146  }
147 }
148 
#if 0
// Disabled prototype, compiled out. It inventories runs of contiguous chunks
// directly, without the SuperChunk class; the live code path uses
// compute_super_chunks() instead. Retained for reference only.
//
// Compared with the earlier version of this prototype:
//  - chunks are iterated by reference and the runs hold the shared_ptr<Chunk>
//    objects themselves (the old code stored the address of a by-value loop
//    variable, which dangled after each iteration);
//  - the runs are held by value, so nothing is leaked;
//  - the local container no longer shadows the super_chunks parameter;
//  - the "nbytes" field reports the chunk size rather than repeating the offset;
//  - the gap size is computed without unsigned underflow when a chunk lies
//    behind the expected offset.
void inventory_super_chunks(libdap::BaseType *var, bool /*only_constrained*/, vector<SuperChunk *> &/*super_chunks*/){
    if(var->is_simple_type() || var->is_constructor_type() || !var->is_vector_type())
        return;

    auto array = dynamic_cast<DmrppArray*>(var);
    if(!array){
        cerr << prolog << " ERROR! The variable: "<< var->name()
             << " is not an instance of DmrppArray. SKIPPING"<< endl;
        return;
    }

    auto const &chunks = array->get_immutable_chunks();

    // Each inner vector is one run of chunks whose byte ranges follow one another directly.
    vector<vector<std::shared_ptr<Chunk>>> chunk_runs;
    unsigned long long next_contiguous_chunk_offset = 0;

    if(debug) cout << "SuperChunking array: "<< array->name() << endl;

    for(const auto &chunk : chunks){
        const unsigned long long current_offset = chunk->get_offset();
        const unsigned long long current_size = chunk->get_size();

        const bool first = chunk_runs.empty();
        if(first || current_offset != next_contiguous_chunk_offset){
            if(debug && !first) {
                // The current chunk is not contiguous with the previous one.
                const bool is_behind = current_offset < next_contiguous_chunk_offset;
                const unsigned long long gap_size = is_behind
                        ? next_contiguous_chunk_offset - current_offset
                        : current_offset - next_contiguous_chunk_offset;
                cout << "FOUND GAP current_offset: " << current_offset <<
                     " nbytes: " << current_size <<
                     " next_contiguous_chunk_offset: " << next_contiguous_chunk_offset <<
                     " gap_size: " << gap_size << (is_behind?" behind":" beyond") <<
                     " currentSuperChunk.size(): " << chunk_runs.back().size() << endl;
            }
            chunk_runs.emplace_back(); // start the next run
        }
        chunk_runs.back().push_back(chunk);
        next_contiguous_chunk_offset = current_offset + current_size;
    }

    cout << "SuperChunk Inventory For Array: " << array->name() << endl;
    unsigned long long sc_count = 0;
    for(const auto &run : chunk_runs) {
        cout << " SuperChunk[" << sc_count++ << "] contains : " << run.size() << " chunks."
             << endl;
        if (debug) {
            for (const auto &chunk : run) {
                cout << " " << chunk->to_string() << endl;
            }
        }
    }
}
#endif
226 
227  void inventory_super_chunks(libdap::D4Group *group, bool only_constrained, vector<SuperChunk *> &super_chunks){
228 
229  // Process Groups - RECURSION HAPPENS HERE.
230  auto gtr = group->grp_begin();
231  while(gtr!=group->grp_end()){
232  if(debug) cout << "Found Group: "<< (*gtr)->name() << endl;
233  inventory_super_chunks(*gtr++, only_constrained, super_chunks);
234  }
235 
236  // Process Vars
237  auto vtr = group->var_begin();
238  while(vtr!=group->var_end()){
239  if(debug) cout << "Found Variable: "<< (*vtr)->type_name() << " " << (*vtr)->name() << endl;
240  compute_super_chunks(*vtr++, only_constrained, super_chunks);
241  //inventory_super_chunks(*vtr++, only_constrained);
242  }
243  }
244 
245  void inventory_super_chunks(DMRpp &dmr, bool only_constrained, vector<SuperChunk *> &super_chunks){
246  inventory_super_chunks(dmr.root(), only_constrained, super_chunks);
247  }
248 
249  dmrpp::DMRpp *get_dmrpp(const string dmrpp_filename){
250  ifstream dmrpp_ifs (dmrpp_filename);
251  if (dmrpp_ifs.is_open())
252  {
253  dmrpp::DmrppParserSax2 parser;
254  dmrpp::DmrppTypeFactory factory;
255  auto dmr = new DMRpp(&factory,dmrpp_filename);
256  parser.intern(dmrpp_ifs, dmr);
257  return dmr;
258  }
259  else {
260  throw BESInternalFatalError("The provided file could not be opened. filename: '"+dmrpp_filename+"'",__FILE__,__LINE__);
261  }
262  }
263 
264  void inventory_super_chunks(const string dmrpp_filename){
265  cout << "DMR++ file: " << dmrpp_filename << endl;
266  dmrpp::DMRpp *dmr = get_dmrpp(dmrpp_filename);
267 
268  vector<SuperChunk *> super_chunks;
269 
270  {
271  BESStopWatch sw;
272  sw.start(prolog);
273  dmrpp::inventory_super_chunks(*dmr, false, super_chunks);
274  }
275 
276  cout << "DMR++ file: " << dmrpp_filename << endl;
277  cout << "Produced " << super_chunks.size() << " SuperChunks." << endl;
278  for(auto super_chunk: super_chunks) {
279  cout << super_chunk->to_string(true) << endl;
280  }
281 
282  delete dmr;
283  }
284 
285  void dump_vars(libdap::D4Group *group){
286  // Process Groups - RECURSION HAPPENS HERE.
287  auto gtr = group->grp_begin();
288  while(gtr!=group->grp_end()){
289  if(debug) cout << "Found Group: "<< (*gtr)->name() << endl;
290  dump_vars(*gtr++);
291  }
292 
293  // Process Vars
294  auto vtr = group->var_begin();
295  while(vtr!=group->var_end()){
296  libdap::BaseType *bt = *vtr++;
297  bt->dump(cout);
298  cout << endl;
299  }
300  }
301 
302  void dump_vars(DMRpp &dmr){
303  dump_vars(dmr.root());
304  }
305 } // namespace dmrpp
306 
307 int main(int argc, char *argv[]) {
308  string bes_log_file("superchunky_bes.log");
309  //string bes_debug_log_file("cerr");
310  //string bes_debug_keys( "bes,http,curl,dmrpp,dmrpp:3,dmrpp:4,rr");
311  //string http_cache_dir;
312  string prefix;
313  //string http_netrc_file;
314  string cache_effective_urls("false");
315  char *prefixCstr = getenv("prefix");
316  if (prefixCstr) {
317  prefix = prefixCstr;
318  } else {
319  prefix = "/";
320  }
321 
322  cout << "bes_log_file: " << bes_log_file << endl;
323 
324  auto bes_config_file = BESUtil::assemblePath(prefix, "/etc/bes/bes.conf", true);
325  TheBESKeys::ConfigFile = bes_config_file; // Set the config file for TheBESKeys
326  TheBESKeys::TheKeys()->set_key("BES.LogName", bes_log_file); // Set the log file so it goes where we say.
327  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^https?:\\/\\/.*$", false); // Set AllowedHosts to allow any URL
328  TheBESKeys::TheKeys()->set_key("AllowedHosts", "^file:\\/\\/\\/.*$", true); // Set AllowedHosts to allow any file
329  TheBESKeys::TheKeys()->set_key("Http.cache.effective.urls", cache_effective_urls, false); // Set AllowedHosts to allow any file
330 
331 
332  // if (bes_debug) BESDebug::SetUp(bes_debug_log_file + "," + bes_debug_keys); // Enable BESDebug settings
333 
334 
335  BESIndent::SetIndent("");
336 
337  for(auto i=1; i<argc; i++){
338  string dmrpp_filename(argv[i]);
339  //dmrpp::inventory_super_chunks(dmrpp_filename);
340 
341  dmrpp::DMRpp *dmrpp = dmrpp::get_dmrpp( dmrpp_filename);
342  dump_vars(*dmrpp);
343  }
344  return 0;
345 }
346 
347 
348 
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
virtual bool start(std::string name)
Definition: BESStopWatch.cc:67
static std::string assemblePath(const std::string &firstPart, const std::string &secondPart, bool leadingSlash=false, bool trailingSlash=false)
Assemble path fragments making sure that they are separated by a single '/' character.
Definition: BESUtil.cc:840
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void set_key(const std::string &key, const std::string &val, bool addto=false)
allows the user to set key/value pairs from within the application.
Definition: TheBESKeys.cc:206
static std::string ConfigFile
Definition: TheBESKeys.h:185
Provide a way to print the DMR++ response.
Definition: DMRpp.h:44
Extend libdap::Array so that a handler can read data using a DMR++ file.
Definition: DmrppArray.h:68
virtual unsigned long long get_size(bool constrained=false)
Return the total number of elements in this Array.
Definition: DmrppArray.cc:580
virtual unsigned long add_chunk(std::shared_ptr< http::url > d_data_url, const std::string &byte_order, unsigned long long size, unsigned long long offset, const std::string &position_in_array)
Add a new chunk as defined by an h4:byteStream element.
Definition: DmrppCommon.cc:204
virtual const std::vector< unsigned long long > & get_chunk_dimension_sizes() const
The chunk dimension sizes held in a const vector.
Definition: DmrppCommon.h:179
virtual const std::vector< std::shared_ptr< Chunk > > & get_immutable_chunks() const
A const reference to the vector of chunks.
Definition: DmrppCommon.h:169
void intern(std::istream &f, libdap::DMR *dest_dmr)