bes  Updated for version 3.20.10
SaxParserWrapper.cc
1 // This file is part of the "NcML Module" project, a BES module designed
// to allow NcML files to be used as a wrapper to add
4 // AIS to existing datasets of any format.
5 //
6 // Copyright (c) 2009 OPeNDAP, Inc.
7 // Author: Michael Johnson <m.johnson@opendap.org>
8 //
9 // For more information, please also see the main website: http://opendap.org/
10 //
11 // This library is free software; you can redistribute it and/or
12 // modify it under the terms of the GNU Lesser General Public
13 // License as published by the Free Software Foundation; either
14 // version 2.1 of the License, or (at your option) any later version.
15 //
16 // This library is distributed in the hope that it will be useful,
17 // but WITHOUT ANY WARRANTY; without even the implied warranty of
18 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
19 // Lesser General Public License for more details.
20 //
21 // You should have received a copy of the GNU Lesser General Public
22 // License along with this library; if not, write to the Free Software
23 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
24 //
// Please see the files COPYING and COPYRIGHT for more information on the LGPL.
26 //
27 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
29 
#include "config.h"

#include "SaxParserWrapper.h"

#include <cstdarg> // for va_list, va_start, va_end
#include <cstdio> // for vsnprintf
#include <exception>
#include <iostream>
#include <string>

#include <libxml/parser.h>
#include <libxml/xmlstring.h>

#include "BESDebug.h"
#include "BESError.h"
#include "BESInternalError.h"
#include "BESInternalFatalError.h"
#include "BESSyntaxUserError.h"
#include "BESForbiddenError.h"
#include "BESNotFoundError.h"
#include "NCMLDebug.h"
#include "SaxParser.h"
#include "XMLHelpers.h"
51 
52 // Toggle to tell the parser to use the Sax2 start/end element
53 // calls with namespace information.
54 // [ TODO We probably want to remove the non-namespace pathways at some point,
55 // but I will leave them here for now in case there's issues ]
56 #define NCML_PARSER_USE_SAX2_NAMESPACES 1
57 
58 using namespace std;
59 using namespace ncml_module;
60 
62 // Helpers
63 
64 #if NCML_PARSER_USE_SAX2_NAMESPACES
65 static const int SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE = 5;
66 static int toXMLAttributeMapWithNamespaces(XMLAttributeMap& attrMap, const xmlChar** attributes, int num_attributes)
67 {
68  attrMap.clear();
69  for (int i = 0; i < num_attributes; ++i) {
70  XMLAttribute attr;
71  attr.fromSAX2NamespaceAttributes(attributes);
72  attributes += SAX2_NAMESPACE_ATTRIBUTE_ARRAY_STRIDE; // jump to start of next record
73  attrMap.addAttribute(attr);
74  }
75  return num_attributes;
76 }
77 #else
78 // Assumes the non-namespace calls, so attrs is stride 2 {name,value}
79 static int toXMLAttributeMapNoNamespaces(XMLAttributeMap& attrMap, const xmlChar** attrs)
80 {
81  attrMap.clear();
82  int count=0;
83  while (attrs && *attrs != NULL)
84  {
85  XMLAttribute attr;
86  attr.localname = XMLUtil::xmlCharToString(*attrs);
87  attr.value = XMLUtil::xmlCharToString(*(attrs+1));
88  attrMap.addAttribute(attr);
89  attrs += 2;
90  count++;
91  }
92  return count;
93 }
94 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
95 
97 // Callback we will register that just pass on to our C++ engine
98 //
99 // NOTE WELL: New C handlers need to follow the given
100 // other examples in order to avoid memory leaks
101 // in libxml during an exception!
102 
// To avoid cut & paste below, we use a pair of macros to cast the void* into the wrapper and
// set up a proper error handling structure around the main call.
// BEGIN_SAFE_PARSER_BLOCK internally defines the symbol "parser" to the SaxParser contained in the wrapper.
// So for example, a safe handler call to SaxParser would look like:
// static void ncmlStartDocument(void* userData)
// {
//     BEGIN_SAFE_PARSER_BLOCK(userData) // pass in the void*, which is a SaxParserWrapper*
//     parser.onStartDocument(); // call the dispatch on the wrapped parser using the autodefined name parser
//     END_SAFE_PARSER_BLOCK // end the error handling wrapper
// }
113 
// Opens the guarded region of a SAX callback.
// argName is the callback's void* user data; it is cast to SaxParserWrapper*.
// If the wrapper is already in the exception state the enclosing function
// returns at once, so the enclosing function must return void.
// Otherwise a try block is opened, the local reference "parser" is bound to
// the wrapped SaxParser, and the parser is told the wrapper's current line.
// Every use must be closed with END_SAFE_PARSER_BLOCK.
#define BEGIN_SAFE_PARSER_BLOCK(argName) \
    { \
        SaxParserWrapper* _spw_ = static_cast<SaxParserWrapper*>(argName); \
        if (_spw_->isExceptionState()) { \
            return; \
        } \
        try { \
            SaxParser& parser = _spw_->getParser(); \
            parser.setParseLineNumber(_spw_->getCurrentParseLine());

// Closes the region opened by BEGIN_SAFE_PARSER_BLOCK.
// No exception leaves the callback: each handler wraps what it caught in a
// BESInternalError and hands it to the wrapper's deferException().
#define END_SAFE_PARSER_BLOCK \
        } \
        catch (BESError& theErr) { \
            BESDEBUG("ncml", "Caught BESError&, deferring..." << endl); \
            BESInternalError _badness_("ParseError: " + theErr.get_message() , theErr.get_file(), theErr.get_line()); \
            _spw_->deferException(_badness_); \
        } \
        catch (std::exception& ex) { \
            BESDEBUG("ncml", "Caught std::exception&, wrapping and deferring..." << endl); \
            BESInternalError _badness_("ParseError: " + string(ex.what()), __FILE__, __LINE__); \
            _spw_->deferException(_badness_); \
        } \
        catch (...) { \
            BESDEBUG("ncml", "Caught unknown (...) exception: deferring default error." << endl); \
            BESInternalError _badness_("SaxParserWrapper:: Unknown Exception Type: ", __FILE__, __LINE__); \
            _spw_->deferException(_badness_); \
        } \
    }
149 
151 // Our C SAX callbacks, wrapped carefully.
152 
153 static void ncmlStartDocument(void* userData)
154 {
155  BEGIN_SAFE_PARSER_BLOCK(userData)
156 
157  parser.onStartDocument();
158 
159  END_SAFE_PARSER_BLOCK
160 }
161 
162 static void ncmlEndDocument(void* userData)
163 {
164  BEGIN_SAFE_PARSER_BLOCK(userData)
165 
166  parser.onEndDocument();
167 
168  END_SAFE_PARSER_BLOCK
169 }
170 
171 #if !NCML_PARSER_USE_SAX2_NAMESPACES
172 
173 static void ncmlStartElement(void * userData,
174  const xmlChar * name,
175  const xmlChar ** attrs)
176 {
177  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
178  BEGIN_SAFE_PARSER_BLOCK(1)
179 
180  string nameS = XMLUtil::xmlCharToString(name);
181  XMLAttributeMap map;
182  toXMLAttributeMapNoNamespaces(map, attrs);
183 
184  // These args will be valid for the scope of the call.
185  parser.onStartElement(nameS, map);
186 
187  END_SAFE_PARSER_BLOCK
188 }
189 
190 static void ncmlEndElement(void * userData,
191  const xmlChar * name)
192 {
193  BEGIN_SAFE_PARSER_BLOCK(1)
194 
195  string nameS = XMLUtil::xmlCharToString(name);
196  parser.onEndElement(nameS);
197 
198  END_SAFE_PARSER_BLOCK
199 }
200 #endif // !NCML_PARSER_USE_SAX2_NAMESPACES
201 
202 #if NCML_PARSER_USE_SAX2_NAMESPACES
203 static
204 void ncmlSax2StartElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI,
205  int nb_namespaces, const xmlChar **namespaces, int nb_attributes, int /* nb_defaulted */,
206  const xmlChar **attributes)
207 {
208  // BESDEBUG("ncml", "ncmlStartElement called for:<" << name << ">" << endl);
209  BEGIN_SAFE_PARSER_BLOCK(userData)
210 
211  BESDEBUG("ncml", "SaxParserWrapper::ncmlSax2StartElementNs() - localname:" << localname << endl);
212 
213  XMLAttributeMap attrMap;
214  toXMLAttributeMapWithNamespaces(attrMap, attributes, nb_attributes);
215 
216  XMLNamespaceMap nsMap;
217  nsMap.fromSAX2Namespaces(namespaces, nb_namespaces);
218 
219  // These args will be valid for the scope of the call.
220  string localnameString = XMLUtil::xmlCharToString(localname);
221  string prefixString = XMLUtil::xmlCharToString(prefix);
222  string uriString = XMLUtil::xmlCharToString(URI);
223 
224  parser.onStartElementWithNamespace(
225  localnameString,
226  prefixString,
227  uriString,
228  attrMap,
229  nsMap);
230 
231  END_SAFE_PARSER_BLOCK
232 }
233 
234 static
235 void ncmlSax2EndElementNs(void *userData, const xmlChar *localname, const xmlChar *prefix, const xmlChar *URI)
236 {
237  BEGIN_SAFE_PARSER_BLOCK(userData)
238 
239  string localnameString = XMLUtil::xmlCharToString(localname);
240  string prefixString = XMLUtil::xmlCharToString(prefix);
241  string uriString = XMLUtil::xmlCharToString(URI);
242  parser.onEndElementWithNamespace(localnameString, prefixString, uriString);
243 
244  END_SAFE_PARSER_BLOCK
245 }
246 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
247 
248 static void ncmlCharacters(void* userData, const xmlChar* content, int len)
249 {
250  BEGIN_SAFE_PARSER_BLOCK(userData)
251 
252  // len is since the content string might not be null terminated,
253  // so we have to build out own and pass it up special....
254  // TODO consider just using these xmlChar's upstairs to avoid copies, or make an adapter or something.
255  string characters("");
256  characters.reserve(len);
257  const xmlChar* contentEnd = content+len;
258  while(content != contentEnd)
259  {
260  characters += (const char)(*content++);
261  }
262 
263  parser.onCharacters(characters);
264 
265  END_SAFE_PARSER_BLOCK
266 }
267 
268 static void ncmlWarning(void* userData, const char* msg, ...)
269 {
270  BEGIN_SAFE_PARSER_BLOCK(userData)
271 
272  BESDEBUG("ncml", "SaxParserWrapper::ncmlWarning() - msg:" << msg << endl);
273 
274  char buffer[1024];
275  va_list(args);
276  va_start(args, msg);
277  unsigned int len = sizeof(buffer);
278  vsnprintf(buffer, len, msg, args);
279  va_end(args);
280  parser.onParseWarning(string(buffer));
281 
282  END_SAFE_PARSER_BLOCK
283 }
284 
285 static void ncmlFatalError(void* userData, const char* msg, ...)
286 {
287  BEGIN_SAFE_PARSER_BLOCK(userData)
288 
289  BESDEBUG("ncml", "SaxParserWrapper::ncmlFatalError() - msg:" << msg << endl);
290 
291  char buffer[1024];
292  va_list(args);
293  va_start(args, msg);
294  unsigned int len = sizeof(buffer);
295  vsnprintf(buffer, len, msg, args);
296  va_end(args);
297  parser.onParseError(string(buffer));
298 
299  END_SAFE_PARSER_BLOCK
300 }
301 
303 // class SaxParserWrapper impl
304 
305 SaxParserWrapper::SaxParserWrapper(SaxParser& parser) :
306  _parser(parser), _handler(), _state(NOT_PARSING), _errorMsg(""), _errorType(0), _errorFile(""), _errorLine(-1)
307 {
308 }
309 
310 SaxParserWrapper::~SaxParserWrapper()
311 {
312  // Really not much to do... everything cleans itself up.
313  _state = NOT_PARSING;
314 
315  // Leak fix. jhrg 6/21/19
316  cleanupParser();
317 }
318 
319 bool SaxParserWrapper::parse(const string& ncmlFilename)
320 {
321  // It's illegal to call this until it's done.
322  if (_state == PARSING) {
323  throw BESInternalError("Parse called again while already in parse.", __FILE__, __LINE__);
324  }
325 
326  // OK, now we're parsing
327  _state = PARSING;
328 
329  setupParser();
330 
331  bool success = xmlSAXUserParseFile(&_handler, this, ncmlFilename.c_str());
332 
333  // If we deferred an exception during the libxml parse call, now's the time to rethrow it.
334  if (isExceptionState()) {
336  }
337 
338  // Otherwise, we're also done parsing.
339  _state = NOT_PARSING;
340  return success;
341 }
342 
344 {
345  _state = EXCEPTION;
346  _errorType = theErr.get_bes_error_type();
347  _errorMsg = theErr.get_message();
348  _errorLine = theErr.get_line();
349  _errorFile = theErr.get_file();
350 }
351 
352 // HACK admittedly a little gross, but it's weird to have to copy an exception
353 // and this seemed the safest way rather than making dynamic storage, etc.
355 {
356  // Clear our state out so we can parse again though.
357  _state = NOT_PARSING;
358 
359  switch (_errorType) {
360  case BES_INTERNAL_ERROR:
361  throw BESInternalError(_errorMsg, _errorFile, _errorLine);
362 
363  case BES_INTERNAL_FATAL_ERROR:
364  throw BESInternalFatalError(_errorMsg, _errorFile, _errorLine);
365 
366  case BES_SYNTAX_USER_ERROR:
367  throw BESSyntaxUserError(_errorMsg, _errorFile, _errorLine);
368 
369  case BES_FORBIDDEN_ERROR:
370  throw BESForbiddenError(_errorMsg, _errorFile, _errorLine);
371 
372  case BES_NOT_FOUND_ERROR:
373  throw BESNotFoundError(_errorMsg, _errorFile, _errorLine);
374 
375  default:
376  throw BESInternalError("Unknown exception type.", __FILE__, __LINE__);
377  }
378 }
379 
381 {
382 #if 0
383  if (_context) {
384  return xmlSAX2GetLineNumber(_context);
385  }
386  else {
387  return -1;
388  }
389 #endif
390  return -1; //FIXME part of leak fix. jhrg 6.21.19
391 }
392 
393 static void setAllHandlerCBToNulls(xmlSAXHandler& h)
394 {
395  h.internalSubset = 0;
396  h.isStandalone = 0;
397  h.hasInternalSubset = 0;
398  h.hasExternalSubset = 0;
399  h.resolveEntity = 0;
400  h.getEntity = 0;
401  h.entityDecl = 0;
402  h.notationDecl = 0;
403  h.attributeDecl = 0;
404  h.elementDecl = 0;
405  h.unparsedEntityDecl = 0;
406  h.setDocumentLocator = 0;
407  h.startDocument = 0;
408  h.endDocument = 0;
409  h.startElement = 0;
410  h.endElement = 0;
411  h.reference = 0;
412  h.characters = 0;
413  h.ignorableWhitespace = 0;
414  h.processingInstruction = 0;
415  h.comment = 0;
416  h.warning = 0;
417  h.error = 0;
418  h.fatalError = 0;
419  h.getParameterEntity = 0;
420  h.cdataBlock = 0;
421  h.externalSubset = 0;
422 
423  // unsigned int initialized; magic number the init should fill in
424  /* The following fields are extensions available only on version 2 */
425  // void *_private; //i'd assume i don't set this either...
426  h.startElementNs = 0;
427  h.endElementNs = 0;
428  h.serror = 0;
429 }
430 
431 void SaxParserWrapper::setupParser()
432 {
433  // setup the handler for version 2,
434  // which sets an internal version magic number
435  // into _handler.initialized
436  // but which doesn't clear the handlers to 0.
437  xmlSAXVersion(&_handler, 2);
438 
439  // Initialize all handlers to 0 by hand to start
440  // so we don't blow those internal magic numbers.
441  setAllHandlerCBToNulls(_handler);
442 
443  // Put our static functions into the handler
444  _handler.startDocument = ncmlStartDocument;
445  _handler.endDocument = ncmlEndDocument;
446  _handler.warning = ncmlWarning;
447  _handler.error = ncmlFatalError;
448  _handler.fatalError = ncmlFatalError;
449  _handler.characters = ncmlCharacters;
450 
451  // We'll use one or the other until we're sure it works.
452 #if NCML_PARSER_USE_SAX2_NAMESPACES
453  _handler.startElement = 0;
454  _handler.endElement = 0;
455  _handler.startElementNs = ncmlSax2StartElementNs;
456  _handler.endElementNs = ncmlSax2EndElementNs;
457 #else
458  _handler.startElement = ncmlStartElement;
459  _handler.endElement = ncmlEndElement;
460  _handler.startElementNs = 0;
461  _handler.endElementNs = 0;
462 #endif // NCML_PARSER_USE_SAX2_NAMESPACES
463 }
464 
465 // Leak fix. jhrg 6/21/19
466 void SaxParserWrapper::cleanupParser() throw ()
467 {
468 }
Abstract exception class for the BES with basic string message.
Definition: BESError.h:58
virtual int get_bes_error_type()
Return the return code for this error class.
Definition: BESError.h:143
virtual int get_line()
get the line number where the exception was thrown
Definition: BESError.h:115
virtual std::string get_file()
get the file name where the exception was thrown
Definition: BESError.h:107
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
exception thrown if an internal error is found and is fatal to the BES
error thrown if the resource requested cannot be found
error thrown if there is a user syntax error in the request or any other user error
bool parse(const std::string &ncmlFilename)
Do a SAX parse of the ncmlFilename and pass the calls to wrapper parser.
void deferException(BESError &theErr)
The remaining calls are for the internals of the parser, but need to be public.
Interface class for the wrapper between libxml C SAX parser and our NCMLParser.
Definition: SaxParser.h:48
void addAttribute(const XMLAttribute &attribute)
Definition: XMLHelpers.cc:167
void fromSAX2Namespaces(const xmlChar **pNamespaces, int numNamespaces)
Definition: XMLHelpers.cc:320
NcML Parser for adding/modifying/removing metadata (attributes) to existing local datasets using NcML...
void fromSAX2NamespaceAttributes(const xmlChar **chunkOfFivePointers)
Definition: XMLHelpers.cc:94