bes  Updated for version 3.20.10
AllowedHosts.cc
1 // AllowedHosts.cc
2 
3 // -*- mode: c++; c-basic-offset:4 -*-
4 
5 // This file is part of the OPeNDAP Back-End Server (BES)
6 // and creates a list of allowed hosts: the systems that may be
7 // accessed by the server as part of its routine operation.
8 
9 // Copyright (c) 2018 OPeNDAP, Inc.
10 // Author: Nathan D. Potter <ndp@opendap.org>
11 //
12 // This library is free software; you can redistribute it and/or
13 // modify it under the terms of the GNU Lesser General Public
14 // License as published by the Free Software Foundation; either
15 // version 2.1 of the License, or (at your option) any later version.
16 //
17 // This library is distributed in the hope that it will be useful,
18 // but WITHOUT ANY WARRANTY; without even the implied warranty of
19 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
20 // Lesser General Public License for more details.
21 //
22 // You should have received a copy of the GNU Lesser General Public
23 // License along with this library; if not, write to the Free Software
24 // Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
25 //
26 // You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
27 
28 #include "config.h"
29 
30 #include <sstream>
31 
32 #include "BESUtil.h"
33 #include "BESCatalog.h"
34 #include "BESCatalogList.h"
35 #include "BESCatalogUtils.h"
36 #include "BESRegex.h"
37 #include "TheBESKeys.h"
38 #include "BESInternalError.h"
39 #include "BESDebug.h"
40 #include "BESNotFoundError.h"
41 #include "BESForbiddenError.h"
42 #include "BESLog.h"
43 
44 #include "HttpNames.h"
45 #include "url_impl.h"
46 
47 #include "AllowedHosts.h"
48 
49 
50 using namespace std;
51 
52 #define MODULE "ah"
53 #define prolog string("AllowedHosts::").append(__func__).append("() - ")
54 
55 namespace http {
56 
57 AllowedHosts *AllowedHosts::d_instance = nullptr;
61 static std::once_flag d_ah_init_once;
62 
69 AllowedHosts::theHosts() {
70  std::call_once(d_ah_init_once, AllowedHosts::initialize_instance);
71  return d_instance;
72 }
73 
74 AllowedHosts::AllowedHosts() {
75  bool found = false;
76  string key = ALLOWED_HOSTS_BES_KEY;
77  TheBESKeys::TheKeys()->get_values(ALLOWED_HOSTS_BES_KEY, d_allowed_hosts, found);
78  if (!found) {
79  throw BESInternalError(string("The allowed hosts key, '") + ALLOWED_HOSTS_BES_KEY
80  + "' has not been configured.", __FILE__, __LINE__);
81  }
82 }
83 
87 void AllowedHosts::initialize_instance() {
88  d_instance = new AllowedHosts();
89 #ifdef HAVE_ATEXIT
90  atexit(delete_instance);
91 #endif
92 }
93 
97 void AllowedHosts::delete_instance() {
98  delete d_instance;
99  d_instance = 0;
100 }
101 
113 bool AllowedHosts::is_allowed(shared_ptr<http::url> candidate_url) {
114  string error_msg;
115  return is_allowed(candidate_url, error_msg);
116 }
117 
118 bool AllowedHosts::is_allowed(shared_ptr<http::url> candidate_url, std::string &why_not) {
119  BESDEBUG(MODULE, prolog << "BEGIN candidate_url: " << candidate_url->str() << endl);
120  bool isAllowed = false;
121 
122  // Special case: This allows any file: URL to pass if the URL starts with the default
123  // catalog's path.
124  if (candidate_url->protocol() == FILE_PROTOCOL) {
125 
126  // Ensure that the file path starts with the catalog root dir.
127  // We know that when a file URL is parsed by http::url it stores everything in after the "file://" mark in
128  // the path, as there is no hostname.
129  string file_path = candidate_url->path();
130  BESDEBUG(MODULE, prolog << " file_path: '" << file_path <<
131  "' (length: " << file_path.length() << " size: " << file_path.size() << ")" <<endl);
132  // Get the BES Catalog
134  string default_catalog_name = bcl->default_catalog_name();
135  BESDEBUG(MODULE, prolog << "Searching for catalog named: '" << default_catalog_name << "'" << endl);
136  BESCatalog *bcat = bcl->find_catalog(default_catalog_name);
137  if (bcat) {
138  BESDEBUG(MODULE, prolog << "Found catalog named: '" << bcat->get_catalog_name() << "'" << endl);
139  } else {
140  string error_msg = "INTERNAL_ERROR: Unable to locate default catalog. Check BES configuration.";
141  BESDEBUG(MODULE, prolog << error_msg << endl);
142  throw BESInternalError(error_msg, __FILE__, __LINE__);
143  }
144 
145  string catalog_root = bcat->get_root();
146  BESDEBUG(MODULE, prolog << "catalog_root: '" << catalog_root <<
147  "' (length: " << catalog_root.length() << " size: " << catalog_root.size() << ")" << endl);
148 
149  string relative_path;
150  if (file_path[0] == '/') {
151  if (file_path.length() < catalog_root.length()) {
152  // Upward traversal is not allowed (specified resource path is shorter than data root path)
153  why_not = "Path is out of scope from configuration.";
154  isAllowed = false;
155  } else {
156  BESDEBUG(MODULE, prolog << "file_path: " << file_path << endl);
157  BESDEBUG(MODULE, prolog << "catalog_root: " << catalog_root << endl);
158  size_t ret = file_path.find(catalog_root);
159  BESDEBUG(MODULE, prolog << "file_path.find(catalog_root): " << ret << endl);
160  isAllowed = (ret == 0);
161  relative_path = file_path.substr(catalog_root.length());
162  BESDEBUG(MODULE, prolog << "relative_path: " << relative_path << endl);
163  BESDEBUG(MODULE, prolog << "isAllowed: " << (isAllowed?"true":"false") << endl);
164 
165  }
166  } else {
167  BESDEBUG(MODULE, prolog << "Relative path detected");
168  relative_path = file_path;
169  isAllowed = true;
170  }
171 
172  // string::find() returns 0 if the submitted path begins with the catalog root.
173  // And since we are just looking at the catalog.root as a prefix of the resource
174  // name we only allow access to the resource for an exact match.
175  if (isAllowed) {
176  // If we stop adding a '/' to file_path values that don't begin with one
177  // then we need to detect the use of the relative path here
178  bool follow_sym_links = bcat->get_catalog_utils()->follow_sym_links();
179  try {
180  BESUtil::check_path(relative_path, catalog_root, follow_sym_links);
181  }
182  catch (BESNotFoundError &e) {
183  why_not = e.get_message();
184  isAllowed = false;
185  }
186  catch (BESForbiddenError &e) {
187  why_not = e.get_message();
188  isAllowed = false;
189  }
190  }
191  BESDEBUG(MODULE, prolog << "File Access Allowed: " << (isAllowed ? "true " : "false ") << endl);
192  } else if(candidate_url->protocol() == HTTPS_PROTOCOL || candidate_url->protocol() == HTTP_PROTOCOL ){
193 
194  isAllowed = candidate_url->is_trusted() || check(candidate_url->str());
195 
196  if (candidate_url->is_trusted()) {
197  INFO_LOG(prolog << "Candidate URL is marked trusted, allowing. url: " << candidate_url->str() << endl);
198  }
199  BESDEBUG(MODULE, prolog << "HTTP Access Allowed: " << (isAllowed ? "true " : "false ") << endl);
200  }
201  else {
202  stringstream ss;
203  ss << "The candidate_url utilizes an unsupported protocol '" << candidate_url->protocol() << "'" ;
204  BESDEBUG(MODULE, prolog << ss.str() << endl);
205  throw BESInternalError(ss.str(),__FILE__,__LINE__);
206  }
207  BESDEBUG(MODULE, prolog << "END Access Allowed: " << (isAllowed ? "true " : "false ") << endl);
208  return isAllowed;
209 }
210 
211 
212 
213 bool AllowedHosts::check(const std::string &url){
214  bool isAllowed=false;
215  auto it = d_allowed_hosts.begin();
216  auto end_it = d_allowed_hosts.end();
217  for (; it != end_it && !isAllowed; it++) {
218  string a_regex_pattern = *it;
219  BESRegex reg_expr(a_regex_pattern.c_str());
220  int match_result = reg_expr.match(url.c_str(), url.length());
221  if (match_result >= 0) {
222  auto match_length = (unsigned int) match_result;
223  if (match_length == url.length()) {
224  BESDEBUG(MODULE,
225  prolog << "FULL MATCH. pattern: " << a_regex_pattern << " url: " << url << endl);
226  isAllowed = true;;
227  } else {
228  BESDEBUG(MODULE,
229  prolog << "No Match. pattern: " << a_regex_pattern << " url: " << url << endl);
230  }
231  }
232  }
233  return isAllowed;
234 }
235 
236 } // namespace http
List of all registered catalogs.
virtual std::string default_catalog_name() const
The name of the default catalog.
static BESCatalogList * TheCatalogList()
Get the singleton BESCatalogList instance.
Catalogs provide a hierarchical organization for data.
Definition: BESCatalog.h:51
virtual std::string get_root() const =0
virtual std::string get_catalog_name() const
Get the name for this catalog.
Definition: BESCatalog.h:103
virtual BESCatalogUtils * get_catalog_utils() const
Get a pointer to the utilities, customized for this catalog.
Definition: BESCatalog.h:113
virtual std::string get_message()
get the error message for this exception
Definition: BESError.h:99
error thrown if the BES is not allowed to access the resource requested
exception thrown if internal error encountered
error thrown if the resource requested cannot be found
Regular expression matching.
Definition: BESRegex.h:53
static void check_path(const std::string &path, const std::string &root, bool follow_sym_links)
Check if the specified path is valid.
Definition: BESUtil.cc:260
static TheBESKeys * TheKeys()
Definition: TheBESKeys.cc:71
void get_values(const std::string &s, std::vector< std::string > &vals, bool &found)
Retrieve the values of a given key, if set.
Definition: TheBESKeys.cc:371
Can a given URL be dereferenced given the BES's configuration?
Definition: AllowedHosts.h:54
utility class for the HTTP catalog module
Definition: AllowedHosts.cc:55