15template <
bool has_state_overr
ide>
22 input.remove_suffix(1);
46 if (type == ada::scheme::type::FILE &&
89 if (type == ada::scheme::type::FILE &&
112inline void url_aggregator::copy_scheme(
const url_aggregator&
u)
noexcept {
113 ada_log(
"url_aggregator::copy_scheme ",
u.buffer);
119 buffer.erase(0, components.protocol_end);
120 buffer.insert(0,
u.get_protocol());
121 components.protocol_end =
u.components.protocol_end;
142inline void url_aggregator::set_scheme_from_view_with_colon(
144 ada_log(
"url_aggregator::set_scheme_from_view_with_colon ",
154 if (buffer.empty()) {
157 buffer.erase(0, components.protocol_end);
176inline void url_aggregator::set_scheme(std::string_view
new_scheme)
noexcept {
186 if (buffer.empty()) {
187 buffer.append(helpers::concat(
new_scheme,
":"));
189 buffer.erase(0, components.protocol_end);
190 buffer.insert(0, helpers::concat(
new_scheme,
":"));
213 helpers::remove_ascii_tab_or_newline(
view);
225 std::string::iterator
pointer =
226 std::find_if_not(
view.begin(),
view.end(), unicode::is_alnum_plus);
239 if (cannot_have_credentials_or_port()) {
245 update_base_username(
input);
248 update_base_username(ada::unicode::percent_encode(
259 if (cannot_have_credentials_or_port()) {
265 update_base_password(
input);
268 update_base_password(ada::unicode::percent_encode(
279 if (cannot_have_credentials_or_port()) {
283 helpers::remove_ascii_tab_or_newline(
trimmed);
289 if (ada::unicode::is_c0_control_or_space(
trimmed.front())) {
293 if (
input.find_first_of(
"0123456789") == std::string_view::npos) {
333 if (unicode::has_tabs_or_newline(
input)) {
337 helpers::remove_ascii_tab_or_newline(
tmp_buffer);
346 update_base_pathname(
"/");
362 update_base_pathname(
"/");
374 helpers::strip_trailing_spaces_from_opaque_path(*
this);
380 helpers::remove_ascii_tab_or_newline(
new_value);
399 helpers::strip_trailing_spaces_from_opaque_path(*
this);
405 helpers::remove_ascii_tab_or_newline(
new_value);
414 ada_log(
"url_aggregator::set_href, success :",
out.has_value());
417 ada_log(
"url_aggregator::set_href, parsed ",
out->to_string());
422 return out.has_value();
434 if (
input[0] ==
'[') {
436 if (
input.back() !=
']') {
443 input.remove_prefix(1);
444 input.remove_suffix(1);
445 return parse_ipv6(
input);
451 return parse_opaque_host(
input);
463 unicode::contains_forbidden_domain_code_point_or_upper(
input.data(),
472 input.find(
"xn-") == std::string_view::npos) {
474 update_base_hostname(
input);
476 ada_log(
"parse_host fast path ipv4");
486 ada_log(
"parse_host calling to_ascii");
487 std::optional<std::string> host = std::string(
get_hostname());
490 ada_log(
"parse_host to_ascii returns false");
493 ada_log(
"parse_host to_ascii succeeded ", *host,
" [", host->size(),
496 if (std::any_of(host.value().begin(), host.value().end(),
497 ada::unicode::is_forbidden_domain_code_point)) {
503 if (checkers::is_ipv4(host.value())) {
504 ada_log(
"parse_host got ipv4 ", *host);
505 return parse_ipv4(host.value(),
false);
508 update_base_hostname(host.value());
513template <
bool overr
ide_hostname>
514bool url_aggregator::set_host_or_hostname(
const std::string_view
input) {
515 ada_log(
"url_aggregator::set_host_or_hostname ",
input);
529 helpers::remove_ascii_tab_or_newline(
_host);
534 if (type != ada::scheme::type::FILE) {
564 }
else if (has_dash_dot()) {
565 add_authority_slashes_if_needed();
575 }
else if (has_dash_dot()) {
583 if (
location != std::string_view::npos) {
599 if (helpers::substring(buffer, components.
host_start,
600 components.
host_end) ==
"localhost") {
623 ada_log(
"url_aggregator::get_origin");
640 return helpers::concat(
out->get_protocol(),
"//",
out->get_host());
651 ada_log(
"url_aggregator::get_username");
653 return helpers::substring(buffer, components.
protocol_end + 2,
661 ada_log(
"url_aggregator::get_password");
663 return helpers::substring(buffer, components.
username_end + 1,
671 ada_log(
"url_aggregator::get_port");
675 return helpers::substring(buffer, components.
host_end + 1,
681 ada_log(
"url_aggregator::get_hash");
687 if (buffer.size() - components.
hash_start <= 1) {
690 return helpers::substring(buffer, components.
hash_start);
695 ada_log(
"url_aggregator::get_host");
714 ada_log(
"url_aggregator::get_hostname");
724 return helpers::substring(buffer,
start, components.
host_end);
729 ada_log(
"url_aggregator::get_pathname pathname_start = ",
732 " components.hash_start = ", components.
hash_start);
744 ada_log(
"url_aggregator::get_search");
762 ada_log(
"url_aggregator::get_protocol");
763 return helpers::substring(buffer, 0, components.
protocol_end);
767 ada_log(
"url_aggregator::to_string buffer:", buffer,
" [", buffer.size(),
774 auto back = std::back_insert_iterator(
answer);
777 answer.append(
"\t\"buffer\":\"");
778 helpers::encode_json(buffer, back);
781 answer.append(
"\t\"protocol\":\"");
782 helpers::encode_json(get_protocol(), back);
785 if (has_credentials()) {
786 answer.append(
"\t\"username\":\"");
787 helpers::encode_json(get_username(), back);
789 answer.append(
"\t\"password\":\"");
790 helpers::encode_json(get_password(), back);
794 answer.append(
"\t\"host\":\"");
795 helpers::encode_json(get_host(), back);
798 answer.append(
"\t\"path\":\"");
799 helpers::encode_json(get_pathname(), back);
801 answer.append(
"\t\"opaque path\":");
802 answer.append((has_opaque_path ?
"true" :
"false"));
806 answer.append(
"\t\"query\":\"");
807 helpers::encode_json(get_search(), back);
811 answer.append(
"\t\"fragment\":\"");
812 helpers::encode_json(get_hash(), back);
820 return std::to_string(
offset);
824 answer.append(
"\t\"protocol_end\":");
828 answer.append(
"\t\"username_end\":");
832 answer.append(
"\t\"host_start\":");
836 answer.append(
"\t\"host_end\":");
840 answer.append(
"\t\"port\":");
844 answer.append(
"\t\"pathname_start\":");
848 answer.append(
"\t\"search_start\":");
852 answer.append(
"\t\"hash_start\":");
866bool url_aggregator::parse_ipv4(std::string_view
input,
bool in_place) {
868 " bytes], overlaps with buffer: ",
869 helpers::overlaps(
input, buffer) ?
"yes" :
"no");
873 input.remove_suffix(1);
884 ((
input.length() > 2) && (
input[2] ==
'.')))) {
887 input.remove_prefix(2);
889 std::from_chars_result
r{};
891 ada_log(
"parse_ipv4 trying to parse hex number");
894 }
else if ((
input.length() >= 2) &&
input[0] ==
'0' &&
896 ada_log(
"parse_ipv4 trying to parse octal number");
900 ada_log(
"parse_ipv4 trying to parse decimal number");
905 if (
r.ec != std::errc()) {
906 ada_log(
"parse_ipv4 parsing failed");
930 input.remove_prefix(1);
934 ada_log(
"parse_ipv4 found invalid (more than 4 numbers or empty) ");
944 "url_aggregator::parse_ipv4 completed and was already correct in the "
949 ada_log(
"url_aggregator::parse_ipv4 completed and we need to update it");
954 update_base_hostname(
962bool url_aggregator::parse_ipv6(std::string_view
input) {
974 std::array<uint16_t, 8>
address{};
986 if (
input[0] ==
':') {
990 ada_log(
"parse_ipv6 starts with : but the rest does not start with :");
1005 ada_log(
"parse_ipv6 piece_index == 8");
1013 ada_log(
"parse_ipv6 compress is non-null");
1031 unicode::is_ascii_hex_digit(*
pointer)) {
1033 value =
uint16_t(value * 0x10 + unicode::convert_hex_to_binary(*
pointer));
1042 ada_log(
"parse_ipv6 length is 0");
1051 ada_log(
"parse_ipv6 piece_index > 6");
1071 ada_log(
"parse_ipv6 Otherwise, validation error, return failure");
1079 "parse_ipv6 If c is not an ASCII digit, validation error, return "
1095 ada_log(
"parse_ipv6 if ipv4Piece is 0, validation error");
1105 ada_log(
"parse_ipv6 ipv4_piece > 255");
1144 "parse_ipv6 If c is the EOF code point, validation error, return "
1153 "parse_ipv6 Otherwise, if c is not the EOF code point, validation "
1154 "error, return failure");
1186 "parse_ipv6 if compress is null and pieceIndex is not 8, validation "
1187 "error, return failure");
1200bool url_aggregator::parse_opaque_host(std::string_view
input) {
1205 ada::unicode::is_forbidden_host_code_point)) {
1214 update_base_hostname(
input);
1217 update_base_hostname(ada::unicode::percent_encode(
1231 answer.append(std::to_string(buffer.size()));
1232 answer.append(
" bytes]");
1236 line1.resize(buffer.size(),
' ');
1246 if (components.
host_end != buffer.size()) {
1249 if (components.
host_start != buffer.size()) {
1269 line2.append(
" hash_start");
1282 line3.append(
" search_start ");
1295 line4.append(
" pathname_start ");
1302 if (components.
host_end != buffer.size()) {
1309 line5.append(
" host_end ");
1316 if (components.
host_start != buffer.size()) {
1323 line6.append(
" host_start ");
1337 line7.append(
" username_end ");
1351 line8.append(
" protocol_end ");
1358 answer.append(
"note: hash omitted\n");
1361 answer.append(
"note: search omitted\n");
1364 answer.append(
"warning: protocol_end overflows\n");
1367 answer.append(
"warning: username_end overflows\n");
1370 answer.append(
"warning: host_start overflows\n");
1372 if (components.
host_end > buffer.size()) {
1373 answer.append(
"warning: host_end overflows\n");
1376 answer.append(
"warning: pathname_start overflows\n");
1386 ada_log(
"url_aggregator::validate inconsistent components \n",
1437 if (components.
host_end > buffer.size()) {
1442 ada_log(
"url_aggregator::validate pathname_start overflow \n",
1450 "url_aggregator::validate missing : at the end of the protocol \n",
1461 "url_aggregator::validate missing : or @ at the end of the username "
1468 if (components.
host_start != buffer.size()) {
1472 "url_aggregator::validate missing @ at the end of the password \n",
1482 "url_aggregator::validate missing // between protocol and host "
1491 "url_aggregator::validate missing @ at the end of the username "
1499 ada_log(
"url_aggregator::validate expected omitted host \n",
1505 if (components.
host_end != buffer.size() &&
1508 buffer[components.
host_end] ==
'/' &&
1509 buffer[components.
host_end + 1] ==
'.') {
1514 "url_aggregator::validate expected the path to begin with // \n",
1518 }
else if (buffer[components.
host_end] !=
':') {
1519 ada_log(
"url_aggregator::validate missing : at the port \n",
1528 ada_log(
"url_aggregator::validate missing / at the path \n",
1535 ada_log(
"url_aggregator::validate missing ? at the search \n",
1542 ada_log(
"url_aggregator::validate missing # at the hash \n",
1551void url_aggregator::delete_dash_dot() {
1552 ada_log(
"url_aggregator::delete_dash_dot");
1555 buffer.erase(components.
host_end, 2);
1567inline void url_aggregator::consume_prepared_path(std::string_view
input) {
1568 ada_log(
"url_aggregator::consume_prepared_path ",
input);
1603 if (
input[0] !=
'.') {
1605 if (
slashdot == std::string_view::npos) {
1616 ada_log(
"parse_path trivial");
1629 (type != ada::scheme::type::FILE);
1631 ada_log(
"parse_prepared_path fast");
1647 update_base_pathname(path);
1651 if (path.back() ==
'/') {
1652 update_base_pathname(path);
1657 path.resize(path.rfind(
'/') + 1);
1658 update_base_pathname(path);
1665 update_base_pathname(path);
1690 ?
input.find_first_of(
"/\\")
1693 if (
location != std::string_view::npos) {
1701 ada::unicode::percent_encode<false>(
1705 if (unicode::is_double_dot_path_segment(
path_buffer)) {
1706 if ((helpers::shorten_path(path, type) ||
special) &&
1707 location == std::string_view::npos) {
1710 }
else if (unicode::is_single_dot_path_segment(
path_buffer) &&
1711 (
location == std::string_view::npos)) {
1715 else if (!unicode::is_single_dot_path_segment(
path_buffer)) {
1719 if (type == ada::scheme::type::FILE && path.empty() &&
1732 if (
location == std::string_view::npos) {
1733 update_base_pathname(path);
Definitions for URL specific checkers used within Ada.
#define ADA_ASSERT_TRUE(COND)
#define ada_lifetime_bound
#define ada_really_inline
Definitions for helper functions used within Ada.
Definitions for user facing functions for parsing URL and it's components.
constexpr uint8_t C0_CONTROL_PERCENT_ENCODE[32]
constexpr uint8_t PATH_PERCENT_ENCODE[32]
constexpr uint8_t USERINFO_PERCENT_ENCODE[32]
constexpr uint8_t SPECIAL_QUERY_PERCENT_ENCODE[32]
constexpr uint8_t QUERY_PERCENT_ENCODE[32]
constexpr bool is_windows_drive_letter(std::string_view input) noexcept
bool has_hex_prefix(std::string_view input)
constexpr bool is_alpha(char x) noexcept
constexpr bool is_digit(char x) noexcept
ada_really_inline bool begins_with(std::string_view view, std::string_view prefix)
constexpr ada::scheme::type get_scheme_type(std::string_view scheme) noexcept
constexpr uint16_t get_special_port(std::string_view scheme) noexcept
std::string ipv6(const std::array< uint16_t, 8 > &address) noexcept
std::string ipv4(uint64_t address) noexcept
ada_really_inline size_t percent_encode_index(const std::string_view input, const uint8_t character_set[])
template ada::result< url_aggregator > parse< url_aggregator >(std::string_view input, const url_aggregator *base_url)
ada_warn_unused ada::result< result_type > parse(std::string_view input, const result_type *base_url=nullptr)
Declarations for the URL scheme.
bool has_non_empty_username() const noexcept
void set_hash(std::string_view input)
void clear_search() override
bool has_hostname() const noexcept
std::string_view get_hostname() const noexcept ada_lifetime_bound
bool has_non_empty_password() const noexcept
ada_really_inline bool has_credentials() const noexcept
std::string to_string() const override
std::string_view get_pathname() const noexcept ada_lifetime_bound
std::string_view get_hash() const noexcept ada_lifetime_bound
std::string to_diagram() const
bool set_protocol(std::string_view input)
std::string get_origin() const noexcept override
bool validate() const noexcept
std::string_view get_search() const noexcept ada_lifetime_bound
bool has_valid_domain() const noexcept override
bool set_hostname(std::string_view input)
bool set_password(std::string_view input)
bool set_pathname(std::string_view input)
std::string_view get_protocol() const noexcept ada_lifetime_bound
std::string_view get_password() const noexcept ada_lifetime_bound
bool set_href(std::string_view input)
void set_search(std::string_view input)
std::string_view get_port() const noexcept ada_lifetime_bound
bool has_port() const noexcept
std::string_view get_href() const noexcept ada_lifetime_bound
bool set_host(std::string_view input)
std::string_view get_host() const noexcept ada_lifetime_bound
bool set_port(std::string_view input)
std::string_view get_username() const noexcept ada_lifetime_bound
bool set_username(std::string_view input)
ada_really_inline bool is_special() const noexcept
bool check_offset_consistency() const noexcept
static constexpr uint32_t omitted
Definitions for unicode operations.
Inline functions for url aggregator.
Declaration for the basic URL definitions.
Declaration for the URL Components.