30#ifndef _GLIBCXX_UNICODE_H
31#define _GLIBCXX_UNICODE_H 1
33#if __cplusplus >= 202002L
41namespace std _GLIBCXX_VISIBILITY(default)
43_GLIBCXX_BEGIN_NAMESPACE_VERSION
// Range test on a code point.  Values below 0xD800 are handled first by
// the [[likely]] branch; otherwise the result is true only for values
// above 0xDFFF that do not exceed 0x10FFFF.
// NOTE(review): the statement controlled by the [[likely]] branch is not
// part of this listing (presumably `return true`) -- confirm.
48 __is_scalar_value(
char32_t __c)
50 if (__c < 0xD800) [[likely]]
52 return 0xDFFF < __c && __c <= 0x10FFFF;
// Templated on the code unit type _CharT.  The visible return statement is
// true when __c is below the maximum value of _CharT and
// __is_scalar_value(__c) holds.
// NOTE(review): the lines between the signature and this return are
// missing from the listing; another branch may exist there.
56 template<
typename _CharT>
58 __is_single_code_unit(
char32_t __c)
63 return __c < __gnu_cxx::__int_traits<_CharT>::__max
64 && __is_scalar_value(__c);
72 operator()() const noexcept
// Sentinel type: comparing an iterator with it is true when the iterator
// refers to a value-initialized element.
76 struct _Null_sentinel_t
// Accepts input iterators whose value type is default initializable and
// equality comparable with the iterator's reference type.
78 template<input_iterator _It>
79 requires default_initializable<iter_value_t<_It>>
80 && equality_comparable_with<iter_reference_t<_It>, iter_value_t<_It>>
// True when the element at __it equals iter_value_t<_It>{}.
82 operator==(_It __it, _Null_sentinel_t)
83 {
return *__it == iter_value_t<_It>{}; }
// Template parameters of _Utf_iterator: _FromFmt and _ToFmt are the source
// and destination code unit types, _Iter/_Sent delimit the input (the
// sentinel type defaults to the iterator type), and _ErrorHandler defaults
// to _Repl.  The input's value type must be convertible to _FromFmt.
86 template<
typename _FromFmt,
typename _ToFmt,
87 input_iterator _Iter, sentinel_for<_Iter> _Sent = _Iter,
88 typename _ErrorHandler = _Repl>
89 requires convertible_to<iter_value_t<_Iter>, _FromFmt>
// Unless the input is at least a forward iterator, invoking the error
// handler must be noexcept.
92 static_assert(forward_iterator<_Iter> ||
noexcept(_ErrorHandler()()));
// Elements are produced by value: reference is the same type as
// value_type, which is the destination code unit type.
95 using value_type = _ToFmt;
96 using difference_type = iter_difference_t<_Iter>;
97 using reference = value_type;
// NOTE(review): the middle of this alias is missing from the listing; only
// its last alternative, bidirectional_iterator_tag, is visible.
98 using iterator_concept
100 bidirectional_iterator_tag>;
// Default construction is defaulted.
102 constexpr _Utf_iterator() =
default;
// Constructor for bidirectional inputs: records the start of the input
// (__first), the current position (__it) and the end (__last).
// NOTE(review): the statement guarded by the "not at end" check is not
// shown in this listing -- presumably it decodes the first code point.
105 _Utf_iterator(_Iter __first, _Iter __it, _Sent __last)
106 requires bidirectional_iterator<_Iter>
107 : _M_first_and_curr{__first, __it}, _M_last(__last)
109 if (_M_curr() != _M_last)
// Constructor for inputs that are not bidirectional: only the current
// position (__it) and the end (__last) are recorded.
// NOTE(review): the statement guarded by the "not at end" check is not
// shown in this listing -- presumably it decodes the first code point.
116 _Utf_iterator(_Iter __it, _Sent __last)
117 requires (!bidirectional_iterator<_Iter>)
118 : _M_first_and_curr{__it}, _M_last(__last)
120 if (_M_curr() != _M_last)
// Converting constructor from a _Utf_iterator with the same formats and
// error handler but different iterator/sentinel types, provided those
// types are convertible to this iterator's.  Every data member initialized
// here is copied from the corresponding member of __other.
126 template<
class _Iter2,
class _Sent2>
127 requires convertible_to<_Iter2, _Iter> && convertible_to<_Sent2, _Sent>
129 _Utf_iterator(
const _Utf_iterator<_FromFmt, _ToFmt, _Iter2, _Sent2,
130 _ErrorHandler>& __other)
131 : _M_buf(__other._M_buf), _M_first_and_curr(__other._M_first_and_curr),
132 _M_buf_index(__other._M_buf_index), _M_buf_last(__other._M_buf_last),
133 _M_last(__other._M_last)
// Start of the underlying input; only available for bidirectional inputs.
138 begin() const requires bidirectional_iterator<_Iter>
139 {
return _M_first(); }
// The stored end of the underlying input (_M_last).
143 end()
const {
return _M_last; }
// Current position in the underlying input; requires a forward iterator.
147 base() const requires forward_iterator<_Iter>
148 {
return _M_curr(); }
// The buffered output code unit selected by _M_buf_index.
152 operator*()
const {
return _M_buf[_M_buf_index]; }
// NOTE(review): the line naming this function is missing from the listing;
// given the reference return type and the conditions below it is
// presumably the pre-increment operator -- confirm.
154 constexpr _Utf_iterator&
// Taken when the current code unit is the last buffered one and the
// underlying position has not reached the end.
157 if (_M_buf_index + 1 == _M_buf_last && _M_curr() != _M_last)
// An extra step applies only to forward inputs; its statement is not shown
// in this listing.
159 if constexpr (forward_iterator<_Iter>)
161 if (_M_curr() == _M_last)
// Otherwise, taken while further buffered code units remain.
166 else if (_M_buf_index + 1 < _M_buf_last)
171 constexpr _Utf_iterator
// Pre-decrement, available for bidirectional inputs only.
// NOTE(review): the statements controlled by the two conditions below are
// not shown in this listing.
179 constexpr _Utf_iterator&
180 operator--()
requires bidirectional_iterator<_Iter>
// Taken at the first buffered code unit when the underlying position is
// not at the start of the input.
182 if (!_M_buf_index && _M_curr() != _M_first())
// Otherwise, taken when an earlier buffered code unit exists.
184 else if (_M_buf_index)
189 constexpr _Utf_iterator
// Equality of two iterators.  Offered when the input is a forward iterator
// or when its iterator type supports `!=` with itself.
198 friend constexpr bool
199 operator==(_Utf_iterator __lhs, _Utf_iterator __rhs)
200 requires forward_iterator<_Iter> ||
requires (_Iter __i) { __i != __i; }
// Forward inputs: equal when both the underlying position and the index
// into the output buffer match.
202 if constexpr (forward_iterator<_Iter>)
203 return __lhs._M_curr() == __rhs._M_curr()
204 && __lhs._M_buf_index == __rhs._M_buf_index;
// Other inputs: the positions are compared first, then the buffer state.
// NOTE(review): the statements for these two branches are not shown here.
205 else if (__lhs._M_curr() != __rhs._M_curr())
207 else if (__lhs._M_buf_index == __rhs._M_buf_index
208 && __lhs._M_buf_last == __rhs._M_buf_last)
// Final case: true only when each iterator's buffer index equals its own
// buffer end.
211 return __lhs._M_buf_index == __lhs._M_buf_last
212 && __rhs._M_buf_index == __rhs._M_buf_last;
// Comparison of an iterator against the sentinel.
216 friend constexpr bool
217 operator==(_Utf_iterator __lhs, _Sent __rhs)
// Forward inputs: true when the underlying position equals the sentinel.
219 if constexpr (forward_iterator<_Iter>)
220 return __lhs._M_curr() == __rhs;
// Other inputs must additionally have their buffer index equal to the
// buffer end.
222 return __lhs._M_curr() == __rhs
223 && __lhs._M_buf_index == __lhs._M_buf_last;
// Compile-time dispatch on the size of the source code unit type: one
// byte, two bytes, or (enforced by the static_assert) four bytes.
// NOTE(review): the enclosing function's signature and the statement of
// each branch are absent from this listing.
230 if constexpr (
sizeof(_FromFmt) ==
sizeof(uint8_t))
232 else if constexpr (
sizeof(_FromFmt) ==
sizeof(uint16_t))
236 static_assert(
sizeof(_FromFmt) ==
sizeof(uint32_t));
// This constructor ignores both arguments and does nothing.
247 _Guard(
void*, _Iter&) { }
// Variant constrained to forward iterators.
250 template<
typename _It>
requires forward_iterator<_It>
// On destruction, moves the saved iterator (_M_orig) back into the owning
// iterator's current position.
253 constexpr ~_Guard() { _M_this->_M_curr() =
std::move(_M_orig); }
// The iterator whose current position is restored.
254 _Utf_iterator* _M_this;
// Decodes one code point from 8-bit input and hands it to _M_update.
// NOTE(review): the function signature, the declaration of __c and most
// branch bodies are missing from this listing; the comments below describe
// only the visible conditions.
261 _Guard<_Iter> __g{
this, _M_curr()};
// Read the leading byte and step past it.
263 uint8_t __u = *_M_curr()++;
// Default range accepted for a continuation byte.
264 const uint8_t __lo_bound = 0x80, __hi_bound = 0xBF;
// Number of input positions consumed, passed on to _M_update.
265 uint8_t __to_incr = 1;
// Leading bytes up to 0x7F are the expected common case.
267 if (__u <= 0x7F) [[likely]]
// Leading bytes from 0x80 to 0xC1 are flagged as unlikely.
269 else if (__u < 0xC2) [[unlikely]]
// Input ends right after the leading byte.
271 else if (_M_curr() == _M_last) [[unlikely]]
// Leading byte in 0xC2..0xDF.
273 else if (__u <= 0xDF)
278 if (__u < __lo_bound || __u > __hi_bound) [[unlikely]]
// Each accepted continuation byte contributes its low six bits.
282 __c = (__c << 6) | (__u & 0x3F);
// Leading byte in 0xE0..0xEF.  The byte checked at line 295 has a
// narrower range after 0xE0 (at least 0xA0) and after 0xED (at most 0x9F).
287 else if (__u <= 0xEF)
289 const uint8_t __lo_bound_2 = __u == 0xE0 ? 0xA0 : __lo_bound;
290 const uint8_t __hi_bound_2 = __u == 0xED ? 0x9F : __hi_bound;
295 if (__u < __lo_bound_2 || __u > __hi_bound_2) [[unlikely]]
297 else if (++_M_curr() == _M_last) [[unlikely]]
302 __c = (__c << 6) | (__u & 0x3F);
305 if (__u < __lo_bound || __u > __hi_bound) [[unlikely]]
309 __c = (__c << 6) | (__u & 0x3F);
// Leading byte in 0xF0..0xF4.  The byte checked at line 323 has a
// narrower range after 0xF0 (at least 0x90) and after 0xF4 (at most 0x8F).
315 else if (__u <= 0xF4)
317 const uint8_t __lo_bound_2 = __u == 0xF0 ? 0x90 : __lo_bound;
318 const uint8_t __hi_bound_2 = __u == 0xF4 ? 0x8F : __hi_bound;
323 if (__u < __lo_bound_2 || __u > __hi_bound_2) [[unlikely]]
325 else if (++_M_curr() == _M_last) [[unlikely]]
330 __c = (__c << 6) | (__u & 0x3F);
333 if (__u < __lo_bound || __u > __hi_bound) [[unlikely]]
335 else if (++_M_curr() == _M_last) [[unlikely]]
340 __c = (__c << 6) | (__u & 0x3F);
343 if (__u < __lo_bound || __u > __hi_bound) [[unlikely]]
347 __c = (__c << 6) | (__u & 0x3F);
// Store the decoded value together with the number of positions consumed.
357 _M_update(__c, __to_incr);
// Decodes one code point from 16-bit input and hands it to _M_update.
// NOTE(review): the function signature, the declaration of __c and several
// branch bodies are missing from this listing.
363 _Guard<_Iter> __g{
this, _M_curr()};
// Read the first code unit and step past it.
365 uint16_t __u = *_M_curr()++;
366 uint8_t __to_incr = 1;
// Code units outside 0xD800..0xDFFF are the expected common case.
368 if (__u < 0xD800 || __u > 0xDFFF) [[likely]]
// A unit in 0xD800..0xDBFF with more input available: inspect the next unit.
370 else if (__u < 0xDC00 && _M_curr() != _M_last)
372 uint16_t __u2 = *_M_curr();
// The second unit must lie in 0xDC00..0xDFFF.
373 if (__u2 < 0xDC00 || __u2 > 0xDFFF) [[unlikely]]
// Combine the pair: __x holds the low six bits of __u above the low ten
// bits of __u2 (`&` binds tighter than `|`, so the grouping is as intended);
// __w is the next five bits of __u, and __w + 1 is placed above bit 16.
379 uint32_t __x = (__u & 0x3F) << 10 | __u2 & 0x3FF;
380 uint32_t __w = (__u >> 6) & 0x1F;
381 __c = (__w + 1) << 16 | __x;
// Store the decoded value together with the number of positions consumed.
387 _M_update(__c, __to_incr);
// Reads one 32-bit code unit, steps past it, and checks it with
// __is_scalar_value.
// NOTE(review): the function signature and the statements following the
// check are missing from this listing.
393 _Guard<_Iter> __g{
this, _M_curr()};
394 char32_t __c = *_M_curr()++;
395 if (!__is_scalar_value(__c)) [[unlikely]]
// Records how far the input advanced for __c and encodes __c into _M_buf
// in the destination format, chosen by the size of _ToFmt.
// NOTE(review): several statements (the 32-bit branch body, the stores of
// __lead/__trail, the declaration of __bits, and the updates of
// _M_buf_last) are missing from this listing.
402 _M_update(
char32_t __c, uint8_t __to_incr)
404 _M_to_increment = __to_incr;
// Four-byte destination code units.
406 if constexpr (
sizeof(_ToFmt) ==
sizeof(uint32_t))
// Two-byte destination code units.
411 else if constexpr (
sizeof(_ToFmt) ==
sizeof(uint16_t))
413 if (__is_single_code_unit<_ToFmt>(__c))
// Otherwise split __c into two code units: __lead is offset from 0xD800
// (less 0x10000 >> 10) by the bits above the low ten, and __trail is
// 0xDC00 plus the low ten bits.
422 const char32_t __lead_offset = 0xD800 - (0x10000 >> 10);
423 char16_t __lead = __lead_offset + (__c >> 10);
424 char16_t __trail = 0xDC00 + (__c & 0x3FF);
// One-byte destination code units; the branch taken depends on __bits.
432 static_assert(
sizeof(_ToFmt) == 1);
// Up to 7 bits: the remaining buffer slots are zeroed.
434 if (__bits <= 7) [[likely]]
437 _M_buf[1] = _M_buf[2] = _M_buf[3] = 0;
// Up to 11 bits: two bytes, 0xC0 lead plus one 0x80 continuation.
440 else if (__bits <= 11)
442 _M_buf[0] = 0xC0 | (__c >> 6);
443 _M_buf[1] = 0x80 | (__c & 0x3F);
444 _M_buf[2] = _M_buf[3] = 0;
// Up to 16 bits: three bytes, 0xE0 lead plus two continuations.
447 else if (__bits <= 16)
449 _M_buf[0] = 0xE0 | (__c >> 12);
450 _M_buf[1] = 0x80 | ((__c >> 6) & 0x3F);
451 _M_buf[2] = 0x80 | (__c & 0x3F);
// Remaining case: four bytes, 0xF0 lead plus three continuations.
457 _M_buf[0] = 0xF0 | ((__c >> 18) & 0x07);
458 _M_buf[1] = 0x80 | ((__c >> 12) & 0x3F);
459 _M_buf[2] = 0x80 | ((__c >> 6) & 0x3F);
460 _M_buf[3] = 0x80 | (__c & 0x3F);
// Obtains a code point by invoking a default-constructed _ErrorHandler and
// asserts that it satisfies __is_scalar_value.
// NOTE(review): the enclosing function's signature and return statement
// are missing from this listing.
469 char32_t __c = _ErrorHandler()();
470 __glibcxx_assert(__is_scalar_value(__c));
// The recorded start of the input; exists only for bidirectional inputs.
475 _M_first() const requires bidirectional_iterator<_Iter>
476 {
return _M_first_and_curr._M_first; }
// Access to the current position in the input (non-const overload).
479 _M_curr() {
return _M_first_and_curr._M_curr; }
// Const overload of the same accessor.
482 _M_curr()
const {
return _M_first_and_curr._M_curr; }
// Output buffer sized as 4 / sizeof(_ToFmt) elements: four for one-byte
// units, two for two-byte units, one for four-byte units.
484 array<value_type, 4 /
sizeof(_ToFmt)> _M_buf;
// General form: its constructors initialize only a current position
// (_M_curr).
486 template<
typename _It>
487 struct _First_and_curr
489 _First_and_curr() =
default;
492 _First_and_curr(_It __curr) : _M_curr(__curr) { }
// Conversion from a _First_and_curr over a convertible iterator type.
494 template<convertible_to<_It> _It2>
496 _First_and_curr(
const _First_and_curr<_It2>& __other)
497 : _M_curr(__other._M_curr) { }
// Form used for bidirectional iterators: its constructors initialize both
// the first position (_M_first) and the current one (_M_curr).
502 template<
typename _It>
requires bidirectional_iterator<_It>
503 struct _First_and_curr<_It>
505 _First_and_curr() =
default;
508 _First_and_curr(_It __first, _It __curr)
509 : _M_first(__first), _M_curr(__curr) { }
// Conversion from a _First_and_curr over a convertible iterator type.
511 template<convertible_to<_It> _It2>
513 _First_and_curr(
const _First_and_curr<_It2>& __other)
514 : _M_first(__other._M_first), _M_curr(__other._M_curr) { }
// Position(s) in the underlying input.
520 _First_and_curr<_Iter> _M_first_and_curr;
// Index of the current code unit within _M_buf.
522 uint8_t _M_buf_index = 0;
// Compared against _M_buf_index to detect the end of the buffered units.
523 uint8_t _M_buf_last = 0;
// Set by _M_update from its __to_incr argument.
524 uint8_t _M_to_increment = 0;
// End of the input; [[no_unique_address]] is applied to the sentinel.
526 [[no_unique_address]] _Sent _M_last;
// Befriends every _Utf_iterator specialization; the converting constructor
// reads the data members of a different specialization.
528 template<
typename _FromFmt2,
typename _ToFmt2,
529 input_iterator _Iter2, sentinel_for<_Iter2> _Sent2,
530 typename _ErrHandler>
531 requires convertible_to<iter_value_t<_Iter2>, _FromFmt2>
532 friend class _Utf_iterator;
// View over an input range, parameterized on the destination code unit
// type; derives from ranges::view_interface.
// NOTE(review): the line carrying the class keyword and name is missing
// from this listing.
535 template<
typename _ToFormat, ranges::input_range _Range>
537 :
public ranges::view_interface<_Utf_view<_ToFormat, _Range>>
// The iterator type: a _Utf_iterator from the range's value type to
// _ToFormat over the range's iterator and sentinel types.
539 using _Iterator = _Utf_iterator<ranges::range_value_t<_Range>,
540 _ToFormat, ranges::iterator_t<_Range>,
541 ranges::sentinel_t<_Range>>;
// Builds the begin iterator.  Bidirectional inputs use the three-argument
// constructor with __first as both start and current position; other
// inputs use the two-argument constructor.
543 template<
typename _Iter,
typename _Sent>
545 _M_begin(_Iter __first, _Sent __last)
547 if constexpr (bidirectional_iterator<_Iter>)
548 return _Iterator(__first, __first, __last);
550 return _Iterator(__first, __last);
// Builds the end of the view.
553 template<
typename _Iter,
typename _Sent>
555 _M_end(_Iter __first, _Sent __last)
// NOTE(review): the statement for the case where the sentinel type differs
// from the iterator type is missing from this listing.
557 if constexpr (!is_same_v<_Iter, _Sent>)
// Same type and bidirectional: an iterator positioned at __last that also
// records __first as the start.
559 else if constexpr (bidirectional_iterator<_Iter>)
560 return _Iterator(__first, __last, __last);
// Same type, not bidirectional: an iterator positioned at __last.
562 return _Iterator(__last, __last);
// Stores the range, forwarded into _M_base.
569 _Utf_view(_Range&& __r) : _M_base(
std::
forward<_Range>(__r)) { }
// Begin of the view, built from the begin and end of the stored range.
571 constexpr auto begin()
572 {
return _M_begin(ranges::begin(_M_base), ranges::end(_M_base)); }
// Delegates to _M_end with the begin and end of the stored range.
// NOTE(review): the signature line is missing from this listing;
// presumably this is the view's end() -- confirm.
575 {
return _M_end(ranges::begin(_M_base), ranges::end(_M_base)); }
// Empty exactly when the stored range is empty.
577 constexpr bool empty()
const {
return ranges::empty(_M_base); }
// Aliases of _Utf_view with the destination code unit type fixed to
// char8_t, char16_t and char32_t respectively.
580 template<
typename _View>
581 using _Utf8_view = _Utf_view<char8_t, _View>;
582 template<
typename _View>
583 using _Utf16_view = _Utf_view<char16_t, _View>;
584 template<
typename _View>
585 using _Utf32_view = _Utf_view<char32_t, _View>;
587inline namespace __v15_1_0
589#define _GLIBCXX_GET_UNICODE_DATA 150100
590#include "unicode-data.h"
591#ifdef _GLIBCXX_GET_UNICODE_DATA
592# error "Invalid unicode data"
// Width lookup for a single code point, based on the __width_edges table.
// NOTE(review): the statement for code points below the first edge is
// missing from this listing.
597 __field_width(
char32_t __c)
noexcept
599 if (__c < __width_edges[0]) [[likely]]
// Find the first edge greater than __c; an even index yields 1 and an odd
// index yields 2.
602 auto* __p = std::upper_bound(__width_edges,
std::end(__width_edges), __c);
603 return (__p - __width_edges) % 2 + 1;
// Looks up the _Gcb_property of a code point in the __gcb_edges table.
607 constexpr _Gcb_property
608 __grapheme_cluster_break_property(
char32_t __c)
noexcept
// Mask covering the low __gcb_shift_bits bits of a table entry.
610 constexpr uint32_t __mask = (1 << __gcb_shift_bits) - 1;
611 auto* __end =
std::end(__gcb_edges);
// Search key: the code point shifted above the masked bits, with all of
// those low bits set.
612 auto* __p = std::lower_bound(__gcb_edges, __end,
613 (__c << __gcb_shift_bits) | __mask);
// The property is taken from the low bits of the entry just before the
// search result.
614 return _Gcb_property(__p[-1] & __mask);
// True when __c occurs in the __incb_linkers table (searched with
// std::find).
618 __is_incb_linker(
char32_t __c)
noexcept
620 const auto __end =
std::end(__incb_linkers);
622 return std::find(__incb_linkers, __end, __c) != __end;
// Looks up the _InCB value of a code point in the __incb_edges table,
// whose entries keep the property in their low two bits.
// NOTE(review): the statement for code points below the first edge is
// missing from this listing.
627 __incb_property(
char32_t __c)
noexcept
629 if ((__c << 2) < __incb_edges[0]) [[likely]]
632 constexpr uint32_t __mask = 0x3;
633 auto* __end =
std::end(__incb_edges);
// Search key: the code point shifted left by two with both low bits set;
// the property comes from the entry just before the search result.
634 auto* __p = std::lower_bound(__incb_edges, __end, (__c << 2) | __mask);
635 return _InCB(__p[-1] & __mask);
// Membership test against the __xpicto_edges table.
// NOTE(review): the statement for code points below the first edge is
// missing from this listing.
639 __is_extended_pictographic(
char32_t __c)
641 if (__c < __xpicto_edges[0]) [[likely]]
// Find the first edge greater than __c; the result is true when that
// edge's index is odd.
644 auto* __p = std::upper_bound(__xpicto_edges,
std::end(__xpicto_edges), __c);
645 return (__p - __xpicto_edges) % 2;
// State shared by grapheme cluster iteration: a break property plus
// bookkeeping for the extended-pictographic sequence, the count of
// regional indicators and whether a linker code point has been seen.
// NOTE(review): some members and statements (including the declaration of
// _M_c) are missing from this listing.
648 struct _Grapheme_cluster_iterator_base
651 _Gcb_property _M_prop;
652 enum class _XPicto :
unsigned char { _Init, _Zwj, _Matched, _Failed };
653 _XPicto _M_xpicto_seq_state = _XPicto::_Init;
654 unsigned char _M_RI_count = 0;
655 bool _M_incb_linker_seen =
false;
// Restores the initial sequence state and clears the linker flag.
658 _M_reset(
char32_t __c, _Gcb_property __p)
662 _M_xpicto_seq_state = _XPicto::_Init;
664 _M_incb_linker_seen =
false;
// Advances the extended-pictographic state for the next code point __c
// with property __p.  _Failed is checked first; otherwise the next state
// defaults to _Failed unless one of the cases below applies.
668 _M_update_xpicto_seq_state(
char32_t __c, _Gcb_property __p)
670 if (_M_xpicto_seq_state == _XPicto::_Failed)
673 auto __next_state = _XPicto::_Failed;
// Current state is _Init or _Matched.
674 if (_M_xpicto_seq_state != _XPicto::_Zwj)
// A ZWJ moves to _Zwj when already _Matched, or when the stored code point
// _M_c is extended pictographic.
676 if (__p == _Gcb_property::_Gcb_ZWJ)
678 if (_M_xpicto_seq_state == _XPicto::_Matched)
679 __next_state = _XPicto::_Zwj;
682 else if (__is_extended_pictographic(_M_c))
683 __next_state = _XPicto::_Zwj;
// An Extend code point keeps the current state.
685 else if (__p == _Gcb_property::_Gcb_Extend)
686 __next_state = _M_xpicto_seq_state;
// Current state is _Zwj: a code point with property Other that is extended
// pictographic completes the match.
692 if (__p == _Gcb_property::_Gcb_Other
693 && __is_extended_pictographic(__c))
694 __next_state = _XPicto::_Matched;
696 _M_xpicto_seq_state = __next_state;
// Reacts to regional indicator code points; the statements are not shown
// in this listing.
700 _M_update_ri_count(_Gcb_property __p)
702 if (__p == _Gcb_property::_Gcb_Regional_Indicator)
// Sets the linker flag once a code point from the linker table is seen.
709 _M_update_incb_state(
char32_t __c, _Gcb_property)
711 if (__is_incb_linker(__c))
712 _M_incb_linker_seen =
true;
// View over a forward range that iterates by grapheme cluster; it wraps a
// 32-bit view of the input (_Utf32_view).
// NOTE(review): many lines of this class are missing from the listing
// (several signatures and most branch bodies); the comments describe only
// what is visible.
717 template<ranges::forward_range _View>
718 class _Grapheme_cluster_view
719 :
public ranges::view_interface<_Grapheme_cluster_view<_View>>
// Stores the begin iterator of a _Utf32_view built over __v.
724 _Grapheme_cluster_view(
const _View& __v)
725 : _M_begin(_Utf32_view(__v).begin())
728 constexpr auto begin()
const {
return _M_begin; }
// The end is obtained from the stored begin iterator.
729 constexpr auto end()
const {
return _M_begin.end(); }
// Iterator over clusters; privately inherits the shared state.
732 struct _Iterator :
private _Grapheme_cluster_iterator_base
736 using _U32_iterator = ranges::iterator_t<_Utf32_view<_View>>;
744 using value_type = char32_t;
745 using iterator_concept = forward_iterator_tag;
746 using difference_type = ptrdiff_t;
// For a non-empty input the break property is initialized from _M_c.
749 _Iterator(_U32_iterator __i)
752 if (__i != __i.end())
755 _M_prop = __grapheme_cluster_break_property(_M_c);
// Advance: scan following code points, updating the per-cluster state for
// each one, until _M_is_break reports a boundary or the end is reached.
772 const auto __end = _M_base.end();
773 if (_M_base != __end)
775 auto __p_prev = _M_prop;
777 while (++__it != __end)
779 char32_t __c = *__it;
780 auto __p = __grapheme_cluster_break_property(*__it);
781 _M_update_xpicto_seq_state(__c, __p);
782 _M_update_ri_count(__p);
783 _M_update_incb_state(__c, __p);
784 if (_M_is_break(__p_prev, __p, __it))
// Iterators are equal when their underlying 32-bit iterators are equal.
806 operator==(
const _Iterator& __i)
const
807 {
return _M_base == __i._M_base; }
// Comparison with the underlying view's sentinel.
811 operator==(
const ranges::sentinel_t<_View>& __i)
const
812 {
return _M_base == __i; }
// Position in the original input.
815 constexpr auto base()
const {
return _M_base.base(); }
818 constexpr auto end()
const {
return _M_base.end(); }
// Field width of the code point stored in _M_c.
822 width() const noexcept
823 {
return __field_width(_M_c); }
826 _U32_iterator _M_base;
// Decides whether a cluster boundary lies between a code point with
// property __p1 and the following one with property __p2, located at
// __curr.
834 _M_is_break(_Gcb_property __p1, _Gcb_property __p2,
835 _U32_iterator __curr)
const
837 using enum _Gcb_property;
// NOTE(review): the statement for this condition and the condition
// guarding the next return are missing from this listing.
839 if (__p1 == _Gcb_Control || __p1 == _Gcb_LF)
843 return __p2 != _Gcb_LF;
846 if (__p2 == _Gcb_Control || __p2 == _Gcb_CR || __p2 == _Gcb_LF)
863 if (__p1 == _Gcb_LV || __p1 == _Gcb_V)
// After LVT or T: a break unless the next property is T.
874 if (__p1 == _Gcb_LVT || __p1 == _Gcb_T)
875 return __p2 != _Gcb_T;
878 if (__p2 == _Gcb_Extend || __p2 == _Gcb_ZWJ)
885 if (__p2 == _Gcb_SpacingMark)
888 if (__p1 == _Gcb_Prepend)
// Consonant ... linker ... consonant case: entered only when a linker has
// been seen and both the stored code point and the one at __curr are
// consonants.
894 if (_M_incb_linker_seen
895 && __incb_property(_M_c) == _InCB::_Consonant
896 && __incb_property(*__curr) == _InCB::_Consonant)
// Re-scan the code points before __curr: a linker sets the flag, a
// consonant clears it, and a property other than Extend is tested
// separately (its statement is not shown here).
900 bool __have_linker =
false;
902 while (++__it != __curr)
904 if (__is_incb_linker(*__it))
905 __have_linker =
true;
908 auto __incb = __incb_property(*__it);
909 if (__incb == _InCB::_Consonant)
910 __have_linker =
false;
911 else if (__incb != _InCB::_Extend)
915 if (__it == __curr && __have_linker)
922 if (__p1 == _Gcb_ZWJ && _M_xpicto_seq_state == _XPicto::_Matched)
// Two regional indicators in a row: a break only when the low bit of the
// running count is clear.
929 if (__p1 == _Gcb_property::_Gcb_Regional_Indicator && __p1 == __p2)
930 return (_M_RI_count & 1) == 0;
// Field width of a string, computed over its grapheme clusters: starts
// from the width of the first cluster and then visits each later cluster.
// NOTE(review): the statement for the empty case, the loop body and the
// return statement are missing from this listing.
943 template<
typename _CharT>
945 __field_width(basic_string_view<_CharT> __s)
947 if (__s.empty()) [[unlikely]]
949 _Grapheme_cluster_view __gc(__s);
950 auto __it = __gc.begin();
951 const auto __end = __gc.end();
952 size_t __n = __it.width();
953 while (++__it != __end)
// Walks the grapheme clusters of __s while accumulating their widths, and
// may shorten __s (taken by reference) to the prefix ending at the current
// cluster's position in the original string.
// NOTE(review): the comparisons against __max and the return statements
// are missing from this listing.
960 template<
typename _CharT>
962 __truncate(basic_string_view<_CharT>& __s,
size_t __max)
964 if (__s.empty()) [[unlikely]]
967 _Grapheme_cluster_view __gc(__s);
968 auto __it = __gc.begin();
969 const auto __end = __gc.end();
970 size_t __n = __it.width();
976 while (++__it != __end)
// Width the string would have if this cluster were included.
978 size_t __n2 = __n + __it.width();
981 __s = basic_string_view<_CharT>(__s.begin(), __it.base());
// Determines, for the character type _CharT, whether the literal encoding
// named by the compiler's predefined macros is a UTF encoding.
// NOTE(review): the statements for the char8_t/char16_t/char32_t branches,
// the lambda's return statements and the final return are missing from
// this listing.
989 template<
typename _CharT>
991 __literal_encoding_is_unicode()
993 if constexpr (is_same_v<_CharT, char8_t>)
995 else if constexpr (is_same_v<_CharT, char16_t>)
997 else if constexpr (is_same_v<_CharT, char32_t>)
// Encoding name to inspect; stays empty when no macro supplies one.
1000 const char* __enc =
"";
1002#ifdef __GNUC_EXECUTION_CHARSET_NAME
// Recognizes a name starting with "ISO" (either case) followed by
// "-10646/".
1003 auto __remove_iso10646_prefix = [](
const char* __s) {
1005 if (__s[0] ==
'I' || __s[0] ==
'i')
1006 if (__s[1] ==
'S' || __s[1] ==
's')
1007 if (__s[2] ==
'O' || __s[2] ==
'o')
1008 if (string_view(__s + 3).starts_with(
"-10646/"))
// char uses the execution charset name; the wide name is used under the
// preprocessor condition below.
1013 if constexpr (is_same_v<_CharT, char>)
1014 __enc = __remove_iso10646_prefix(__GNUC_EXECUTION_CHARSET_NAME);
1015# if defined _GLIBCXX_USE_WCHAR_T && defined __GNUC_WIDE_EXECUTION_CHARSET_NAME
1017 __enc = __remove_iso10646_prefix(__GNUC_WIDE_EXECUTION_CHARSET_NAME);
// The name must start with "UTF" in either case.
1020 if ((__enc[0] ==
'U' || __enc[0] ==
'u')
1021 && (__enc[1] ==
'T' || __enc[1] ==
't')
1022 && (__enc[2] ==
'F' || __enc[2] ==
'f'))
1025 if (__enc[0] ==
'-')
// "8" must be followed by the end of the name or by exactly "//".
1027 if (__enc[0] ==
'8')
1028 return __enc[1] ==
'\0' || string_view(__enc + 1) ==
"//";
// Character types other than char also accept "16" or "32", with an
// optional trailing "//" removed first.
1029 else if constexpr (!is_same_v<_CharT, char>)
1031 string_view __s(__enc);
1032 if (__s.ends_with(
"//"))
1033 __s.remove_suffix(2);
1034 return __s ==
"16" || __s ==
"32";
// Clang path: the encoding name comes from Clang's predefined macros.
1037#elif defined __clang_literal_encoding__
1038 if constexpr (is_same_v<_CharT, char>)
1039 __enc = __clang_literal_encoding__;
1040# if defined _GLIBCXX_USE_WCHAR_T && defined __clang_wide_literal_encoding__
1042 __enc = __clang_wide_literal_encoding__;
1045 string_view __s(__enc);
1048 else if constexpr (!is_same_v<_CharT, char>)
1049 return __s ==
"UTF-16" || __s ==
"UTF-32";
// Shorthand for __literal_encoding_is_unicode<char>().
1056 __literal_encoding_is_utf8()
1057 {
return __literal_encoding_is_unicode<char>(); }
1061_GLIBCXX_END_NAMESPACE_VERSION
constexpr std::remove_reference< _Tp >::type && move(_Tp &&__t) noexcept
Convert a value to an rvalue.
constexpr _Tp && forward(typename std::remove_reference< _Tp >::type &__t) noexcept
Forward an lvalue.
_Tp * end(valarray< _Tp > &__va) noexcept
Return an iterator pointing to one past the last element of the valarray.
The top-level namespace for ISO C++ entities is std.
constexpr void advance(_InputIterator &__i, _Distance __n)
A generalization of pointer arithmetic.
__numeric_traits_integer< _Tp > __int_traits
Convenience alias for __numeric_traits<integer-type>.