7 #ifndef __UTFITERATOR_H__
8 #define __UTFITERATOR_H__
12 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
15 #if defined(__cpp_lib_ranges)
19 #include <string_view>
20 #include <type_traits>
135 #ifndef U_HIDE_DRAFT_API
170 namespace U_HEADER_ONLY_NAMESPACE {
173 #if U_CPLUSPLUS_VERSION >= 20
176 template<
typename Iter>
180 template<
typename Iter>
184 template<
typename Iter>
188 template<
typename Iter>
/** Variable template that is true exactly when Range satisfies the standard std::ranges::range concept. */
192 template<
typename Range>
193 constexpr
bool range = std::ranges::range<Range>;
198 template<
typename Iter>
202 template<
typename Iter>
206 template<
typename Iter>
209 std::forward_iterator_tag,
210 typename std::iterator_traits<Iter>::iterator_category>;
213 template<
typename Iter>
216 std::bidirectional_iterator_tag,
217 typename std::iterator_traits<Iter>::iterator_category>;
220 template<
typename Range,
typename =
void>
224 template<
typename Range>
227 std::void_t<decltype(std::declval<Range>().begin()),
228 decltype(std::declval<Range>().end())>> : std::true_type {};
231 template<
typename Range>
240 template <
typename... Args>
247 template<
typename CP32,
bool skipSurrogates>
249 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
273 if (skipSurrogates && c_ == 0xd800) {
301 template<
typename CP32>
303 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
332 template<
typename CP32>
334 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
366 template<
typename CP32,
typename UnitIter,
typename =
void>
368 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
/**
 * Returns the stored start_ iterator by value; does not modify this object.
 * NOTE(review): presumably start_ marks the first code unit of this code point's
 * sequence -- confirm against the enclosing class, which is not visible here.
 */
394 UnitIter
begin()
const {
return start_; }
/**
 * Returns the stored limit_ iterator by value; does not modify this object.
 * NOTE(review): presumably limit_ is the exclusive end of this code point's
 * code unit sequence -- confirm against the enclosing class, which is not visible here.
 */
401 UnitIter
end()
const {
return limit_; }
409 #if U_CPLUSPLUS_VERSION >= 20
415 template<std::contiguous_iterator Iter = UnitIter>
416 std::basic_string_view<Unit>
stringView()
const {
417 return std::basic_string_view<Unit>(
begin(),
end());
425 template<typename Iter = UnitIter, typename Unit = typename std::iterator_traits<Iter>::value_type>
426 std::enable_if_t<std::is_pointer_v<Iter> ||
427 std::is_same_v<Iter, typename std::basic_string<Unit>::iterator> ||
428 std::is_same_v<Iter, typename std::basic_string<Unit>::const_iterator> ||
429 std::is_same_v<Iter, typename std::basic_string_view<Unit>::iterator> ||
430 std::is_same_v<Iter, typename std::basic_string_view<Unit>::const_iterator>,
431 std::basic_string_view<Unit>>
433 return std::basic_string_view<Unit>(&*start_, len_);
448 template<
typename CP32,
typename UnitIter>
449 class UnsafeCodeUnits<
452 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
453 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
/**
 * Returns the stored len_ value as a uint8_t; does not modify this object.
 * NOTE(review): presumably len_ is the number of code units that were read for
 * this code point -- confirm against the enclosing class.
 */
462 uint8_t
length()
const {
return len_; }
486 template<
typename CP32,
typename UnitIter,
typename =
void>
511 template<
typename CP32,
typename UnitIter>
515 std::enable_if_t<!prv::forward_iterator<UnitIter>>> :
516 public UnsafeCodeUnits<CP32, UnitIter> {
535 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
549 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
553 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
554 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
556 "For 8-bit strings, the SURROGATE option does not have an equivalent.");
566 U_FORCE_INLINE static void inc(UnitIter &p,
const LimitIter &limit) {
572 if ((0xe0 <= b && b < 0xf0)) {
577 }
else if (b < 0xe0) {
607 if (0xe0 <= b2 && b2 <= 0xf4) {
626 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
627 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
629 CP32 c = uint8_t(*p);
632 if constexpr (isMultiPass) {
633 return {c, 1,
true, p0, p};
649 (c = (c << 6) | (t & 0x3f), ++length, ++p != limit) &&
650 (t = *p - 0x80) <= 0x3f) &&
652 (c = (c << 6) | t, ++length, ++p != limit)
654 c >= 0xc2 && (c &= 0x1f, 1)) &&
656 (t = *p - 0x80) <= 0x3f) {
660 if constexpr (isMultiPass) {
661 return {c, length,
true, p0, p};
663 return {c, length,
true};
666 if constexpr (isMultiPass) {
667 return {sub(), length,
false, p0, p};
669 return {sub(), length,
false};
673 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
676 CP32 c = uint8_t(*--p);
678 return {c, 1,
true, p, p0};
686 c = ((b1 - 0xc0) << 6) | (c & 0x3f);
687 return {c, 2,
true, p, p0};
688 }
else if (b1 < 0xf0 ?
693 return {sub(), 2,
false, p, p0};
699 if (0xe0 <= b2 && b2 <= 0xf4) {
704 c = (b2 << 12) | ((b1 & 0x3f) << 6) | c;
705 return {c, 3,
true, p, p0};
710 return {sub(), 3,
false, p, p0};
714 if (0xf0 <= b3 && b3 <= 0xf4) {
718 c = (b3 << 18) | ((b2 & 0x3f) << 12) | ((b1 & 0x3f) << 6) | c;
719 return {c, 4,
true, p, p0};
725 return {sub(), 1,
false, p, p0};
730 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
734 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
735 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
746 U_FORCE_INLINE static void inc(UnitIter &p,
const LimitIter &limit) {
764 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
765 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
767 CP32 c =
static_cast<CP32
>(*p);
770 if constexpr (isMultiPass) {
771 return {c, 1,
true, p0, p};
780 if constexpr (isMultiPass) {
781 return {c, 2,
true, p0, p};
786 if constexpr (isMultiPass) {
787 return {sub(c), 1,
false, p0, p};
789 return {sub(c), 1,
false};
795 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
798 CP32 c =
static_cast<CP32
>(*--p);
800 return {c, 1,
true, p, p0};
807 return {c, 2,
true, p, p0};
809 return {sub(c), 1,
false, p, p0};
816 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
820 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
821 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
824 U_FORCE_INLINE static CP32 sub(
bool forSurrogate, CP32 surrogate) {
841 UnitIter &p0, UnitIter &p,
const LimitIter &) {
842 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
846 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
847 if constexpr (isMultiPass) {
848 return {c, 1,
true, p0, p};
853 if constexpr (isMultiPass) {
854 return {sub(uc < 0xe000, c), 1,
false, p0, p};
856 return {sub(uc < 0xe000, c), 1,
false};
861 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter , UnitIter &p) {
865 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
866 return {c, 1,
true, p, p0};
868 return {sub(uc < 0xe000, c), 1,
false, p, p0};
875 template<
typename CP32,
typename UnitIter,
typename =
void>
879 template<
typename CP32,
typename UnitIter>
883 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
884 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
897 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
898 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
900 CP32 c = uint8_t(*p);
903 if constexpr (isMultiPass) {
904 return {c, 1, p0, p};
908 }
else if (c < 0xe0) {
909 c = ((c & 0x1f) << 6) | (*p & 0x3f);
911 if constexpr (isMultiPass) {
912 return {c, 2, p0, p};
916 }
else if (c < 0xf0) {
919 c = uint16_t(c << 12) | ((*p & 0x3f) << 6);
923 if constexpr (isMultiPass) {
924 return {c, 3, p0, p};
929 c = ((c & 7) << 18) | ((*p & 0x3f) << 12);
931 c |= (*p & 0x3f) << 6;
935 if constexpr (isMultiPass) {
936 return {c, 4, p0, p};
943 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
946 CP32 c = uint8_t(*--p);
948 return {c, 1, p, p0};
953 for (uint8_t shift = 6;;) {
957 c |= uint32_t{b} << shift;
960 c |= (uint32_t{b} & 0x3f) << shift;
966 return {c, count, p, p0};
971 template<
typename CP32,
typename UnitIter>
975 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
976 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
994 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
995 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
997 CP32 c =
static_cast<CP32
>(*p);
1000 if constexpr (isMultiPass) {
1001 return {c, 1, p0, p};
1009 if constexpr (isMultiPass) {
1010 return {c, 2, p0, p};
1017 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1020 CP32 c =
static_cast<CP32
>(*--p);
1022 return {c, 1, p, p0};
1026 return {c, 2, p, p0};
1032 template<
typename CP32,
typename UnitIter>
1033 class UnsafeUTFImpl<
1036 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
1037 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1047 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
1048 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
1051 if constexpr (isMultiPass) {
1052 return {c, 1, p0, p};
1058 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1061 return {c, 1, p, p0};
1093 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
1095 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1096 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1120 prv::bidirectional_iterator<UnitIter>,
1121 std::bidirectional_iterator_tag,
1122 std::forward_iterator_tag>;
1138 p_(p), start_(start), limit_(limit), units_(0, 0, false, p, p) {}
1151 p_(p), start_(p), limit_(limit), units_(0, 0, false, p, p) {}
1187 return getLogicalPosition() == other.getLogicalPosition();
1206 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1209 return iter.getLogicalPosition() == s;
1212 #if U_CPLUSPLUS_VERSION < 20
1225 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1228 return iter.getLogicalPosition() == s;
1238 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1249 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1263 units_ = Impl::readAndInc(p0, p_, limit_);
1280 units_ = Impl::readAndInc(p0, p_, limit_);
1283 return Proxy(units_);
1296 }
else if (state_ == 0) {
1297 Impl::inc(p_, limit_);
1320 }
else if (state_ == 0) {
1322 units_ = Impl::readAndInc(p0, p_, limit_);
1343 template<
typename Iter = UnitIter>
1345 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator &>
1349 p_ = units_.begin();
1351 units_ = Impl::decAndRead(start_, p_);
1363 template<
typename Iter = UnitIter>
1365 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator>
1373 friend class std::reverse_iterator<
UTFIterator<CP32, behavior, UnitIter>>;
1376 return state_ <= 0 ? p_ : units_.begin();
1380 mutable UnitIter p_;
1387 mutable CodeUnits<CP32, UnitIter> units_;
1392 mutable int8_t state_ = 0;
1395 #ifndef U_IN_DOXYGEN
1397 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
1400 UnitIter, LimitIter,
1401 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
1402 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1403 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
/**
 * Initializes this proxy's units_ from the given CodeUnits object.
 * NOTE(review): the units_ declared right after these members is a by-value
 * CodeUnits, so this looks like a copy rather than a stored reference -- confirm
 * that declaration belongs to Proxy.
 */
1410 explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
/** Returns a reference to the proxy's own units_. */
1411 CodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
/** Returns the address of the proxy's own units_, so that proxy->member reaches the CodeUnits. */
1412 CodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
1414 CodeUnits<CP32, UnitIter> units_;
1418 using value_type = CodeUnits<CP32, UnitIter>;
1437 return p_ == other.p_ && ahead_ == other.ahead_;
1445 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1448 return !iter.ahead_ && iter.p_ == s;
1451 #if U_CPLUSPLUS_VERSION < 20
1454 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1457 return !iter.ahead_ && iter.p_ == s;
1462 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1468 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1475 units_ = Impl::readAndInc(p_, p_, limit_);
1483 units_ = Impl::readAndInc(p_, p_, limit_);
1486 return Proxy(units_);
1494 Impl::inc(p_, limit_);
1504 units_ = Impl::readAndInc(p_, p_, limit_);
1507 return Proxy(units_);
1512 mutable UnitIter p_;
1518 mutable CodeUnits<CP32, UnitIter> units_ = {0, 0,
false};
1522 mutable bool ahead_ =
false;
1528 #ifndef U_IN_DOXYGEN
1532 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1533 class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter>> {
1534 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1535 using Impl = U_HEADER_ONLY_NAMESPACE::UTFImpl<CP32, behavior, UnitIter>;
/** Takes a CodeUnits_ by value and initializes this proxy's units_ from it. */
1542 explicit Proxy(CodeUnits_ units) : units_(units) {}
/** Returns a reference to the proxy's own units_. */
1543 CodeUnits_ &operator*() {
return units_; }
/** Returns the address of the proxy's own units_, so that proxy->member reaches the CodeUnits_. */
1544 CodeUnits_ *operator->() {
return &units_; }
1550 using value_type = CodeUnits_;
1551 using reference = value_type;
1552 using pointer = Proxy;
1554 using iterator_category = std::bidirectional_iterator_tag;
1557 p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),
1558 units_(0, 0, false, p_, p_) {}
1559 U_FORCE_INLINE reverse_iterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}
// Move construction and move assignment are compiler-generated and declared noexcept.
1561 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
1562 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
// Copy construction and copy assignment are compiler-generated (member-wise).
1564 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
1565 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
1568 return getLogicalPosition() == other.getLogicalPosition();
1574 units_ = Impl::decAndRead(start_, p_);
1582 units_ = Impl::decAndRead(start_, p_);
1585 return Proxy(units_);
1592 }
else if (state_ == 0) {
1593 Impl::dec(start_, p_);
1596 p_ = units_.begin();
1605 reverse_iterator result(*
this);
1608 }
else if (state_ == 0) {
1609 units_ = Impl::decAndRead(start_, p_);
1610 reverse_iterator result(*
this);
1615 reverse_iterator result(*
this);
1617 p_ = units_.begin();
1629 units_ = Impl::readAndInc(p0, p_, limit_);
1635 reverse_iterator result(*
this);
1642 return state_ >= 0 ? p_ : units_.end();
1646 mutable UnitIter p_;
1653 mutable CodeUnits_ units_;
1658 mutable int8_t state_ = 0;
1662 namespace U_HEADER_ONLY_NAMESPACE {
1687 typename UnitIter,
typename LimitIter = UnitIter>
1690 std::move(start), std::move(p), std::move(limit));
1714 typename UnitIter,
typename LimitIter = UnitIter>
1717 std::move(p), std::move(limit));
1744 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1776 template<
typename CP32, UTFIllFormedBehavior behavior,
typename Range>
1778 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1791 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
1801 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
1815 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1822 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1824 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1832 using UnitIter = decltype(unitRange.begin());
1833 using LimitIter = decltype(unitRange.end());
1834 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1836 return unitRange.end();
1837 }
else if constexpr (prv::bidirectional_iterator<UnitIter>) {
1838 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1841 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1849 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1851 using UnitIter = decltype(unitRange.begin());
1852 using LimitIter = decltype(unitRange.end());
1853 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1855 return unitRange.end();
1856 }
else if constexpr (prv::bidirectional_iterator<UnitIter>) {
1857 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1860 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1869 return std::make_reverse_iterator(
end());
1877 return std::make_reverse_iterator(
begin());
1885 template<
typename CP32, UTFIllFormedBehavior behavior>
1888 __cpp_lib_bind_back >= 2022'02
1889 : std::ranges::range_adaptor_closure<UTFStringCodePointsAdaptor<CP32, behavior>>
1893 template<
typename Range>
1895 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10
1897 std::forward<Range>(unitRange));
1899 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
1903 std::forward<Range>(unitRange));
1925 template<
typename CP32, UTFIllFormedBehavior behavior>
1951 template<
typename CP32,
typename UnitIter,
typename =
void>
1953 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1954 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
1978 prv::bidirectional_iterator<UnitIter>,
1979 std::bidirectional_iterator_tag,
1980 std::forward_iterator_tag>;
2015 return getLogicalPosition() == other.getLogicalPosition();
2032 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2035 return iter.getLogicalPosition() == s;
2038 #if U_CPLUSPLUS_VERSION < 20
2047 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2050 return iter.getLogicalPosition() == s;
2060 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2071 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2085 units_ = Impl::readAndInc(p0, p_);
2102 units_ = Impl::readAndInc(p0, p_);
2105 return Proxy(units_);
2118 }
else if (state_ == 0) {
2142 }
else if (state_ == 0) {
2144 units_ = Impl::readAndInc(p0, p_);
2165 template<
typename Iter = UnitIter>
2171 p_ = units_.begin();
2173 units_ = Impl::decAndRead(p_);
2185 template<
typename Iter = UnitIter>
2198 return state_ <= 0 ? p_ : units_.begin();
2202 mutable UnitIter p_;
2205 mutable UnsafeCodeUnits<CP32, UnitIter> units_;
2210 mutable int8_t state_ = 0;
2213 #ifndef U_IN_DOXYGEN
2215 template<
typename CP32,
typename UnitIter>
2216 class UnsafeUTFIterator<
2219 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
2220 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2221 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
/**
 * Initializes this proxy's units_ from the given UnsafeCodeUnits object.
 * NOTE(review): the units_ declared right after these members is a by-value
 * UnsafeCodeUnits, so this looks like a copy rather than a stored reference --
 * confirm that declaration belongs to Proxy.
 */
2228 explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
/** Returns a reference to the proxy's own units_. */
2229 UnsafeCodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
/** Returns the address of the proxy's own units_, so that proxy->member reaches the UnsafeCodeUnits. */
2230 UnsafeCodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
2232 UnsafeCodeUnits<CP32, UnitIter> units_;
2236 using value_type = UnsafeCodeUnits<CP32, UnitIter>;
2251 return p_ == other.p_ && ahead_ == other.ahead_;
2259 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2262 return !iter.ahead_ && iter.p_ == s;
2265 #if U_CPLUSPLUS_VERSION < 20
2268 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2271 return !iter.ahead_ && iter.p_ == s;
2276 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2282 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2289 units_ = Impl::readAndInc(p_, p_);
2297 units_ = Impl::readAndInc(p_, p_);
2300 return Proxy(units_);
2318 units_ = Impl::readAndInc(p_, p_);
2321 return Proxy(units_);
2326 mutable UnitIter p_;
2329 mutable UnsafeCodeUnits<CP32, UnitIter> units_ = {0, 0};
2333 mutable bool ahead_ =
false;
2339 #ifndef U_IN_DOXYGEN
2343 template<
typename CP32,
typename UnitIter>
2344 class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter>> {
2345 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2346 using Impl = U_HEADER_ONLY_NAMESPACE::UnsafeUTFImpl<CP32, UnitIter>;
/** Takes an UnsafeCodeUnits_ by value and initializes this proxy's units_ from it. */
2353 explicit Proxy(UnsafeCodeUnits_ units) : units_(units) {}
/** Returns a reference to the proxy's own units_. */
2354 UnsafeCodeUnits_ &operator*() {
return units_; }
/** Returns the address of the proxy's own units_, so that proxy->member reaches the UnsafeCodeUnits_. */
2355 UnsafeCodeUnits_ *operator->() {
return &units_; }
2357 UnsafeCodeUnits_ units_;
2361 using value_type = UnsafeCodeUnits_;
2362 using reference = value_type;
2363 using pointer = Proxy;
2365 using iterator_category = std::bidirectional_iterator_tag;
2368 p_(iter.getLogicalPosition()), units_(0, 0, p_, p_) {}
2369 U_FORCE_INLINE reverse_iterator() : p_{}, units_(0, 0, p_, p_) {}
// Move construction and move assignment are compiler-generated and declared noexcept.
2371 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
2372 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
// Copy construction and copy assignment are compiler-generated (member-wise).
2374 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
2375 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
2378 return getLogicalPosition() == other.getLogicalPosition();
2384 units_ = Impl::decAndRead(p_);
2392 units_ = Impl::decAndRead(p_);
2395 return Proxy(units_);
2402 }
else if (state_ == 0) {
2406 p_ = units_.begin();
2415 reverse_iterator result(*
this);
2418 }
else if (state_ == 0) {
2419 units_ = Impl::decAndRead(p_);
2420 reverse_iterator result(*
this);
2425 reverse_iterator result(*
this);
2427 p_ = units_.begin();
2439 units_ = Impl::readAndInc(p0, p_);
2445 reverse_iterator result(*
this);
2452 return state_ >= 0 ? p_ : units_.end();
2456 mutable UnitIter p_;
2459 mutable UnsafeCodeUnits_ units_;
2464 mutable int8_t state_ = 0;
2468 namespace U_HEADER_ONLY_NAMESPACE {
2485 template<
typename CP32,
typename UnitIter>
2517 template<
typename CP32,
typename Range>
2519 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2532 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
2542 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
2556 return unsafeUTFIterator<CP32>(unitRange.begin());
2563 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2565 return unsafeUTFIterator<CP32>(unitRange.begin());
2573 using UnitIter = decltype(unitRange.begin());
2574 using LimitIter = decltype(unitRange.end());
2575 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2577 return unitRange.end();
2579 return unsafeUTFIterator<CP32>(unitRange.end());
2587 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2589 using UnitIter = decltype(unitRange.begin());
2590 using LimitIter = decltype(unitRange.end());
2591 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2593 return unitRange.end();
2595 return unsafeUTFIterator<CP32>(unitRange.end());
2604 return std::make_reverse_iterator(
end());
2612 return std::make_reverse_iterator(
begin());
2620 template<
typename CP32>
2623 __cpp_lib_bind_back >= 2022'02
2624 : std::ranges::range_adaptor_closure<UnsafeUTFStringCodePointsAdaptor<CP32>>
2628 template<
typename Range>
2630 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10
2633 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
2657 template<
typename CP32>
2663 #if defined(__cpp_lib_ranges)
2664 template <
typename CP32, UTFIllFormedBehavior behavior,
typename Range>
2665 constexpr
bool std::ranges::enable_borrowed_range<
2667 std::ranges::enable_borrowed_range<Range>;
2669 template <
typename CP32,
typename Range>
2670 constexpr
bool std::ranges::enable_borrowed_range<
2672 std::ranges::enable_borrowed_range<Range>;
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32...
#define U_FORCE_INLINE
Forces function inlining on compilers that are known to support it.
C API: 16-bit Unicode handling macros.
#define U16_IS_SURROGATE_TRAIL(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a trail surrogate?
#define U16_IS_SURROGATE_LEAD(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a lead surrogate?
#define U16_GET_SUPPLEMENTARY(lead, trail)
Get a supplementary code point value (U+10000..U+10ffff) from its lead and trail surrogates.
#define U16_IS_SURROGATE(c)
Is this code unit a surrogate (U+d800..U+dfff)?
#define U16_IS_LEAD(c)
Is this code unit a lead surrogate (U+d800..U+dbff)?
#define U16_IS_TRAIL(c)
Is this code unit a trail surrogate (U+dc00..U+dfff)?
C API: 8-bit Unicode handling macros.
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte)
Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1)
Internal 3-byte UTF-8 validity check.
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1)
Internal 4-byte UTF-8 validity check.
#define U8_IS_SINGLE(c)
Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
#define U8_LEAD3_T1_BITS
Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
#define U8_LEAD4_T1_BITS
Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
#define U8_IS_LEAD(c)
Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes)
Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
#define U8_IS_TRAIL(c)
Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
auto unsafeUTFIterator(UnitIter iter)
UnsafeUTFIterator factory function.
typename std::iterator_traits< Iter >::difference_type iter_difference_t
constexpr bool is_basic_string_view_v
constexpr bool forward_iterator
auto utfIterator(UnitIter start, UnitIter p, LimitIter limit)
UTFIterator factory function for start <= p < limit.
constexpr UTFStringCodePointsAdaptor< CP32, behavior > utfStringCodePoints
Range adaptor function object returning a UTFStringCodePoints object that represents a "range" of cod...
typename std::iterator_traits< Iter >::value_type iter_value_t
constexpr bool bidirectional_iterator
constexpr UnsafeUTFStringCodePointsAdaptor< CP32 > unsafeUTFStringCodePoints
Range adaptor function object returning an UnsafeUTFStringCodePoints object that represents a "range"...
UTFIllFormedBehavior
Some defined behaviors for handling ill-formed Unicode strings.
@ UTF_BEHAVIOR_FFFD
Returns U+FFFD Replacement Character.
@ UTF_BEHAVIOR_SURROGATE
UTF-8: Not allowed; UTF-16: returns the unpaired surrogate; UTF-32: returns the surrogate code point,...
@ UTF_BEHAVIOR_NEGATIVE
Returns a negative value (-1=U_SENTINEL) instead of a code point.
Basic definitions for ICU, for both C and C++ APIs.
C API: API for accessing ICU version numbers.