7 #ifndef __UTFITERATOR_H__
8 #define __UTFITERATOR_H__
12 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
15 #if defined(__cpp_lib_ranges)
19 #include <string_view>
20 #include <type_traits>
135 #ifndef U_HIDE_DRAFT_API
170 namespace U_HEADER_ONLY_NAMESPACE {
173 #if U_CPLUSPLUS_VERSION >= 20
176 template<
typename Iter>
180 template<
typename Iter>
184 template<
typename Iter>
188 template<
typename Iter>
192 template<
typename Range>
193 constexpr
bool range = std::ranges::range<Range>;
198 template<
typename Iter>
202 template<
typename Iter>
206 template<
typename Iter>
209 std::forward_iterator_tag,
210 typename std::iterator_traits<Iter>::iterator_category>;
213 template<
typename Iter>
216 std::bidirectional_iterator_tag,
217 typename std::iterator_traits<Iter>::iterator_category>;
220 template<
typename Range,
typename =
void>
224 template<
typename Range>
227 std::void_t<decltype(std::declval<Range>().begin()),
228 decltype(std::declval<Range>().end())>> : std::true_type {};
231 template<
typename Range>
240 template <
typename... Args>
247 template<
typename CP32,
bool skipSurrogates>
249 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
273 if (skipSurrogates && c_ == 0xd800) {
301 template<
typename CP32>
303 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
332 template<
typename CP32>
334 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
366 template<
typename CP32,
typename UnitIter,
typename =
void>
368 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
394 UnitIter
begin()
const {
return start_; }
401 UnitIter
end()
const {
return limit_; }
409 #if U_CPLUSPLUS_VERSION >= 20
415 template<std::contiguous_iterator Iter = UnitIter>
416 std::basic_string_view<Unit>
stringView()
const {
417 return std::basic_string_view<Unit>(
begin(),
end());
425 template<typename Iter = UnitIter, typename Unit = typename std::iterator_traits<Iter>::value_type>
426 std::enable_if_t<std::is_pointer_v<Iter> ||
427 std::is_same_v<Iter, typename std::basic_string<Unit>::iterator> ||
428 std::is_same_v<Iter, typename std::basic_string<Unit>::const_iterator> ||
429 std::is_same_v<Iter, typename std::basic_string_view<Unit>::iterator> ||
430 std::is_same_v<Iter, typename std::basic_string_view<Unit>::const_iterator>,
431 std::basic_string_view<Unit>>
433 return std::basic_string_view<Unit>(&*start_, len_);
448 template<
typename CP32,
typename UnitIter>
449 class UnsafeCodeUnits<
452 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
453 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
462 uint8_t
length()
const {
return len_; }
486 template<
typename CP32,
typename UnitIter,
typename =
void>
511 template<
typename CP32,
typename UnitIter>
515 std::enable_if_t<!prv::forward_iterator<UnitIter>>> :
516 public UnsafeCodeUnits<CP32, UnitIter> {
535 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
549 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
553 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
554 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
556 "For 8-bit strings, the SURROGATE option does not have an equivalent.");
568 U_FORCE_INLINE static void inc(UnitIter &p,
const LimitIter &limit) {
574 if ((0xe0 <= b && b < 0xf0)) {
579 }
else if (b < 0xe0) {
609 if (0xe0 <= b2 && b2 <= 0xf4) {
628 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
629 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
631 CP32 c = uint8_t(*p);
634 if constexpr (isMultiPass) {
635 return {c, 1,
true, p0, p};
651 (c = (c << 6) | (t & 0x3f), ++length, ++p != limit) &&
652 (t = *p - 0x80) <= 0x3f) &&
654 (c = (c << 6) | t, ++length, ++p != limit)
656 c >= 0xc2 && (c &= 0x1f, 1)) &&
658 (t = *p - 0x80) <= 0x3f) {
662 if constexpr (isMultiPass) {
663 return {c, length,
true, p0, p};
665 return {c, length,
true};
668 if constexpr (isMultiPass) {
669 return {sub(), length,
false, p0, p};
671 return {sub(), length,
false};
675 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
678 CP32 c = uint8_t(*--p);
680 return {c, 1,
true, p, p0};
688 c = ((b1 - 0xc0) << 6) | (c & 0x3f);
689 return {c, 2,
true, p, p0};
690 }
else if (b1 < 0xf0 ?
695 return {sub(), 2,
false, p, p0};
701 if (0xe0 <= b2 && b2 <= 0xf4) {
706 c = (b2 << 12) | ((b1 & 0x3f) << 6) | c;
707 return {c, 3,
true, p, p0};
712 return {sub(), 3,
false, p, p0};
716 if (0xf0 <= b3 && b3 <= 0xf4) {
720 c = (b3 << 18) | ((b2 & 0x3f) << 12) | ((b1 & 0x3f) << 6) | c;
721 return {c, 4,
true, p, p0};
727 return {sub(), 1,
false, p, p0};
732 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
736 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
737 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
751 U_FORCE_INLINE static void inc(UnitIter &p,
const LimitIter &limit) {
769 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
770 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
772 CP32 c =
static_cast<CP32
>(*p);
775 if constexpr (isMultiPass) {
776 return {c, 1,
true, p0, p};
785 if constexpr (isMultiPass) {
786 return {c, 2,
true, p0, p};
791 if constexpr (isMultiPass) {
792 return {sub(c), 1,
false, p0, p};
794 return {sub(c), 1,
false};
800 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
803 CP32 c =
static_cast<CP32
>(*--p);
805 return {c, 1,
true, p, p0};
812 return {c, 2,
true, p, p0};
814 return {sub(c), 1,
false, p, p0};
821 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
825 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
826 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
829 U_FORCE_INLINE static CP32 sub(
bool forSurrogate, CP32 surrogate) {
836 return forSurrogate ? surrogate : 0xfffd;
849 UnitIter &p0, UnitIter &p,
const LimitIter &) {
850 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
854 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
855 if constexpr (isMultiPass) {
856 return {c, 1,
true, p0, p};
861 if constexpr (isMultiPass) {
862 return {sub(uc < 0xe000, c), 1,
false, p0, p};
864 return {sub(uc < 0xe000, c), 1,
false};
869 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter , UnitIter &p) {
873 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
874 return {c, 1,
true, p, p0};
876 return {sub(uc < 0xe000, c), 1,
false, p, p0};
883 template<
typename CP32,
typename UnitIter,
typename =
void>
887 template<
typename CP32,
typename UnitIter>
891 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
892 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
905 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
906 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
908 CP32 c = uint8_t(*p);
911 if constexpr (isMultiPass) {
912 return {c, 1, p0, p};
916 }
else if (c < 0xe0) {
917 c = ((c & 0x1f) << 6) | (*p & 0x3f);
919 if constexpr (isMultiPass) {
920 return {c, 2, p0, p};
924 }
else if (c < 0xf0) {
927 c = uint16_t(c << 12) | ((*p & 0x3f) << 6);
931 if constexpr (isMultiPass) {
932 return {c, 3, p0, p};
937 c = ((c & 7) << 18) | ((*p & 0x3f) << 12);
939 c |= (*p & 0x3f) << 6;
943 if constexpr (isMultiPass) {
944 return {c, 4, p0, p};
951 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
954 CP32 c = uint8_t(*--p);
956 return {c, 1, p, p0};
961 for (uint8_t shift = 6;;) {
965 c |= uint32_t{b} << shift;
968 c |= (uint32_t{b} & 0x3f) << shift;
974 return {c, count, p, p0};
979 template<
typename CP32,
typename UnitIter>
983 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
984 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1002 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
1003 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
1005 CP32 c =
static_cast<CP32
>(*p);
1008 if constexpr (isMultiPass) {
1009 return {c, 1, p0, p};
1017 if constexpr (isMultiPass) {
1018 return {c, 2, p0, p};
1025 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1028 CP32 c =
static_cast<CP32
>(*--p);
1030 return {c, 1, p, p0};
1034 return {c, 2, p, p0};
1040 template<
typename CP32,
typename UnitIter>
1041 class UnsafeUTFImpl<
1044 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
1045 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1055 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
1056 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
1059 if constexpr (isMultiPass) {
1060 return {c, 1, p0, p};
1066 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1069 return {c, 1, p, p0};
1101 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
1103 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1104 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1128 prv::bidirectional_iterator<UnitIter>,
1129 std::bidirectional_iterator_tag,
1130 std::forward_iterator_tag>;
1146 p_(p), start_(start), limit_(limit), units_(0, 0, false, p, p) {}
1159 p_(p), start_(p), limit_(limit), units_(0, 0, false, p, p) {}
1195 return getLogicalPosition() == other.getLogicalPosition();
1214 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1217 return iter.getLogicalPosition() == s;
1220 #if U_CPLUSPLUS_VERSION < 20
1233 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1236 return iter.getLogicalPosition() == s;
1246 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1257 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1271 units_ = Impl::readAndInc(p0, p_, limit_);
1288 units_ = Impl::readAndInc(p0, p_, limit_);
1291 return Proxy(units_);
1304 }
else if (state_ == 0) {
1305 Impl::inc(p_, limit_);
1328 }
else if (state_ == 0) {
1330 units_ = Impl::readAndInc(p0, p_, limit_);
1351 template<
typename Iter = UnitIter>
1353 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator &>
1357 p_ = units_.begin();
1359 units_ = Impl::decAndRead(start_, p_);
1371 template<
typename Iter = UnitIter>
1373 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator>
1381 friend class std::reverse_iterator<
UTFIterator<CP32, behavior, UnitIter>>;
1384 return state_ <= 0 ? p_ : units_.begin();
1388 mutable UnitIter p_;
1395 mutable CodeUnits<CP32, UnitIter> units_;
1400 mutable int8_t state_ = 0;
1403 #ifndef U_IN_DOXYGEN
1405 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
1408 UnitIter, LimitIter,
1409 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
1410 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1411 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1418 explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
1419 CodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
1420 CodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
1422 CodeUnits<CP32, UnitIter> units_;
1426 using value_type = CodeUnits<CP32, UnitIter>;
1445 return p_ == other.p_ && ahead_ == other.ahead_;
1453 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1456 return !iter.ahead_ && iter.p_ == s;
1459 #if U_CPLUSPLUS_VERSION < 20
1462 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1465 return !iter.ahead_ && iter.p_ == s;
1470 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1476 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1483 units_ = Impl::readAndInc(p_, p_, limit_);
1491 units_ = Impl::readAndInc(p_, p_, limit_);
1494 return Proxy(units_);
1502 Impl::inc(p_, limit_);
1512 units_ = Impl::readAndInc(p_, p_, limit_);
1515 return Proxy(units_);
1520 mutable UnitIter p_;
1526 mutable CodeUnits<CP32, UnitIter> units_ = {0, 0,
false};
1530 mutable bool ahead_ =
false;
1536 #ifndef U_IN_DOXYGEN
1540 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1541 class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter>> {
1542 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1543 using Impl = U_HEADER_ONLY_NAMESPACE::UTFImpl<CP32, behavior, UnitIter>;
1550 explicit Proxy(CodeUnits_ units) : units_(units) {}
1551 CodeUnits_ &operator*() {
return units_; }
1552 CodeUnits_ *operator->() {
return &units_; }
1558 using value_type = CodeUnits_;
1559 using reference = value_type;
1560 using pointer = Proxy;
1562 using iterator_category = std::bidirectional_iterator_tag;
1565 p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),
1566 units_(0, 0, false, p_, p_) {}
1567 U_FORCE_INLINE reverse_iterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}
1569 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
1570 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
1572 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
1573 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
1576 return getLogicalPosition() == other.getLogicalPosition();
1582 units_ = Impl::decAndRead(start_, p_);
1590 units_ = Impl::decAndRead(start_, p_);
1593 return Proxy(units_);
1600 }
else if (state_ == 0) {
1601 Impl::dec(start_, p_);
1604 p_ = units_.begin();
1613 reverse_iterator result(*
this);
1616 }
else if (state_ == 0) {
1617 units_ = Impl::decAndRead(start_, p_);
1618 reverse_iterator result(*
this);
1623 reverse_iterator result(*
this);
1625 p_ = units_.begin();
1637 units_ = Impl::readAndInc(p0, p_, limit_);
1643 reverse_iterator result(*
this);
1650 return state_ >= 0 ? p_ : units_.end();
1654 mutable UnitIter p_;
1661 mutable CodeUnits_ units_;
1666 mutable int8_t state_ = 0;
1670 namespace U_HEADER_ONLY_NAMESPACE {
1695 typename UnitIter,
typename LimitIter = UnitIter>
1698 std::move(start), std::move(p), std::move(limit));
1722 typename UnitIter,
typename LimitIter = UnitIter>
1725 std::move(p), std::move(limit));
1752 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1784 template<
typename CP32, UTFIllFormedBehavior behavior,
typename Range>
1786 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1799 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
1809 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
1823 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1830 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1832 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1840 using UnitIter = decltype(unitRange.begin());
1841 using LimitIter = decltype(unitRange.end());
1842 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1844 return unitRange.end();
1845 }
else if constexpr (prv::bidirectional_iterator<UnitIter>) {
1846 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1849 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1857 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1859 using UnitIter = decltype(unitRange.begin());
1860 using LimitIter = decltype(unitRange.end());
1861 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1863 return unitRange.end();
1864 }
else if constexpr (prv::bidirectional_iterator<UnitIter>) {
1865 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1868 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1877 return std::make_reverse_iterator(
end());
1885 return std::make_reverse_iterator(
begin());
1893 template<
typename CP32, UTFIllFormedBehavior behavior>
1896 __cpp_lib_bind_back >= 2022'02
1897 : std::ranges::range_adaptor_closure<UTFStringCodePointsAdaptor<CP32, behavior>>
1901 template<
typename Range>
1903 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10
1905 std::forward<Range>(unitRange));
1907 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
1911 std::forward<Range>(unitRange));
1933 template<
typename CP32, UTFIllFormedBehavior behavior>
1959 template<
typename CP32,
typename UnitIter,
typename =
void>
1961 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1962 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
1986 prv::bidirectional_iterator<UnitIter>,
1987 std::bidirectional_iterator_tag,
1988 std::forward_iterator_tag>;
2023 return getLogicalPosition() == other.getLogicalPosition();
2040 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2043 return iter.getLogicalPosition() == s;
2046 #if U_CPLUSPLUS_VERSION < 20
2055 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2058 return iter.getLogicalPosition() == s;
2068 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2079 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2093 units_ = Impl::readAndInc(p0, p_);
2110 units_ = Impl::readAndInc(p0, p_);
2113 return Proxy(units_);
2126 }
else if (state_ == 0) {
2150 }
else if (state_ == 0) {
2152 units_ = Impl::readAndInc(p0, p_);
2173 template<
typename Iter = UnitIter>
2179 p_ = units_.begin();
2181 units_ = Impl::decAndRead(p_);
2193 template<
typename Iter = UnitIter>
2206 return state_ <= 0 ? p_ : units_.begin();
2210 mutable UnitIter p_;
2213 mutable UnsafeCodeUnits<CP32, UnitIter> units_;
2218 mutable int8_t state_ = 0;
2221 #ifndef U_IN_DOXYGEN
2223 template<
typename CP32,
typename UnitIter>
2224 class UnsafeUTFIterator<
2227 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
2228 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2229 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
2236 explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
2237 UnsafeCodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
2238 UnsafeCodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
2240 UnsafeCodeUnits<CP32, UnitIter> units_;
2244 using value_type = UnsafeCodeUnits<CP32, UnitIter>;
2259 return p_ == other.p_ && ahead_ == other.ahead_;
2267 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2270 return !iter.ahead_ && iter.p_ == s;
2273 #if U_CPLUSPLUS_VERSION < 20
2276 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2279 return !iter.ahead_ && iter.p_ == s;
2284 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2290 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2297 units_ = Impl::readAndInc(p_, p_);
2305 units_ = Impl::readAndInc(p_, p_);
2308 return Proxy(units_);
2326 units_ = Impl::readAndInc(p_, p_);
2329 return Proxy(units_);
2334 mutable UnitIter p_;
2337 mutable UnsafeCodeUnits<CP32, UnitIter> units_ = {0, 0};
2341 mutable bool ahead_ =
false;
2347 #ifndef U_IN_DOXYGEN
2351 template<
typename CP32,
typename UnitIter>
2352 class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter>> {
2353 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2354 using Impl = U_HEADER_ONLY_NAMESPACE::UnsafeUTFImpl<CP32, UnitIter>;
2361 explicit Proxy(UnsafeCodeUnits_ units) : units_(units) {}
2362 UnsafeCodeUnits_ &operator*() {
return units_; }
2363 UnsafeCodeUnits_ *operator->() {
return &units_; }
2365 UnsafeCodeUnits_ units_;
2369 using value_type = UnsafeCodeUnits_;
2370 using reference = value_type;
2371 using pointer = Proxy;
2373 using iterator_category = std::bidirectional_iterator_tag;
2376 p_(iter.getLogicalPosition()), units_(0, 0, p_, p_) {}
2377 U_FORCE_INLINE reverse_iterator() : p_{}, units_(0, 0, p_, p_) {}
2379 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
2380 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
2382 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
2383 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
2386 return getLogicalPosition() == other.getLogicalPosition();
2392 units_ = Impl::decAndRead(p_);
2400 units_ = Impl::decAndRead(p_);
2403 return Proxy(units_);
2410 }
else if (state_ == 0) {
2414 p_ = units_.begin();
2423 reverse_iterator result(*
this);
2426 }
else if (state_ == 0) {
2427 units_ = Impl::decAndRead(p_);
2428 reverse_iterator result(*
this);
2433 reverse_iterator result(*
this);
2435 p_ = units_.begin();
2447 units_ = Impl::readAndInc(p0, p_);
2453 reverse_iterator result(*
this);
2460 return state_ >= 0 ? p_ : units_.end();
2464 mutable UnitIter p_;
2467 mutable UnsafeCodeUnits_ units_;
2472 mutable int8_t state_ = 0;
2476 namespace U_HEADER_ONLY_NAMESPACE {
2493 template<
typename CP32,
typename UnitIter>
2525 template<
typename CP32,
typename Range>
2527 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2540 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
2550 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
2564 return unsafeUTFIterator<CP32>(unitRange.begin());
2571 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2573 return unsafeUTFIterator<CP32>(unitRange.begin());
2581 using UnitIter = decltype(unitRange.begin());
2582 using LimitIter = decltype(unitRange.end());
2583 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2585 return unitRange.end();
2587 return unsafeUTFIterator<CP32>(unitRange.end());
2595 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2597 using UnitIter = decltype(unitRange.begin());
2598 using LimitIter = decltype(unitRange.end());
2599 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2601 return unitRange.end();
2603 return unsafeUTFIterator<CP32>(unitRange.end());
2612 return std::make_reverse_iterator(
end());
2620 return std::make_reverse_iterator(
begin());
2628 template<
typename CP32>
2631 __cpp_lib_bind_back >= 2022'02
2632 : std::ranges::range_adaptor_closure<UnsafeUTFStringCodePointsAdaptor<CP32>>
2636 template<
typename Range>
2638 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10
2641 if constexpr (prv::is_basic_string_view_v<std::decay_t<Range>>) {
2665 template<
typename CP32>
2671 #if defined(__cpp_lib_ranges)
2672 template <
typename CP32, UTFIllFormedBehavior behavior,
typename Range>
2673 constexpr
bool std::ranges::enable_borrowed_range<
2675 std::ranges::enable_borrowed_range<Range>;
2677 template <
typename CP32,
typename Range>
2678 constexpr
bool std::ranges::enable_borrowed_range<
2680 std::ranges::enable_borrowed_range<Range>;
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32...
#define U_FORCE_INLINE
Forces function inlining on compilers that are known to support it.
C API: 16-bit Unicode handling macros.
#define U16_IS_SURROGATE_TRAIL(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a trail surrogate?
#define U16_IS_SURROGATE_LEAD(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a lead surrogate?
#define U16_GET_SUPPLEMENTARY(lead, trail)
Get a supplementary code point value (U+10000..U+10ffff) from its lead and trail surrogates.
#define U16_IS_SURROGATE(c)
Is this code unit a surrogate (U+d800..U+dfff)?
#define U16_IS_LEAD(c)
Is this code unit a lead surrogate (U+d800..U+dbff)?
#define U16_IS_TRAIL(c)
Is this code unit a trail surrogate (U+dc00..U+dfff)?
C API: 8-bit Unicode handling macros.
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte)
Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1)
Internal 3-byte UTF-8 validity check.
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1)
Internal 4-byte UTF-8 validity check.
#define U8_IS_SINGLE(c)
Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
#define U8_LEAD3_T1_BITS
Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
#define U8_LEAD4_T1_BITS
Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
#define U8_IS_LEAD(c)
Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes)
Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
#define U8_IS_TRAIL(c)
Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
auto unsafeUTFIterator(UnitIter iter)
UnsafeUTFIterator factory function.
typename std::iterator_traits< Iter >::difference_type iter_difference_t
constexpr bool is_basic_string_view_v
constexpr bool forward_iterator
auto utfIterator(UnitIter start, UnitIter p, LimitIter limit)
UTFIterator factory function for start <= p < limit.
constexpr UTFStringCodePointsAdaptor< CP32, behavior > utfStringCodePoints
Range adaptor function object returning a UTFStringCodePoints object that represents a "range" of cod...
typename std::iterator_traits< Iter >::value_type iter_value_t
constexpr bool bidirectional_iterator
constexpr UnsafeUTFStringCodePointsAdaptor< CP32 > unsafeUTFStringCodePoints
Range adaptor function object returning an UnsafeUTFStringCodePoints object that represents a "range"...
UTFIllFormedBehavior
Some defined behaviors for handling ill-formed Unicode strings.
@ UTF_BEHAVIOR_FFFD
Returns U+FFFD Replacement Character.
@ UTF_BEHAVIOR_SURROGATE
UTF-8: Not allowed; UTF-16: returns the unpaired surrogate; UTF-32: returns the surrogate code point,...
@ UTF_BEHAVIOR_NEGATIVE
Returns a negative value (-1=U_SENTINEL) instead of a code point.
Basic definitions for ICU, for both C and C++ APIs.
C API: API for accessing ICU version numbers.