7 #ifndef __UTFITERATOR_H__
8 #define __UTFITERATOR_H__
12 #if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API || !defined(UTYPES_H)
15 #if defined(__cpp_lib_ranges)
19 #include <string_view>
20 #include <type_traits>
135 #ifndef U_HIDE_DRAFT_API
170 namespace U_HEADER_ONLY_NAMESPACE {
173 #if U_CPLUSPLUS_VERSION >= 20
176 template<
typename Iter>
180 template<
typename Iter>
184 template<
typename Iter>
188 template<
typename Iter>
192 template<
typename Range>
193 constexpr
bool range = std::ranges::range<Range>;
198 template<
typename Iter>
202 template<
typename Iter>
206 template<
typename Iter>
209 std::forward_iterator_tag,
210 typename std::iterator_traits<Iter>::iterator_category>;
213 template<
typename Iter>
216 std::bidirectional_iterator_tag,
217 typename std::iterator_traits<Iter>::iterator_category>;
220 template<
typename Range,
typename =
void>
224 template<
typename Range>
227 std::void_t<decltype(std::declval<Range>().begin()),
228 decltype(std::declval<Range>().end())>> : std::true_type {};
231 template<
typename Range>
237 template<
typename CP32,
bool skipSurrogates>
239 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
263 if (skipSurrogates && c_ == 0xd800) {
291 template<
typename CP32>
293 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
322 template<
typename CP32>
324 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
356 template<
typename CP32,
typename UnitIter,
typename =
void>
358 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
384 UnitIter
begin()
const {
return start_; }
391 UnitIter
end()
const {
return limit_; }
399 #if U_CPLUSPLUS_VERSION >= 20
405 template<std::contiguous_iterator Iter = UnitIter>
406 std::basic_string_view<Unit>
stringView()
const {
407 return std::basic_string_view<Unit>(
begin(),
end());
415 template<typename Iter = UnitIter, typename Unit = typename std::iterator_traits<Iter>::value_type>
416 std::enable_if_t<std::is_pointer_v<Iter> ||
417 std::is_same_v<Iter, typename std::basic_string<Unit>::iterator> ||
418 std::is_same_v<Iter, typename std::basic_string<Unit>::const_iterator> ||
419 std::is_same_v<Iter, typename std::basic_string_view<Unit>::iterator> ||
420 std::is_same_v<Iter, typename std::basic_string_view<Unit>::const_iterator>,
421 std::basic_string_view<Unit>>
423 return std::basic_string_view<Unit>(&*start_, len_);
438 template<
typename CP32,
typename UnitIter>
439 class UnsafeCodeUnits<
442 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
443 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
452 uint8_t
length()
const {
return len_; }
476 template<
typename CP32,
typename UnitIter,
typename =
void>
501 template<
typename CP32,
typename UnitIter>
505 std::enable_if_t<!prv::forward_iterator<UnitIter>>> :
506 public UnsafeCodeUnits<CP32, UnitIter> {
525 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
539 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
543 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
544 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
546 "For 8-bit strings, the SURROGATE option does not have an equivalent.");
556 U_FORCE_INLINE static void inc(UnitIter &p,
const LimitIter &limit) {
562 if ((0xe0 <= b && b < 0xf0)) {
567 }
else if (b < 0xe0) {
597 if (0xe0 <= b2 && b2 <= 0xf4) {
616 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
617 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
619 CP32 c = uint8_t(*p);
622 if constexpr (isMultiPass) {
623 return {c, 1,
true, p0, p};
639 (c = (c << 6) | (t & 0x3f), ++length, ++p != limit) &&
640 (t = *p - 0x80) <= 0x3f) &&
642 (c = (c << 6) | t, ++length, ++p != limit)
644 c >= 0xc2 && (c &= 0x1f, 1)) &&
646 (t = *p - 0x80) <= 0x3f) {
650 if constexpr (isMultiPass) {
651 return {c, length,
true, p0, p};
653 return {c, length,
true};
656 if constexpr (isMultiPass) {
657 return {sub(), length,
false, p0, p};
659 return {sub(), length,
false};
663 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
666 CP32 c = uint8_t(*--p);
668 return {c, 1,
true, p, p0};
676 c = ((b1 - 0xc0) << 6) | (c & 0x3f);
677 return {c, 2,
true, p, p0};
678 }
else if (b1 < 0xf0 ?
683 return {sub(), 2,
false, p, p0};
689 if (0xe0 <= b2 && b2 <= 0xf4) {
694 c = (b2 << 12) | ((b1 & 0x3f) << 6) | c;
695 return {c, 3,
true, p, p0};
700 return {sub(), 3,
false, p, p0};
704 if (0xf0 <= b3 && b3 <= 0xf4) {
708 c = (b3 << 18) | ((b2 & 0x3f) << 12) | ((b1 & 0x3f) << 6) | c;
709 return {c, 4,
true, p, p0};
715 return {sub(), 1,
false, p, p0};
720 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
724 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
725 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
736 U_FORCE_INLINE static void inc(UnitIter &p,
const LimitIter &limit) {
754 UnitIter &p0, UnitIter &p,
const LimitIter &limit) {
755 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
757 CP32 c =
static_cast<CP32
>(*p);
760 if constexpr (isMultiPass) {
761 return {c, 1,
true, p0, p};
770 if constexpr (isMultiPass) {
771 return {c, 2,
true, p0, p};
776 if constexpr (isMultiPass) {
777 return {sub(c), 1,
false, p0, p};
779 return {sub(c), 1,
false};
785 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter start, UnitIter &p) {
788 CP32 c =
static_cast<CP32
>(*--p);
790 return {c, 1,
true, p, p0};
797 return {c, 2,
true, p, p0};
799 return {sub(c), 1,
false, p, p0};
806 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
810 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
811 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
814 U_FORCE_INLINE static CP32 sub(
bool forSurrogate, CP32 surrogate) {
831 UnitIter &p0, UnitIter &p,
const LimitIter &) {
832 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
836 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
837 if constexpr (isMultiPass) {
838 return {c, 1,
true, p0, p};
843 if constexpr (isMultiPass) {
844 return {sub(uc < 0xe000, c), 1,
false, p0, p};
846 return {sub(uc < 0xe000, c), 1,
false};
851 U_FORCE_INLINE static CodeUnits<CP32, UnitIter> decAndRead(UnitIter , UnitIter &p) {
855 if (uc < 0xd800 || (0xe000 <= uc && uc <= 0x10ffff)) {
856 return {c, 1,
true, p, p0};
858 return {sub(uc < 0xe000, c), 1,
false, p, p0};
865 template<
typename CP32,
typename UnitIter,
typename =
void>
869 template<
typename CP32,
typename UnitIter>
873 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 1>> {
874 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
887 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
888 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
890 CP32 c = uint8_t(*p);
893 if constexpr (isMultiPass) {
894 return {c, 1, p0, p};
898 }
else if (c < 0xe0) {
899 c = ((c & 0x1f) << 6) | (*p & 0x3f);
901 if constexpr (isMultiPass) {
902 return {c, 2, p0, p};
906 }
else if (c < 0xf0) {
909 c = uint16_t(c << 12) | ((*p & 0x3f) << 6);
913 if constexpr (isMultiPass) {
914 return {c, 3, p0, p};
919 c = ((c & 7) << 18) | ((*p & 0x3f) << 12);
921 c |= (*p & 0x3f) << 6;
925 if constexpr (isMultiPass) {
926 return {c, 4, p0, p};
933 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
936 CP32 c = uint8_t(*--p);
938 return {c, 1, p, p0};
943 for (uint8_t shift = 6;;) {
947 c |= uint32_t{b} << shift;
950 c |= (uint32_t{b} & 0x3f) << shift;
956 return {c, count, p, p0};
961 template<
typename CP32,
typename UnitIter>
965 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 2>> {
966 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
984 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
985 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
987 CP32 c =
static_cast<CP32
>(*p);
990 if constexpr (isMultiPass) {
991 return {c, 1, p0, p};
999 if constexpr (isMultiPass) {
1000 return {c, 2, p0, p};
1007 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1010 CP32 c =
static_cast<CP32
>(*--p);
1012 return {c, 1, p, p0};
1016 return {c, 2, p, p0};
1022 template<
typename CP32,
typename UnitIter>
1023 class UnsafeUTFImpl<
1026 std::enable_if_t<sizeof(typename prv::iter_value_t<UnitIter>) == 4>> {
1027 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1037 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> readAndInc(UnitIter &p0, UnitIter &p) {
1038 constexpr
bool isMultiPass = prv::forward_iterator<UnitIter>;
1041 if constexpr (isMultiPass) {
1042 return {c, 1, p0, p};
1048 U_FORCE_INLINE static UnsafeCodeUnits<CP32, UnitIter> decAndRead(UnitIter &p) {
1051 return {c, 1, p, p0};
1083 typename UnitIter,
typename LimitIter = UnitIter,
typename =
void>
1085 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1086 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1110 prv::bidirectional_iterator<UnitIter>,
1111 std::bidirectional_iterator_tag,
1112 std::forward_iterator_tag>;
1128 p_(p), start_(start), limit_(limit), units_(0, 0, false, p, p) {}
1141 p_(p), start_(p), limit_(limit), units_(0, 0, false, p, p) {}
1177 return getLogicalPosition() == other.getLogicalPosition();
1196 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1199 return iter.getLogicalPosition() == s;
1202 #if U_CPLUSPLUS_VERSION < 20
1215 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1218 return iter.getLogicalPosition() == s;
1228 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1239 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1253 units_ = Impl::readAndInc(p0, p_, limit_);
1270 units_ = Impl::readAndInc(p0, p_, limit_);
1273 return Proxy(units_);
1286 }
else if (state_ == 0) {
1287 Impl::inc(p_, limit_);
1310 }
else if (state_ == 0) {
1312 units_ = Impl::readAndInc(p0, p_, limit_);
1333 template<
typename Iter = UnitIter>
1335 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator &>
1339 p_ = units_.begin();
1341 units_ = Impl::decAndRead(start_, p_);
1353 template<
typename Iter = UnitIter>
1355 std::enable_if_t<prv::bidirectional_iterator<Iter>,
UTFIterator>
1363 friend class std::reverse_iterator<
UTFIterator<CP32, behavior, UnitIter>>;
1366 return state_ <= 0 ? p_ : units_.begin();
1370 mutable UnitIter p_;
1377 mutable CodeUnits<CP32, UnitIter> units_;
1382 mutable int8_t state_ = 0;
1385 #ifndef U_IN_DOXYGEN
1387 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter,
typename LimitIter>
1390 UnitIter, LimitIter,
1391 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
1392 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1393 using Impl = UTFImpl<CP32, behavior, UnitIter, LimitIter>;
1400 explicit Proxy(CodeUnits<CP32, UnitIter> &units) : units_(units) {}
1401 CodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
1402 CodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
1404 CodeUnits<CP32, UnitIter> units_;
1408 using value_type = CodeUnits<CP32, UnitIter>;
1427 return p_ == other.p_ && ahead_ == other.ahead_;
1435 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1438 return !iter.ahead_ && iter.p_ == s;
1441 #if U_CPLUSPLUS_VERSION < 20
1444 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1447 return !iter.ahead_ && iter.p_ == s;
1452 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1458 !std::is_same_v<Sentinel, UTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
1465 units_ = Impl::readAndInc(p_, p_, limit_);
1473 units_ = Impl::readAndInc(p_, p_, limit_);
1476 return Proxy(units_);
1484 Impl::inc(p_, limit_);
1494 units_ = Impl::readAndInc(p_, p_, limit_);
1497 return Proxy(units_);
1502 mutable UnitIter p_;
1508 mutable CodeUnits<CP32, UnitIter> units_ = {0, 0,
false};
1512 mutable bool ahead_ =
false;
1518 #ifndef U_IN_DOXYGEN
1522 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1523 class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UTFIterator<CP32, behavior, UnitIter>> {
1524 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1525 using Impl = U_HEADER_ONLY_NAMESPACE::UTFImpl<CP32, behavior, UnitIter>;
1532 explicit Proxy(CodeUnits_ units) : units_(units) {}
1533 CodeUnits_ &operator*() {
return units_; }
1534 CodeUnits_ *operator->() {
return &units_; }
1540 using value_type = CodeUnits_;
1541 using reference = value_type;
1542 using pointer = Proxy;
1544 using iterator_category = std::bidirectional_iterator_tag;
1547 p_(iter.getLogicalPosition()), start_(iter.start_), limit_(iter.limit_),
1548 units_(0, 0, false, p_, p_) {}
1549 U_FORCE_INLINE reverse_iterator() : p_{}, start_{}, limit_{}, units_(0, 0, false, p_, p_) {}
1551 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
1552 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
1554 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
1555 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
1558 return getLogicalPosition() == other.getLogicalPosition();
1564 units_ = Impl::decAndRead(start_, p_);
1572 units_ = Impl::decAndRead(start_, p_);
1575 return Proxy(units_);
1582 }
else if (state_ == 0) {
1583 Impl::dec(start_, p_);
1586 p_ = units_.begin();
1595 reverse_iterator result(*
this);
1598 }
else if (state_ == 0) {
1599 units_ = Impl::decAndRead(start_, p_);
1600 reverse_iterator result(*
this);
1605 reverse_iterator result(*
this);
1607 p_ = units_.begin();
1619 units_ = Impl::readAndInc(p0, p_, limit_);
1625 reverse_iterator result(*
this);
1632 return state_ >= 0 ? p_ : units_.end();
1636 mutable UnitIter p_;
1643 mutable CodeUnits_ units_;
1648 mutable int8_t state_ = 0;
1652 namespace U_HEADER_ONLY_NAMESPACE {
1677 typename UnitIter,
typename LimitIter = UnitIter>
1680 std::move(start), std::move(p), std::move(limit));
1704 typename UnitIter,
typename LimitIter = UnitIter>
1707 std::move(p), std::move(limit));
1734 template<
typename CP32, UTFIllFormedBehavior behavior,
typename UnitIter>
1751 template<
typename CP32, UTFIllFormedBehavior behavior,
typename Range>
1753 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1766 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
1776 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
1790 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1797 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1799 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end());
1807 using UnitIter = decltype(unitRange.begin());
1808 using LimitIter = decltype(unitRange.end());
1809 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1811 return unitRange.end();
1812 }
else if constexpr (prv::bidirectional_iterator<UnitIter>) {
1813 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1816 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1824 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
1826 using UnitIter = decltype(unitRange.begin());
1827 using LimitIter = decltype(unitRange.end());
1828 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
1830 return unitRange.end();
1831 }
else if constexpr (prv::bidirectional_iterator<UnitIter>) {
1832 return utfIterator<CP32, behavior>(unitRange.begin(), unitRange.end(), unitRange.end());
1835 return utfIterator<CP32, behavior>(unitRange.end(), unitRange.end());
1844 return std::make_reverse_iterator(
end());
1852 return std::make_reverse_iterator(
begin());
1860 template<
typename CP32, UTFIllFormedBehavior behavior>
1863 __cpp_lib_bind_back >= 2022'02
1864 : std::ranges::range_adaptor_closure<UTFStringCodePointsAdaptor<CP32, behavior>>
1868 template<
typename Range>
1870 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10
1872 std::forward<Range>(unitRange));
1893 template<
typename CP32, UTFIllFormedBehavior behavior>
1919 template<
typename CP32,
typename UnitIter,
typename =
void>
1921 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
1922 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
1946 prv::bidirectional_iterator<UnitIter>,
1947 std::bidirectional_iterator_tag,
1948 std::forward_iterator_tag>;
1983 return getLogicalPosition() == other.getLogicalPosition();
2000 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2003 return iter.getLogicalPosition() == s;
2006 #if U_CPLUSPLUS_VERSION < 20
2015 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2018 return iter.getLogicalPosition() == s;
2028 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2039 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2053 units_ = Impl::readAndInc(p0, p_);
2070 units_ = Impl::readAndInc(p0, p_);
2073 return Proxy(units_);
2086 }
else if (state_ == 0) {
2110 }
else if (state_ == 0) {
2112 units_ = Impl::readAndInc(p0, p_);
2133 template<
typename Iter = UnitIter>
2139 p_ = units_.begin();
2141 units_ = Impl::decAndRead(p_);
2153 template<
typename Iter = UnitIter>
2166 return state_ <= 0 ? p_ : units_.begin();
2170 mutable UnitIter p_;
2173 mutable UnsafeCodeUnits<CP32, UnitIter> units_;
2178 mutable int8_t state_ = 0;
2181 #ifndef U_IN_DOXYGEN
2183 template<
typename CP32,
typename UnitIter>
2184 class UnsafeUTFIterator<
2187 std::enable_if_t<!prv::forward_iterator<UnitIter>>> {
2188 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2189 using Impl = UnsafeUTFImpl<CP32, UnitIter>;
2196 explicit Proxy(UnsafeCodeUnits<CP32, UnitIter> &units) : units_(units) {}
2197 UnsafeCodeUnits<CP32, UnitIter> &
operator*() {
return units_; }
2198 UnsafeCodeUnits<CP32, UnitIter> *
operator->() {
return &units_; }
2200 UnsafeCodeUnits<CP32, UnitIter> units_;
2204 using value_type = UnsafeCodeUnits<CP32, UnitIter>;
2219 return p_ == other.p_ && ahead_ == other.ahead_;
2227 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2230 return !iter.ahead_ && iter.p_ == s;
2233 #if U_CPLUSPLUS_VERSION < 20
2236 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2239 return !iter.ahead_ && iter.p_ == s;
2244 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2250 !std::is_same_v<Sentinel, UnsafeUTFIterator> && !std::is_same_v<Sentinel, UnitIter>,
2257 units_ = Impl::readAndInc(p_, p_);
2265 units_ = Impl::readAndInc(p_, p_);
2268 return Proxy(units_);
2286 units_ = Impl::readAndInc(p_, p_);
2289 return Proxy(units_);
2294 mutable UnitIter p_;
2297 mutable UnsafeCodeUnits<CP32, UnitIter> units_ = {0, 0};
2301 mutable bool ahead_ =
false;
2307 #ifndef U_IN_DOXYGEN
2311 template<
typename CP32,
typename UnitIter>
2312 class std::reverse_iterator<U_HEADER_ONLY_NAMESPACE::UnsafeUTFIterator<CP32, UnitIter>> {
2313 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2314 using Impl = U_HEADER_ONLY_NAMESPACE::UnsafeUTFImpl<CP32, UnitIter>;
2321 explicit Proxy(UnsafeCodeUnits_ units) : units_(units) {}
2322 UnsafeCodeUnits_ &operator*() {
return units_; }
2323 UnsafeCodeUnits_ *operator->() {
return &units_; }
2325 UnsafeCodeUnits_ units_;
2329 using value_type = UnsafeCodeUnits_;
2330 using reference = value_type;
2331 using pointer = Proxy;
2333 using iterator_category = std::bidirectional_iterator_tag;
2336 p_(iter.getLogicalPosition()), units_(0, 0, p_, p_) {}
2337 U_FORCE_INLINE reverse_iterator() : p_{}, units_(0, 0, p_, p_) {}
2339 U_FORCE_INLINE reverse_iterator(reverse_iterator &&src) noexcept =
default;
2340 U_FORCE_INLINE reverse_iterator &operator=(reverse_iterator &&src) noexcept =
default;
2342 U_FORCE_INLINE reverse_iterator(
const reverse_iterator &other) =
default;
2343 U_FORCE_INLINE reverse_iterator &operator=(
const reverse_iterator &other) =
default;
2346 return getLogicalPosition() == other.getLogicalPosition();
2352 units_ = Impl::decAndRead(p_);
2360 units_ = Impl::decAndRead(p_);
2363 return Proxy(units_);
2370 }
else if (state_ == 0) {
2374 p_ = units_.begin();
2383 reverse_iterator result(*
this);
2386 }
else if (state_ == 0) {
2387 units_ = Impl::decAndRead(p_);
2388 reverse_iterator result(*
this);
2393 reverse_iterator result(*
this);
2395 p_ = units_.begin();
2407 units_ = Impl::readAndInc(p0, p_);
2413 reverse_iterator result(*
this);
2420 return state_ >= 0 ? p_ : units_.end();
2424 mutable UnitIter p_;
2427 mutable UnsafeCodeUnits_ units_;
2432 mutable int8_t state_ = 0;
2436 namespace U_HEADER_ONLY_NAMESPACE {
2453 template<
typename CP32,
typename UnitIter>
2469 template<
typename CP32,
typename Range>
2471 static_assert(
sizeof(CP32) == 4,
"CP32 must be a 32-bit type to hold a code point");
2484 template<
typename R = Range,
typename = std::enable_if_t<!std::is_reference_v<R>>>
2494 template<
typename R = Range,
typename = std::enable_if_t<std::is_reference_v<R>>,
typename =
void>
2508 return unsafeUTFIterator<CP32>(unitRange.begin());
2515 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2517 return unsafeUTFIterator<CP32>(unitRange.begin());
2525 using UnitIter = decltype(unitRange.begin());
2526 using LimitIter = decltype(unitRange.end());
2527 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2529 return unitRange.end();
2531 return unsafeUTFIterator<CP32>(unitRange.end());
2539 template<
typename R = Range,
typename = std::enable_if_t<prv::range<const R>>>
2541 using UnitIter = decltype(unitRange.begin());
2542 using LimitIter = decltype(unitRange.end());
2543 if constexpr (!std::is_same_v<UnitIter, LimitIter>) {
2545 return unitRange.end();
2547 return unsafeUTFIterator<CP32>(unitRange.end());
2556 return std::make_reverse_iterator(
end());
2564 return std::make_reverse_iterator(
begin());
2572 template<
typename CP32>
2575 __cpp_lib_bind_back >= 2022'02
2576 : std::ranges::range_adaptor_closure<UnsafeUTFStringCodePointsAdaptor<CP32>>
2580 template<
typename Range>
2582 #if defined(__cpp_lib_ranges) && __cpp_lib_ranges >= 2021'10
2603 template<
typename CP32>
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32...
#define U_FORCE_INLINE
Forces function inlining on compilers that are known to support it.
C API: 16-bit Unicode handling macros.
#define U16_IS_SURROGATE_TRAIL(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a trail surrogate?
#define U16_IS_SURROGATE_LEAD(c)
Assuming c is a surrogate code point (U16_IS_SURROGATE(c)), is it a lead surrogate?
#define U16_GET_SUPPLEMENTARY(lead, trail)
Get a supplementary code point value (U+10000..U+10ffff) from its lead and trail surrogates.
#define U16_IS_SURROGATE(c)
Is this code unit a surrogate (U+d800..U+dfff)?
#define U16_IS_LEAD(c)
Is this code unit a lead surrogate (U+d800..U+dbff)?
#define U16_IS_TRAIL(c)
Is this code unit a trail surrogate (U+dc00..U+dfff)?
C API: 8-bit Unicode handling macros.
#define U8_COUNT_TRAIL_BYTES_UNSAFE(leadByte)
Counts the trail bytes for a UTF-8 lead byte of a valid UTF-8 sequence.
#define U8_IS_VALID_LEAD3_AND_T1(lead, t1)
Internal 3-byte UTF-8 validity check.
#define U8_IS_VALID_LEAD4_AND_T1(lead, t1)
Internal 4-byte UTF-8 validity check.
#define U8_IS_SINGLE(c)
Does this code unit (byte) encode a code point by itself (US-ASCII 0..0x7f)?
#define U8_LEAD3_T1_BITS
Internal bit vector for 3-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD3_AND_T1.
#define U8_LEAD4_T1_BITS
Internal bit vector for 4-byte UTF-8 validity check, for use in U8_IS_VALID_LEAD4_AND_T1.
#define U8_IS_LEAD(c)
Is this code unit (byte) a UTF-8 lead byte? (0xC2..0xF4)
#define U8_MASK_LEAD_BYTE(leadByte, countTrailBytes)
Mask a UTF-8 lead byte, leave only the lower bits that form part of the code point value.
#define U8_IS_TRAIL(c)
Is this code unit (byte) a UTF-8 trail byte? (0x80..0xBF)
auto unsafeUTFIterator(UnitIter iter)
UnsafeUTFIterator factory function.
typename std::iterator_traits< Iter >::difference_type iter_difference_t
constexpr bool forward_iterator
auto utfIterator(UnitIter start, UnitIter p, LimitIter limit)
UTFIterator factory function for start <= p < limit.
constexpr UTFStringCodePointsAdaptor< CP32, behavior > utfStringCodePoints
Range adaptor function object returning a UTFStringCodePoints object that represents a "range" of cod...
typename std::iterator_traits< Iter >::value_type iter_value_t
constexpr bool bidirectional_iterator
constexpr UnsafeUTFStringCodePointsAdaptor< CP32 > unsafeUTFStringCodePoints
Range adaptor function object returning an UnsafeUTFStringCodePoints object that represents a "range"...
UTFIllFormedBehavior
Some defined behaviors for handling ill-formed Unicode strings.
@ UTF_BEHAVIOR_FFFD
Returns U+FFFD Replacement Character.
@ UTF_BEHAVIOR_SURROGATE
UTF-8: Not allowed; UTF-16: returns the unpaired surrogate; UTF-32: returns the surrogate code point,...
@ UTF_BEHAVIOR_NEGATIVE
Returns a negative value (-1=U_SENTINEL) instead of a code point.
Basic definitions for ICU, for both C and C++ APIs.
C API: API for accessing ICU version numbers.