ICU 78.2  78.2
unistr.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1998-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File unistr.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 09/25/98 stephen Creation.
15 * 11/11/98 stephen Changed per 11/9 code review.
16 * 04/20/99 stephen Overhauled per 4/16 code review.
17 * 11/18/99 aliu Made to inherit from Replaceable. Added method
18 * handleReplaceBetween(); other methods unchanged.
19 * 06/25/01 grhoten Remove dependency on iostream.
20 ******************************************************************************
21 */
22 
23 #ifndef UNISTR_H
24 #define UNISTR_H
25 
31 #include "unicode/utypes.h"
32 
33 #if U_SHOW_CPLUSPLUS_API
34 
35 #include <cstddef>
36 #include <string_view>
37 #include "unicode/char16ptr.h"
38 #include "unicode/rep.h"
39 #include "unicode/std_string.h"
40 #include "unicode/stringpiece.h"
41 #include "unicode/bytestream.h"
42 
43 struct UConverter; // unicode/ucnv.h
44 
45 #ifndef USTRING_H
51 U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
52 #endif
53 
54 U_NAMESPACE_BEGIN
55 
56 #if !UCONFIG_NO_BREAK_ITERATION
57 class BreakIterator; // unicode/brkiter.h
58 #endif
59 class Edits;
60 
61 U_NAMESPACE_END
62 
63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
70 typedef int32_t U_CALLCONV
71 UStringCaseMapper(int32_t caseLocale, uint32_t options,
73  icu::BreakIterator *iter,
74 #endif
75  char16_t *dest, int32_t destCapacity,
76  const char16_t *src, int32_t srcLength,
77  icu::Edits *edits,
78  UErrorCode &errorCode);
79 
80 U_NAMESPACE_BEGIN
81 
82 class Locale; // unicode/locid.h
83 class StringCharacterIterator;
84 class UnicodeStringAppendable; // unicode/appendable.h
85 
86 /* The <iostream> include has been moved to unicode/ustream.h */
87 
// Shorthand for icu::UnicodeString::kInvariant, the enumerator of
// UnicodeString::EInvariant.
98 #define US_INV icu::UnicodeString::kInvariant
99 
// UNICODE_STRING(cs, _length): builds an icu::UnicodeString from the literal `cs`.
// A `u` prefix is token-pasted onto the literal (u ## cs); `true` is passed as the
// first constructor argument and `_length` as the third.
// When U_CHAR16_IS_TYPEDEF is nonzero the pasted literal is additionally cast to
// const char16_t*.
// NOTE(review): presumably this selects the (UBool isTerminated, ConstChar16Ptr,
// int32_t) constructor declared in the class - confirm.
120 #if !U_CHAR16_IS_TYPEDEF
121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
122 #else
123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
124 #endif
125 
// Same as UNICODE_STRING(cs, _length) with -1 passed as the length argument.
135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136 
// Supplies a default for UNISTR_FROM_CHAR_EXPLICIT unless the build already
// defines it: `explicit` when any of the ICU library implementation macros
// (U_COMBINED_/U_COMMON_/U_I18N_/U_IO_IMPLEMENTATION) is defined, empty otherwise.
144 #ifndef UNISTR_FROM_CHAR_EXPLICIT
145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146  // Auto-"explicit" in ICU library code.
147 # define UNISTR_FROM_CHAR_EXPLICIT explicit
148 # else
149  // Empty by default for source code compatibility.
150 # define UNISTR_FROM_CHAR_EXPLICIT
151 # endif
152 #endif
153 
// Supplies a default for UNISTR_FROM_STRING_EXPLICIT unless the build already
// defines it: `explicit` when any of the ICU library implementation macros
// (U_COMBINED_/U_COMMON_/U_I18N_/U_IO_IMPLEMENTATION) is defined, empty otherwise.
// The macro prefixes the single-argument string-like constructors further down
// (const char16_t*, const uint16_t*, const wchar_t*, nullptr_t, const char*).
164 #ifndef UNISTR_FROM_STRING_EXPLICIT
165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166  // Auto-"explicit" in ICU library code.
167 # define UNISTR_FROM_STRING_EXPLICIT explicit
168 # else
169  // Empty by default for source code compatibility.
170 # define UNISTR_FROM_STRING_EXPLICIT
171 # endif
172 #endif
173 
// Defaults UNISTR_OBJECT_SIZE to 64 unless the build defines it first.
// NOTE(review): presumably the desired sizeof(UnicodeString) in bytes - the use
// site is outside this listing; confirm.
207 #ifndef UNISTR_OBJECT_SIZE
208 # define UNISTR_OBJECT_SIZE 64
209 #endif
210 
303 {
304 public:
306  using value_type = char16_t;
307 
// Tag enum with the single enumerator kInvariant (also available through the
// US_INV macro). It appears as the last parameter of the
// extract(start, startLength, target, targetCapacity, inv) overload and of the
// (const char *src, int32_t textLength, enum EInvariant inv) constructor.
316  enum EInvariant {
321  kInvariant
322  };
323 
324  //========================================
325  // Read-only operations
326  //========================================
327 
328  /* Comparison - bitwise only - for international comparison use collation */
329 
337  inline bool operator== (const UnicodeString& text) const;
338 
354  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
355  inline bool operator==(const S &text) const {
356  std::u16string_view sv(internal::toU16StringView(text));
357  uint32_t len; // unsigned to avoid a compiler warning
358  return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
359  }
360 
368  inline bool operator!= (const UnicodeString& text) const;
369 
387  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
388  inline bool operator!=(const S &text) const {
389  return !operator==(text);
390  }
391 
399  inline UBool operator> (const UnicodeString& text) const;
400 
408  inline UBool operator< (const UnicodeString& text) const;
409 
417  inline UBool operator>= (const UnicodeString& text) const;
418 
426  inline UBool operator<= (const UnicodeString& text) const;
427 
439  inline int8_t compare(const UnicodeString& text) const;
440 
456  inline int8_t compare(int32_t start,
457  int32_t length,
458  const UnicodeString& text) const;
459 
477  inline int8_t compare(int32_t start,
478  int32_t length,
479  const UnicodeString& srcText,
480  int32_t srcStart,
481  int32_t srcLength) const;
482 
495  inline int8_t compare(ConstChar16Ptr srcChars,
496  int32_t srcLength) const;
497 
512  inline int8_t compare(int32_t start,
513  int32_t length,
514  const char16_t *srcChars) const;
515 
533  inline int8_t compare(int32_t start,
534  int32_t length,
535  const char16_t *srcChars,
536  int32_t srcStart,
537  int32_t srcLength) const;
538 
556  inline int8_t compareBetween(int32_t start,
557  int32_t limit,
558  const UnicodeString& srcText,
559  int32_t srcStart,
560  int32_t srcLimit) const;
561 
579  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
580 
600  inline int8_t compareCodePointOrder(int32_t start,
601  int32_t length,
602  const UnicodeString& srcText) const;
603 
625  inline int8_t compareCodePointOrder(int32_t start,
626  int32_t length,
627  const UnicodeString& srcText,
628  int32_t srcStart,
629  int32_t srcLength) const;
630 
649  inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
650  int32_t srcLength) const;
651 
671  inline int8_t compareCodePointOrder(int32_t start,
672  int32_t length,
673  const char16_t *srcChars) const;
674 
696  inline int8_t compareCodePointOrder(int32_t start,
697  int32_t length,
698  const char16_t *srcChars,
699  int32_t srcStart,
700  int32_t srcLength) const;
701 
723  inline int8_t compareCodePointOrderBetween(int32_t start,
724  int32_t limit,
725  const UnicodeString& srcText,
726  int32_t srcStart,
727  int32_t srcLimit) const;
728 
747  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
748 
769  inline int8_t caseCompare(int32_t start,
770  int32_t length,
771  const UnicodeString& srcText,
772  uint32_t options) const;
773 
796  inline int8_t caseCompare(int32_t start,
797  int32_t length,
798  const UnicodeString& srcText,
799  int32_t srcStart,
800  int32_t srcLength,
801  uint32_t options) const;
802 
822  inline int8_t caseCompare(ConstChar16Ptr srcChars,
823  int32_t srcLength,
824  uint32_t options) const;
825 
846  inline int8_t caseCompare(int32_t start,
847  int32_t length,
848  const char16_t *srcChars,
849  uint32_t options) const;
850 
873  inline int8_t caseCompare(int32_t start,
874  int32_t length,
875  const char16_t *srcChars,
876  int32_t srcStart,
877  int32_t srcLength,
878  uint32_t options) const;
879 
902  inline int8_t caseCompareBetween(int32_t start,
903  int32_t limit,
904  const UnicodeString& srcText,
905  int32_t srcStart,
906  int32_t srcLimit,
907  uint32_t options) const;
908 
916  inline UBool startsWith(const UnicodeString& text) const;
917 
928  inline UBool startsWith(const UnicodeString& srcText,
929  int32_t srcStart,
930  int32_t srcLength) const;
931 
940  inline UBool startsWith(ConstChar16Ptr srcChars,
941  int32_t srcLength) const;
942 
952  inline UBool startsWith(const char16_t *srcChars,
953  int32_t srcStart,
954  int32_t srcLength) const;
955 
963  inline UBool endsWith(const UnicodeString& text) const;
964 
975  inline UBool endsWith(const UnicodeString& srcText,
976  int32_t srcStart,
977  int32_t srcLength) const;
978 
987  inline UBool endsWith(ConstChar16Ptr srcChars,
988  int32_t srcLength) const;
989 
1000  inline UBool endsWith(const char16_t *srcChars,
1001  int32_t srcStart,
1002  int32_t srcLength) const;
1003 
1004 
1005  /* Searching - bitwise only */
1006 
1015  inline int32_t indexOf(const UnicodeString& text) const;
1016 
1026  inline int32_t indexOf(const UnicodeString& text,
1027  int32_t start) const;
1028 
1040  inline int32_t indexOf(const UnicodeString& text,
1041  int32_t start,
1042  int32_t length) const;
1043 
1060  inline int32_t indexOf(const UnicodeString& srcText,
1061  int32_t srcStart,
1062  int32_t srcLength,
1063  int32_t start,
1064  int32_t length) const;
1065 
1077  inline int32_t indexOf(const char16_t *srcChars,
1078  int32_t srcLength,
1079  int32_t start) const;
1080 
1093  inline int32_t indexOf(ConstChar16Ptr srcChars,
1094  int32_t srcLength,
1095  int32_t start,
1096  int32_t length) const;
1097 
1114  int32_t indexOf(const char16_t *srcChars,
1115  int32_t srcStart,
1116  int32_t srcLength,
1117  int32_t start,
1118  int32_t length) const;
1119 
1127  inline int32_t indexOf(char16_t c) const;
1128 
1137  inline int32_t indexOf(UChar32 c) const;
1138 
1147  inline int32_t indexOf(char16_t c,
1148  int32_t start) const;
1149 
1159  inline int32_t indexOf(UChar32 c,
1160  int32_t start) const;
1161 
1172  inline int32_t indexOf(char16_t c,
1173  int32_t start,
1174  int32_t length) const;
1175 
1187  inline int32_t indexOf(UChar32 c,
1188  int32_t start,
1189  int32_t length) const;
1190 
1199  inline int32_t lastIndexOf(const UnicodeString& text) const;
1200 
1210  inline int32_t lastIndexOf(const UnicodeString& text,
1211  int32_t start) const;
1212 
1224  inline int32_t lastIndexOf(const UnicodeString& text,
1225  int32_t start,
1226  int32_t length) const;
1227 
1244  inline int32_t lastIndexOf(const UnicodeString& srcText,
1245  int32_t srcStart,
1246  int32_t srcLength,
1247  int32_t start,
1248  int32_t length) const;
1249 
1260  inline int32_t lastIndexOf(const char16_t *srcChars,
1261  int32_t srcLength,
1262  int32_t start) const;
1263 
1276  inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1277  int32_t srcLength,
1278  int32_t start,
1279  int32_t length) const;
1280 
1297  int32_t lastIndexOf(const char16_t *srcChars,
1298  int32_t srcStart,
1299  int32_t srcLength,
1300  int32_t start,
1301  int32_t length) const;
1302 
1310  inline int32_t lastIndexOf(char16_t c) const;
1311 
1320  inline int32_t lastIndexOf(UChar32 c) const;
1321 
1330  inline int32_t lastIndexOf(char16_t c,
1331  int32_t start) const;
1332 
1342  inline int32_t lastIndexOf(UChar32 c,
1343  int32_t start) const;
1344 
1355  inline int32_t lastIndexOf(char16_t c,
1356  int32_t start,
1357  int32_t length) const;
1358 
1370  inline int32_t lastIndexOf(UChar32 c,
1371  int32_t start,
1372  int32_t length) const;
1373 
1374 
1375  /* Character access */
1376 
1385  inline char16_t charAt(int32_t offset) const;
1386 
1394  inline char16_t operator[] (int32_t offset) const;
1395 
1407  UChar32 char32At(int32_t offset) const;
1408 
1424  int32_t getChar32Start(int32_t offset) const;
1425 
1442  int32_t getChar32Limit(int32_t offset) const;
1443 
1494  int32_t moveIndex32(int32_t index, int32_t delta) const;
1495 
1496  /* Substring extraction */
1497 
1513  inline void extract(int32_t start,
1514  int32_t length,
1515  Char16Ptr dst,
1516  int32_t dstStart = 0) const;
1517 
1539  int32_t
1540  extract(Char16Ptr dest, int32_t destCapacity,
1541  UErrorCode &errorCode) const;
1542 
1552  inline void extract(int32_t start,
1553  int32_t length,
1554  UnicodeString& target) const;
1555 
1567  inline void extractBetween(int32_t start,
1568  int32_t limit,
1569  char16_t *dst,
1570  int32_t dstStart = 0) const;
1571 
1580  virtual void extractBetween(int32_t start,
1581  int32_t limit,
1582  UnicodeString& target) const override;
1583 
1605  int32_t extract(int32_t start,
1606  int32_t startLength,
1607  char *target,
1608  int32_t targetCapacity,
1609  enum EInvariant inv) const;
1610 
1611 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1612 
1632  int32_t extract(int32_t start,
1633  int32_t startLength,
1634  char *target,
1635  uint32_t targetLength) const;
1636 
1637 #endif
1638 
1639 #if !UCONFIG_NO_CONVERSION
1640 
1666  inline int32_t extract(int32_t start,
1667  int32_t startLength,
1668  char* target,
1669  const char* codepage = nullptr) const;
1670 
1700  int32_t extract(int32_t start,
1701  int32_t startLength,
1702  char *target,
1703  uint32_t targetLength,
1704  const char *codepage) const;
1705 
1723  int32_t extract(char *dest, int32_t destCapacity,
1724  UConverter *cnv,
1725  UErrorCode &errorCode) const;
1726 
1727 #endif
1728 
1742  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1743 
1754  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1755 
1767  void toUTF8(ByteSink &sink) const;
1768 
1782  template<typename StringClass>
1783  StringClass &toUTF8String(StringClass &result) const {
1784  StringByteSink<StringClass> sbs(&result, length());
1785  toUTF8(sbs);
1786  return result;
1787  }
1788 
1789 #ifndef U_HIDE_DRAFT_API
1801  template<typename StringClass>
1802  StringClass toUTF8String() const {
1803  StringClass result;
1804  StringByteSink<StringClass> sbs(&result, length());
1805  toUTF8(sbs);
1806  return result;
1807  }
1808 #endif // U_HIDE_DRAFT_API
1809 
1825  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1826 
1827  /* Length operations */
1828 
1837  inline int32_t length() const;
1838 
1852  int32_t
1853  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1854 
1878  UBool
1879  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1880 
1886  inline UBool isEmpty() const;
1887 
1897  inline int32_t getCapacity() const;
1898 
1899  /* Other operations */
1900 
1906  inline int32_t hashCode() const;
1907 
1920  inline UBool isBogus() const;
1921 
1922 #ifndef U_HIDE_DRAFT_API
1923 private:
1924  // These type aliases are private; there is no guarantee that they will remain
1925  // aliases to the same types in subsequent versions of ICU.
1926  // Note that whether `std::u16string_view::const_iterator` is a pointer or a
1927  // class that models contiguous_iterator is platform-dependent.
1928  using unspecified_iterator = std::u16string_view::const_iterator;
1929  using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator;
1930 
1931 public:
1937  unspecified_iterator begin() const { return std::u16string_view(*this).begin(); }
1943  unspecified_iterator end() const { return std::u16string_view(*this).end(); }
1949  unspecified_reverse_iterator rbegin() const { return std::u16string_view(*this).rbegin(); }
1955  unspecified_reverse_iterator rend() const { return std::u16string_view(*this).rend(); }
1956 #endif // U_HIDE_DRAFT_API
1957 
1958  //========================================
1959  // Write operations
1960  //========================================
1961 
1962  /* Assignment operations */
1963 
1983 
2010 
2021  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2022  inline UnicodeString &operator=(const S &src) {
2023  unBogus();
2024  return doReplace(0, length(), internal::toU16StringView(src));
2025  }
2026 
2036 
2042  void swap(UnicodeString &other) noexcept;
2043 
// Non-member swap declared as a friend: forwards to the member s1.swap(s2).
// Declared noexcept, like the member swap it calls.
2050  friend inline void U_EXPORT2
2051  swap(UnicodeString &s1, UnicodeString &s2) noexcept {
2052  s1.swap(s2);
2053  }
2054 
2062  inline UnicodeString& operator= (char16_t ch);
2063 
2071  inline UnicodeString& operator= (UChar32 ch);
2072 
2084  inline UnicodeString& setTo(const UnicodeString& srcText,
2085  int32_t srcStart);
2086 
2100  inline UnicodeString& setTo(const UnicodeString& srcText,
2101  int32_t srcStart,
2102  int32_t srcLength);
2103 
2112  inline UnicodeString& setTo(const UnicodeString& srcText);
2113 
2122  inline UnicodeString& setTo(const char16_t *srcChars,
2123  int32_t srcLength);
2124 
2133  inline UnicodeString& setTo(char16_t srcChar);
2134 
2143  inline UnicodeString& setTo(UChar32 srcChar);
2144 
2168  UnicodeString &setTo(UBool isTerminated,
2169  ConstChar16Ptr text,
2170  int32_t textLength);
2171 
2191  UnicodeString &setTo(char16_t *buffer,
2192  int32_t buffLength,
2193  int32_t buffCapacity);
2194 
2234  void setToBogus();
2235 
2243  UnicodeString& setCharAt(int32_t offset,
2244  char16_t ch);
2245 
2246 
2247  /* Append operations */
2248 
2256  inline UnicodeString& operator+= (char16_t ch);
2257 
2265  inline UnicodeString& operator+= (UChar32 ch);
2266 
2274  inline UnicodeString& operator+= (const UnicodeString& srcText);
2275 
2286  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2287  inline UnicodeString& operator+=(const S &src) {
2288  return doAppend(internal::toU16StringView(src));
2289  }
2290 
2305  inline UnicodeString& append(const UnicodeString& srcText,
2306  int32_t srcStart,
2307  int32_t srcLength);
2308 
2316  inline UnicodeString& append(const UnicodeString& srcText);
2317 
2331  inline UnicodeString& append(const char16_t *srcChars,
2332  int32_t srcStart,
2333  int32_t srcLength);
2334 
2344  inline UnicodeString& append(ConstChar16Ptr srcChars,
2345  int32_t srcLength);
2346 
2357  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2358  inline UnicodeString& append(const S &src) {
2359  return doAppend(internal::toU16StringView(src));
2360  }
2361 
2368  inline UnicodeString& append(char16_t srcChar);
2369 
2377 
2378 #ifndef U_HIDE_DRAFT_API
// Appends the single code unit `c` by calling append(c); the reference that
// append() returns is discarded.
2386  inline void push_back(char16_t c) { append(c); }
2387 #endif // U_HIDE_DRAFT_API
2388 
2389  /* Insert operations */
2390 
2404  inline UnicodeString& insert(int32_t start,
2405  const UnicodeString& srcText,
2406  int32_t srcStart,
2407  int32_t srcLength);
2408 
2417  inline UnicodeString& insert(int32_t start,
2418  const UnicodeString& srcText);
2419 
2433  inline UnicodeString& insert(int32_t start,
2434  const char16_t *srcChars,
2435  int32_t srcStart,
2436  int32_t srcLength);
2437 
2447  inline UnicodeString& insert(int32_t start,
2448  ConstChar16Ptr srcChars,
2449  int32_t srcLength);
2450 
2459  inline UnicodeString& insert(int32_t start,
2460  char16_t srcChar);
2461 
2470  inline UnicodeString& insert(int32_t start,
2471  UChar32 srcChar);
2472 
2473 
2474  /* Replace operations */
2475 
2493  inline UnicodeString& replace(int32_t start,
2494  int32_t length,
2495  const UnicodeString& srcText,
2496  int32_t srcStart,
2497  int32_t srcLength);
2498 
2511  inline UnicodeString& replace(int32_t start,
2512  int32_t length,
2513  const UnicodeString& srcText);
2514 
2532  inline UnicodeString& replace(int32_t start,
2533  int32_t length,
2534  const char16_t *srcChars,
2535  int32_t srcStart,
2536  int32_t srcLength);
2537 
2550  inline UnicodeString& replace(int32_t start,
2551  int32_t length,
2552  ConstChar16Ptr srcChars,
2553  int32_t srcLength);
2554 
2566  inline UnicodeString& replace(int32_t start,
2567  int32_t length,
2568  char16_t srcChar);
2569 
2581  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2582 
2592  inline UnicodeString& replaceBetween(int32_t start,
2593  int32_t limit,
2594  const UnicodeString& srcText);
2595 
2610  inline UnicodeString& replaceBetween(int32_t start,
2611  int32_t limit,
2612  const UnicodeString& srcText,
2613  int32_t srcStart,
2614  int32_t srcLimit);
2615 
2623  virtual void handleReplaceBetween(int32_t start,
2624  int32_t limit,
2625  const UnicodeString& text) override;
2626 
2632  virtual UBool hasMetaData() const override;
2633 
2647  virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2648 
2649  /* Search and replace operations */
2650 
2659  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2660  const UnicodeString& newText);
2661 
2673  inline UnicodeString& findAndReplace(int32_t start,
2674  int32_t length,
2675  const UnicodeString& oldText,
2676  const UnicodeString& newText);
2677 
2696  int32_t length,
2697  const UnicodeString& oldText,
2698  int32_t oldStart,
2699  int32_t oldLength,
2700  const UnicodeString& newText,
2701  int32_t newStart,
2702  int32_t newLength);
2703 
2704 
2705  /* Remove operations */
2706 
2715  inline UnicodeString& remove();
2716 
2725  inline UnicodeString& remove(int32_t start,
2726  int32_t length = static_cast<int32_t>(INT32_MAX));
2727 
2736  inline UnicodeString& removeBetween(int32_t start,
2737  int32_t limit = static_cast<int32_t>(INT32_MAX));
2738 
2748  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2749 
2750  /* Length operations */
2751 
2763  UBool padLeading(int32_t targetLength,
2764  char16_t padChar = 0x0020);
2765 
2777  UBool padTrailing(int32_t targetLength,
2778  char16_t padChar = 0x0020);
2779 
2786  inline UBool truncate(int32_t targetLength);
2787 
2794 
2795  /* Miscellaneous operations */
2796 
2802  inline UnicodeString& reverse();
2803 
2812  inline UnicodeString& reverse(int32_t start,
2813  int32_t length);
2814 
2822 
2830  UnicodeString& toUpper(const Locale& locale);
2831 
2839 
2847  UnicodeString& toLower(const Locale& locale);
2848 
2849 #if !UCONFIG_NO_BREAK_ITERATION
2850 
2878 
2906  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2907 
2938  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2939 
2940 #endif
2941 
2955  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2956 
2957  //========================================
2958  // Access to the internal buffer
2959  //========================================
2960 
3004  char16_t *getBuffer(int32_t minCapacity);
3005 
3026  void releaseBuffer(int32_t newLength=-1);
3027 
3058  inline const char16_t *getBuffer() const;
3059 
3093  const char16_t *getTerminatedBuffer();
3094 
3101  inline operator std::u16string_view() const {
3102  return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
3103  }
3104 
3105 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3115  inline operator std::wstring_view() const {
3116  const char16_t *p = getBuffer();
3117 #ifdef U_ALIASING_BARRIER
3118  U_ALIASING_BARRIER(p);
3119 #endif
3120  return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
3121  }
3122 #endif // U_SIZEOF_WCHAR_T
3123 
3124  //========================================
3125  // Constructors
3126  //========================================
3127 
3131  inline UnicodeString();
3132 
3144  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
3145 
3156 
3167 
3168 #ifdef U_HIDE_DRAFT_API
// Constructor from const char16_t*, compiled only when U_HIDE_DRAFT_API is defined.
// Delegates to UnicodeString(text, -1); explicit or implicit according to
// UNISTR_FROM_STRING_EXPLICIT.
// NOTE(review): -1 presumably means "NUL-terminated, length computed" - confirm.
3188  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
3189  UnicodeString(text, -1) {}
3190 #endif // U_HIDE_DRAFT_API
3191 
3192 #if !U_CHAR16_IS_TYPEDEF && \
3193  (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
// Constructor from const uint16_t*: wraps the pointer in ConstChar16Ptr and
// delegates with a length argument of -1. Explicit or implicit according to
// UNISTR_FROM_STRING_EXPLICIT.
3213  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3214  UnicodeString(ConstChar16Ptr(text), -1) {}
3215 #endif
3216 
3217 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
// Constructor from const wchar_t*: wraps the pointer in ConstChar16Ptr and
// delegates with a length argument of -1. Explicit or implicit according to
// UNISTR_FROM_STRING_EXPLICIT.
3238  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3239  UnicodeString(ConstChar16Ptr(text), -1) {}
3240 #endif
3241 
3252  UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3253 
3270  UnicodeString(const char16_t *text,
3271  int32_t textLength);
3272 
3273 #if !U_CHAR16_IS_TYPEDEF
// Constructor from const uint16_t* plus a length, compiled when U_CHAR16_IS_TYPEDEF
// is zero: wraps the pointer in ConstChar16Ptr and delegates, passing textLength
// through unchanged.
3290  UnicodeString(const uint16_t *text, int32_t textLength) :
3291  UnicodeString(ConstChar16Ptr(text), textLength) {}
3292 #endif
3293 
3294 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Constructor from const wchar_t* plus a length, compiled when U_SIZEOF_WCHAR_T==2
// (or for Doxygen): wraps the pointer in ConstChar16Ptr and delegates, passing
// textLength through unchanged.
3312  UnicodeString(const wchar_t *text, int32_t textLength) :
3313  UnicodeString(ConstChar16Ptr(text), textLength) {}
3314 #endif
3315 
3323  inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3324 
3337  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3339  fUnion.fFields.fLengthAndFlags = kShortString;
3340  doAppend(internal::toU16StringViewNullable(text));
3341  }
3342 
3373  UnicodeString(UBool isTerminated,
3374  ConstChar16Ptr text,
3375  int32_t textLength);
3376 
3395  UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3396 
3397 #if !U_CHAR16_IS_TYPEDEF
// Constructor from a non-const uint16_t* buffer, compiled when U_CHAR16_IS_TYPEDEF
// is zero: wraps the pointer in Char16Ptr and delegates, passing buffLength and
// buffCapacity through unchanged.
3406  UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3407  UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3408 #endif
3409 
3410 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Constructor from a non-const wchar_t* buffer, compiled when U_SIZEOF_WCHAR_T==2
// (or for Doxygen): wraps the pointer in Char16Ptr and delegates, passing
// buffLength and buffCapacity through unchanged.
3420  UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3421  UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3422 #endif
3423 
3432  inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3433 
3434 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3435 
3459  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3460 
3469  UnicodeString(const char *codepageData, int32_t dataLength);
3470 
3471 #endif
3472 
3473 #if !UCONFIG_NO_CONVERSION
3474 
3492  UnicodeString(const char *codepageData, const char *codepage);
3493 
3511  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3512 
3535  const char *src, int32_t srcLength,
3536  UConverter *cnv,
3537  UErrorCode &errorCode);
3538 
3539 #endif
3540 
3573  UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3574 
3575 
3593 
3600  UnicodeString(UnicodeString &&src) noexcept;
3601 
3608  UnicodeString(const UnicodeString& src, int32_t srcStart);
3609 
3617  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3618 
3632  virtual UnicodeString *clone() const override;
3633 
3637  virtual ~UnicodeString();
3638 
3661  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3662  static inline UnicodeString readOnlyAlias(const S &text) {
3663  return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3664  }
3665 
// Overload for a UnicodeString source: forwards to the private
// readOnlyAliasFromUnicodeString(text).
3685  static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
3686  return readOnlyAliasFromUnicodeString(text);
3687  }
3688 
3703 
3715  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3716 
3717  /* Miscellaneous operations */
3718 
3754 
3774  UChar32 unescapeAt(int32_t &offset) const;
3775 
3781  static UClassID U_EXPORT2 getStaticClassID();
3782 
3788  virtual UClassID getDynamicClassID() const override;
3789 
3790  //========================================
3791  // Implementation methods
3792  //========================================
3793 
3794 protected:
3799  virtual int32_t getLength() const override;
3800 
3806  virtual char16_t getCharAt(int32_t offset) const override;
3807 
3813  virtual UChar32 getChar32At(int32_t offset) const override;
3814 
3815 private:
3816  static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3817  static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
3818 
3819  // For char* constructors. Could be made public.
3820  UnicodeString &setToUTF8(StringPiece utf8);
3821  // For extract(char*).
3822  // We could make a toUTF8(target, capacity, errorCode) public but not
3823  // this version: New API will be cleaner if we make callers create substrings
3824  // rather than having start+length on every method,
3825  // and it should take a UErrorCode&.
3826  int32_t
3827  toUTF8(int32_t start, int32_t len,
3828  char *target, int32_t capacity) const;
3829 
3834  inline UBool doEquals(const UnicodeString &text, int32_t len) const {
3835  return doEquals(text.getArrayStart(), len);
3836  }
3837  UBool doEquals(const char16_t *text, int32_t len) const;
3838 
3839  inline UBool
3840  doEqualsSubstring(int32_t start,
3841  int32_t length,
3842  const UnicodeString& srcText,
3843  int32_t srcStart,
3844  int32_t srcLength) const;
3845 
3846  UBool doEqualsSubstring(int32_t start,
3847  int32_t length,
3848  const char16_t *srcChars,
3849  int32_t srcStart,
3850  int32_t srcLength) const;
3851 
3852  inline int8_t
3853  doCompare(int32_t start,
3854  int32_t length,
3855  const UnicodeString& srcText,
3856  int32_t srcStart,
3857  int32_t srcLength) const;
3858 
3859  int8_t doCompare(int32_t start,
3860  int32_t length,
3861  const char16_t *srcChars,
3862  int32_t srcStart,
3863  int32_t srcLength) const;
3864 
3865  inline int8_t
3866  doCompareCodePointOrder(int32_t start,
3867  int32_t length,
3868  const UnicodeString& srcText,
3869  int32_t srcStart,
3870  int32_t srcLength) const;
3871 
3872  int8_t doCompareCodePointOrder(int32_t start,
3873  int32_t length,
3874  const char16_t *srcChars,
3875  int32_t srcStart,
3876  int32_t srcLength) const;
3877 
3878  inline int8_t
3879  doCaseCompare(int32_t start,
3880  int32_t length,
3881  const UnicodeString &srcText,
3882  int32_t srcStart,
3883  int32_t srcLength,
3884  uint32_t options) const;
3885 
3886  int8_t
3887  doCaseCompare(int32_t start,
3888  int32_t length,
3889  const char16_t *srcChars,
3890  int32_t srcStart,
3891  int32_t srcLength,
3892  uint32_t options) const;
3893 
3894  int32_t doIndexOf(char16_t c,
3895  int32_t start,
3896  int32_t length) const;
3897 
3898  int32_t doIndexOf(UChar32 c,
3899  int32_t start,
3900  int32_t length) const;
3901 
3902  int32_t doLastIndexOf(char16_t c,
3903  int32_t start,
3904  int32_t length) const;
3905 
3906  int32_t doLastIndexOf(UChar32 c,
3907  int32_t start,
3908  int32_t length) const;
3909 
3910  void doExtract(int32_t start,
3911  int32_t length,
3912  char16_t *dst,
3913  int32_t dstStart) const;
3914 
3915  inline void doExtract(int32_t start,
3916  int32_t length,
3917  UnicodeString& target) const;
3918 
3919  inline char16_t doCharAt(int32_t offset) const;
3920 
3921  UnicodeString& doReplace(int32_t start,
3922  int32_t length,
3923  const UnicodeString& srcText,
3924  int32_t srcStart,
3925  int32_t srcLength);
3926 
3927  UnicodeString& doReplace(int32_t start,
3928  int32_t length,
3929  const char16_t *srcChars,
3930  int32_t srcStart,
3931  int32_t srcLength);
3932  UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3933 
3934  UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3935  UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3936  UnicodeString& doAppend(std::u16string_view src);
3937 
3938  UnicodeString& doReverse(int32_t start,
3939  int32_t length);
3940 
3941  // calculate hash code
3942  int32_t doHashCode() const;
3943 
3944  // get pointer to start of array
3945  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3946  inline char16_t* getArrayStart();
3947  inline const char16_t* getArrayStart() const;
3948 
3949  inline UBool hasShortLength() const;
3950  inline int32_t getShortLength() const;
3951 
3952  // A UnicodeString object (not necessarily its current buffer)
3953  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3954  inline UBool isWritable() const;
3955 
3956  // Is the current buffer writable?
3957  inline UBool isBufferWritable() const;
3958 
3959  // None of the following does releaseArray().
3960  inline void setZeroLength();
3961  inline void setShortLength(int32_t len);
3962  inline void setLength(int32_t len);
3963  inline void setToEmpty();
3964  inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3965 
3966  // allocate the array; result may be the stack buffer
3967  // sets refCount to 1 if appropriate
3968  // sets fArray, fCapacity, and flags
3969  // sets length to 0
3970  // returns boolean for success or failure
3971  UBool allocate(int32_t capacity);
3972 
3973  // release the array if owned
3974  void releaseArray();
3975 
3976  // turn a bogus string into an empty one
3977  void unBogus();
3978 
3979  // implements assignment operator, copy constructor, and fastCopyFrom()
3980  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3981 
3982  // Copies just the fields without memory management.
3983  void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3984 
3985  // Pin start and limit to acceptable values.
3986  inline void pinIndex(int32_t& start) const;
3987  inline void pinIndices(int32_t& start,
3988  int32_t& length) const;
3989 
3990 #if !UCONFIG_NO_CONVERSION
3991 
3992  /* Internal extract() using UConverter. */
3993  int32_t doExtract(int32_t start, int32_t length,
3994  char *dest, int32_t destCapacity,
3995  UConverter *cnv,
3996  UErrorCode &errorCode) const;
3997 
3998  /*
3999  * Real constructor for converting from codepage data.
4000  * It assumes that it is called with !fRefCounted.
4001  *
4002  * If `codepage==0`, then the default converter
4003  * is used for the platform encoding.
4004  * If `codepage` is an empty string (`""`),
4005  * then a simple conversion is performed on the codepage-invariant
4006  * subset ("invariant characters") of the platform encoding. See utypes.h.
4007  */
4008  void doCodepageCreate(const char *codepageData,
4009  int32_t dataLength,
4010  const char *codepage);
4011 
4012  /*
4013  * Worker function for creating a UnicodeString from
4014  * a codepage string using a UConverter.
4015  */
4016  void
4017  doCodepageCreate(const char *codepageData,
4018  int32_t dataLength,
4019  UConverter *converter,
4020  UErrorCode &status);
4021 
4022 #endif
4023 
4024  /*
4025  * This function is called when write access to the array
4026  * is necessary.
4027  *
4028  * We need to make a copy of the array if
4029  * the buffer is read-only, or
4030  * the buffer is refCounted (shared), and refCount>1, or
4031  * the buffer is too small.
4032  *
4033  * Return false if memory could not be allocated.
4034  */
4035  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
4036  int32_t growCapacity = -1,
4037  UBool doCopyArray = true,
4038  int32_t** pBufferToDelete = nullptr,
4039  UBool forceClone = false);
4040 
4046  UnicodeString &
4047  caseMap(int32_t caseLocale, uint32_t options,
4049  BreakIterator *iter,
4050 #endif
4051  UStringCaseMapper *stringCaseMapper);
4052 
4053  // ref counting
4054  void addRef();
4055  int32_t removeRef();
4056  int32_t refCount() const;
4057 
4058  // constants
4059  enum {
 // Capacity, in char16_t units, of the in-object buffer: the object size minus
 // the vtable pointer (sizeof(void*)) and the 2 bytes of fLengthAndFlags.
4065  US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
4066  kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
4067  kInvalidHashCode=0, // invalid hash code
4068  kEmptyHashCode=1, // hash code for empty string
4069 
4070  // bit flag values for fLengthAndFlags
4071  kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
4072  kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
4073  kRefCounted=4, // there is a refCount field before the characters in fArray
4074  kBufferIsReadonly=8,// do not write to this buffer
4075  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
4076  // and releaseBuffer(newLength) must be called
4077  kAllStorageFlags=0x1f, // mask of the five flag bits above (the low 5 bits)
4078 
4079  kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
4080  kLength1=1<<kLengthShift, // the value that a short length of 1 contributes to fLengthAndFlags
4081  kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
4082  kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
4083 
4084  // combined values for convenience
4085  kShortString=kUsingStackBuffer,
4086  kLongString=kRefCounted,
4087  kReadonlyAlias=kBufferIsReadonly,
4088  kWritableAlias=0
4089  };
4090 
4091  friend class UnicodeStringAppendable;
4092 
4093  union StackBufferOrFields; // forward declaration necessary before friend declaration
4094  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
4095 
4096  /*
4097  * The following are all the class fields that are stored
4098  * in each UnicodeString object.
4099  * Note that UnicodeString has virtual functions,
4100  * therefore there is an implicit vtable pointer
4101  * as the first real field.
4102  * The fields should be aligned such that no padding is necessary.
4103  * On 32-bit machines, the size should be 32 bytes,
4104  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
4105  *
4106  * We use a hack to achieve this.
4107  *
4108  * With at least some compilers, each of the following is forced to
4109  * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
4110  * rounded up with additional padding if the fields do not already fit that requirement:
4111  * - sizeof(class UnicodeString)
4112  * - offsetof(UnicodeString, fUnion)
4113  * - sizeof(fUnion)
4114  * - sizeof(fStackFields)
4115  *
4116  * We optimize for the longest possible internal buffer for short strings.
4117  * fUnion.fStackFields begins with 2 bytes for storage flags
4118  * and the length of relatively short strings,
4119  * followed by the buffer for short string contents.
4120  * There is no padding inside fStackFields.
4121  *
4122  * Heap-allocated and aliased strings use fUnion.fFields.
4123  * Both fStackFields and fFields must begin with the same fields for flags and short length,
4124  * that is, those must have the same memory offsets inside the object,
4125  * because the flags must be inspected in order to decide which half of fUnion is being used.
4126  * We assume that the compiler does not reorder the fields.
4127  *
4128  * (Padding at the end of fFields is ok:
4129  * As long as it is no larger than fStackFields, it is not wasted space.)
4130  *
4131  * For some of the history of the UnicodeString class fields layout, see
4132  * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
4133  * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
4134  * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
4135  */
4136  // (implicit) *vtable;
 // Storage for either a short string held inside the object (fStackFields)
 // or the bookkeeping for an external array (fFields).
4137  union StackBufferOrFields {
4138  // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
4139  // Each struct of the union must begin with fLengthAndFlags.
4140  struct {
4141  int16_t fLengthAndFlags; // bit fields: see constants above
4142  char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
4143  } fStackFields;
4144  struct {
4145  int16_t fLengthAndFlags; // bit fields: see constants above
4146  int32_t fLength; // number of characters in fArray if >kMaxShortLength (fLengthAndFlags<0); else undefined
4147  int32_t fCapacity; // capacity of fArray (in char16_ts)
4148  // array pointer last to minimize padding for machines with P128 data model
4149  // or pointer sizes that are not a power of 2
4150  char16_t *fArray; // the Unicode data
4151  } fFields;
4152  } fUnion;
4153 };
4154 
4163 U_COMMON_API UnicodeString U_EXPORT2
4165 
// Concatenates a UnicodeString with any S that satisfies ConvertibleToU16StringView.
// The template only participates when US is exactly UnicodeString (std::is_same_v).
// s2 is converted with internal::toU16StringView() and the work is done by
// unistr_internalConcat(), which is declared further down in this header.
4176 template<
4177  typename US, typename S,
4178  typename = std::enable_if_t<ConvertibleToU16StringView<S> && std::is_same_v<US, UnicodeString>>>
4179 inline UnicodeString operator+(const US &s1, const S &s2) {
4180  return unistr_internalConcat(s1, internal::toU16StringView(s2));
4181 }
4182 
4183 #ifndef U_FORCE_HIDE_INTERNAL_API
4185 U_COMMON_API UnicodeString U_EXPORT2
4186 unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
4187 #endif
4188 
4189 //========================================
4190 // Inline members
4191 //========================================
4192 
4193 //========================================
4194 // Privates
4195 //========================================
4196 
4197 inline void
4198 UnicodeString::pinIndex(int32_t& start) const
4199 {
4200  // pin index
4201  if(start < 0) {
4202  start = 0;
4203  } else if(start > length()) {
4204  start = length();
4205  }
4206 }
4207 
4208 inline void
4209 UnicodeString::pinIndices(int32_t& start,
4210  int32_t& _length) const
4211 {
4212  // pin indices
4213  int32_t len = length();
4214  if(start < 0) {
4215  start = 0;
4216  } else if(start > len) {
4217  start = len;
4218  }
4219  if(_length < 0) {
4220  _length = 0;
4221  } else if(_length > (len - start)) {
4222  _length = (len - start);
4223  }
4224 }
4225 
4226 inline char16_t*
4227 UnicodeString::getArrayStart() {
4228  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4229  fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4230 }
4231 
4232 inline const char16_t*
4233 UnicodeString::getArrayStart() const {
4234  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4235  fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4236 }
4237 
4238 //========================================
4239 // Default constructor
4240 //========================================
4241 
4242 inline
4243 UnicodeString::UnicodeString() {
4244  fUnion.fStackFields.fLengthAndFlags=kShortString;
4245 }
4246 
4247 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
4248  fUnion.fStackFields.fLengthAndFlags=kShortString;
4249 }
4250 
4251 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
4252  fUnion.fStackFields.fLengthAndFlags=kShortString;
4253 }
4254 
4255 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
4256  fUnion.fStackFields.fLengthAndFlags=kShortString;
4257 }
4258 
4259 //========================================
4260 // Read-only implementation methods
4261 //========================================
4262 inline UBool
4263 UnicodeString::hasShortLength() const {
4264  return fUnion.fFields.fLengthAndFlags>=0;
4265 }
4266 
4267 inline int32_t
4268 UnicodeString::getShortLength() const {
4269  // fLengthAndFlags must be non-negative -> short length >= 0
4270  // and arithmetic or logical shift does not matter.
4271  return fUnion.fFields.fLengthAndFlags>>kLengthShift;
4272 }
4273 
4274 inline int32_t
4275 UnicodeString::length() const {
4276  return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
4277 }
4278 
4279 inline int32_t
4280 UnicodeString::getCapacity() const {
4281  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4282  US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
4283 }
4284 
4285 inline int32_t
4286 UnicodeString::hashCode() const
4287 { return doHashCode(); }
4288 
4289 inline UBool
4290 UnicodeString::isBogus() const
4291 { return fUnion.fFields.fLengthAndFlags & kIsBogus; }
4292 
4293 inline UBool
4294 UnicodeString::isWritable() const
4295 { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
4296 
4297 inline UBool
4298 UnicodeString::isBufferWritable() const
4299 {
4300  return
4301  !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4302  (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
4303 }
4304 
4305 inline const char16_t *
4306 UnicodeString::getBuffer() const {
4307  if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4308  return nullptr;
4309  } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4310  return fUnion.fStackFields.fBuffer;
4311  } else {
4312  return fUnion.fFields.fArray;
4313  }
4314 }
4315 
4316 //========================================
4317 // Read-only alias methods
4318 //========================================
4319 inline int8_t
4320 UnicodeString::doCompare(int32_t start,
4321  int32_t thisLength,
4322  const UnicodeString& srcText,
4323  int32_t srcStart,
4324  int32_t srcLength) const
4325 {
4326  if(srcText.isBogus()) {
4327  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4328  } else {
4329  srcText.pinIndices(srcStart, srcLength);
4330  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4331  }
4332 }
4333 
4334 inline UBool
4335 UnicodeString::doEqualsSubstring(int32_t start,
4336  int32_t thisLength,
4337  const UnicodeString& srcText,
4338  int32_t srcStart,
4339  int32_t srcLength) const
4340 {
4341  if(srcText.isBogus()) {
4342  return isBogus();
4343  } else {
4344  srcText.pinIndices(srcStart, srcLength);
4345  return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4346  }
4347 }
4348 
4349 inline bool
4351 {
4352  if(isBogus()) {
4353  return text.isBogus();
4354  } else {
4355  int32_t len = length(), textLength = text.length();
4356  return !text.isBogus() && len == textLength && doEquals(text, len);
4357  }
4358 }
4359 
4360 inline bool
4362 { return (! operator==(text)); }
4363 
4364 inline UBool
4365 UnicodeString::operator> (const UnicodeString& text) const
4366 { return doCompare(0, length(), text, 0, text.length()) == 1; }
4367 
4368 inline UBool
4369 UnicodeString::operator< (const UnicodeString& text) const
4370 { return doCompare(0, length(), text, 0, text.length()) == -1; }
4371 
4372 inline UBool
4373 UnicodeString::operator>= (const UnicodeString& text) const
4374 { return doCompare(0, length(), text, 0, text.length()) != -1; }
4375 
4376 inline UBool
4377 UnicodeString::operator<= (const UnicodeString& text) const
4378 { return doCompare(0, length(), text, 0, text.length()) != 1; }
4379 
4380 inline int8_t
4381 UnicodeString::compare(const UnicodeString& text) const
4382 { return doCompare(0, length(), text, 0, text.length()); }
4383 
4384 inline int8_t
4385 UnicodeString::compare(int32_t start,
4386  int32_t _length,
4387  const UnicodeString& srcText) const
4388 { return doCompare(start, _length, srcText, 0, srcText.length()); }
4389 
4390 inline int8_t
4391 UnicodeString::compare(ConstChar16Ptr srcChars,
4392  int32_t srcLength) const
4393 { return doCompare(0, length(), srcChars, 0, srcLength); }
4394 
4395 inline int8_t
4396 UnicodeString::compare(int32_t start,
4397  int32_t _length,
4398  const UnicodeString& srcText,
4399  int32_t srcStart,
4400  int32_t srcLength) const
4401 { return doCompare(start, _length, srcText, srcStart, srcLength); }
4402 
4403 inline int8_t
4404 UnicodeString::compare(int32_t start,
4405  int32_t _length,
4406  const char16_t *srcChars) const
4407 { return doCompare(start, _length, srcChars, 0, _length); }
4408 
4409 inline int8_t
4410 UnicodeString::compare(int32_t start,
4411  int32_t _length,
4412  const char16_t *srcChars,
4413  int32_t srcStart,
4414  int32_t srcLength) const
4415 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
4416 
4417 inline int8_t
4418 UnicodeString::compareBetween(int32_t start,
4419  int32_t limit,
4420  const UnicodeString& srcText,
4421  int32_t srcStart,
4422  int32_t srcLimit) const
4423 { return doCompare(start, limit - start,
4424  srcText, srcStart, srcLimit - srcStart); }
4425 
4426 inline int8_t
4427 UnicodeString::doCompareCodePointOrder(int32_t start,
4428  int32_t thisLength,
4429  const UnicodeString& srcText,
4430  int32_t srcStart,
4431  int32_t srcLength) const
4432 {
4433  if(srcText.isBogus()) {
4434  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4435  } else {
4436  srcText.pinIndices(srcStart, srcLength);
4437  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4438  }
4439 }
4440 
4441 inline int8_t
4442 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4443 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4444 
4445 inline int8_t
4446 UnicodeString::compareCodePointOrder(int32_t start,
4447  int32_t _length,
4448  const UnicodeString& srcText) const
4449 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4450 
4451 inline int8_t
4452 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4453  int32_t srcLength) const
4454 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4455 
4456 inline int8_t
4457 UnicodeString::compareCodePointOrder(int32_t start,
4458  int32_t _length,
4459  const UnicodeString& srcText,
4460  int32_t srcStart,
4461  int32_t srcLength) const
4462 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4463 
4464 inline int8_t
4465 UnicodeString::compareCodePointOrder(int32_t start,
4466  int32_t _length,
4467  const char16_t *srcChars) const
4468 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4469 
4470 inline int8_t
4471 UnicodeString::compareCodePointOrder(int32_t start,
4472  int32_t _length,
4473  const char16_t *srcChars,
4474  int32_t srcStart,
4475  int32_t srcLength) const
4476 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4477 
4478 inline int8_t
4479 UnicodeString::compareCodePointOrderBetween(int32_t start,
4480  int32_t limit,
4481  const UnicodeString& srcText,
4482  int32_t srcStart,
4483  int32_t srcLimit) const
4484 { return doCompareCodePointOrder(start, limit - start,
4485  srcText, srcStart, srcLimit - srcStart); }
4486 
4487 inline int8_t
4488 UnicodeString::doCaseCompare(int32_t start,
4489  int32_t thisLength,
4490  const UnicodeString &srcText,
4491  int32_t srcStart,
4492  int32_t srcLength,
4493  uint32_t options) const
4494 {
4495  if(srcText.isBogus()) {
4496  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4497  } else {
4498  srcText.pinIndices(srcStart, srcLength);
4499  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4500  }
4501 }
4502 
4503 inline int8_t
4504 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4505  return doCaseCompare(0, length(), text, 0, text.length(), options);
4506 }
4507 
4508 inline int8_t
4509 UnicodeString::caseCompare(int32_t start,
4510  int32_t _length,
4511  const UnicodeString &srcText,
4512  uint32_t options) const {
4513  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4514 }
4515 
4516 inline int8_t
4517 UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4518  int32_t srcLength,
4519  uint32_t options) const {
4520  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4521 }
4522 
4523 inline int8_t
4524 UnicodeString::caseCompare(int32_t start,
4525  int32_t _length,
4526  const UnicodeString &srcText,
4527  int32_t srcStart,
4528  int32_t srcLength,
4529  uint32_t options) const {
4530  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4531 }
4532 
4533 inline int8_t
4534 UnicodeString::caseCompare(int32_t start,
4535  int32_t _length,
4536  const char16_t *srcChars,
4537  uint32_t options) const {
4538  return doCaseCompare(start, _length, srcChars, 0, _length, options);
4539 }
4540 
4541 inline int8_t
4542 UnicodeString::caseCompare(int32_t start,
4543  int32_t _length,
4544  const char16_t *srcChars,
4545  int32_t srcStart,
4546  int32_t srcLength,
4547  uint32_t options) const {
4548  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4549 }
4550 
4551 inline int8_t
4552 UnicodeString::caseCompareBetween(int32_t start,
4553  int32_t limit,
4554  const UnicodeString &srcText,
4555  int32_t srcStart,
4556  int32_t srcLimit,
4557  uint32_t options) const {
4558  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4559 }
4560 
4561 inline int32_t
4562 UnicodeString::indexOf(const UnicodeString& srcText,
4563  int32_t srcStart,
4564  int32_t srcLength,
4565  int32_t start,
4566  int32_t _length) const
4567 {
4568  if(!srcText.isBogus()) {
4569  srcText.pinIndices(srcStart, srcLength);
4570  if(srcLength > 0) {
4571  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4572  }
4573  }
4574  return -1;
4575 }
4576 
4577 inline int32_t
4578 UnicodeString::indexOf(const UnicodeString& text) const
4579 { return indexOf(text, 0, text.length(), 0, length()); }
4580 
4581 inline int32_t
4582 UnicodeString::indexOf(const UnicodeString& text,
4583  int32_t start) const {
4584  pinIndex(start);
4585  return indexOf(text, 0, text.length(), start, length() - start);
4586 }
4587 
4588 inline int32_t
4589 UnicodeString::indexOf(const UnicodeString& text,
4590  int32_t start,
4591  int32_t _length) const
4592 { return indexOf(text, 0, text.length(), start, _length); }
4593 
4594 inline int32_t
4595 UnicodeString::indexOf(const char16_t *srcChars,
4596  int32_t srcLength,
4597  int32_t start) const {
4598  pinIndex(start);
4599  return indexOf(srcChars, 0, srcLength, start, length() - start);
4600 }
4601 
4602 inline int32_t
4603 UnicodeString::indexOf(ConstChar16Ptr srcChars,
4604  int32_t srcLength,
4605  int32_t start,
4606  int32_t _length) const
4607 { return indexOf(srcChars, 0, srcLength, start, _length); }
4608 
4609 inline int32_t
4610 UnicodeString::indexOf(char16_t c,
4611  int32_t start,
4612  int32_t _length) const
4613 { return doIndexOf(c, start, _length); }
4614 
4615 inline int32_t
4616 UnicodeString::indexOf(UChar32 c,
4617  int32_t start,
4618  int32_t _length) const
4619 { return doIndexOf(c, start, _length); }
4620 
4621 inline int32_t
4622 UnicodeString::indexOf(char16_t c) const
4623 { return doIndexOf(c, 0, length()); }
4624 
4625 inline int32_t
4626 UnicodeString::indexOf(UChar32 c) const
4627 { return indexOf(c, 0, length()); }
4628 
4629 inline int32_t
4630 UnicodeString::indexOf(char16_t c,
4631  int32_t start) const {
4632  pinIndex(start);
4633  return doIndexOf(c, start, length() - start);
4634 }
4635 
4636 inline int32_t
4637 UnicodeString::indexOf(UChar32 c,
4638  int32_t start) const {
4639  pinIndex(start);
4640  return indexOf(c, start, length() - start);
4641 }
4642 
4643 inline int32_t
4644 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4645  int32_t srcLength,
4646  int32_t start,
4647  int32_t _length) const
4648 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4649 
4650 inline int32_t
4651 UnicodeString::lastIndexOf(const char16_t *srcChars,
4652  int32_t srcLength,
4653  int32_t start) const {
4654  pinIndex(start);
4655  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4656 }
4657 
4658 inline int32_t
4659 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4660  int32_t srcStart,
4661  int32_t srcLength,
4662  int32_t start,
4663  int32_t _length) const
4664 {
4665  if(!srcText.isBogus()) {
4666  srcText.pinIndices(srcStart, srcLength);
4667  if(srcLength > 0) {
4668  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4669  }
4670  }
4671  return -1;
4672 }
4673 
4674 inline int32_t
4675 UnicodeString::lastIndexOf(const UnicodeString& text,
4676  int32_t start,
4677  int32_t _length) const
4678 { return lastIndexOf(text, 0, text.length(), start, _length); }
4679 
4680 inline int32_t
4681 UnicodeString::lastIndexOf(const UnicodeString& text,
4682  int32_t start) const {
4683  pinIndex(start);
4684  return lastIndexOf(text, 0, text.length(), start, length() - start);
4685 }
4686 
4687 inline int32_t
4688 UnicodeString::lastIndexOf(const UnicodeString& text) const
4689 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4690 
4691 inline int32_t
4692 UnicodeString::lastIndexOf(char16_t c,
4693  int32_t start,
4694  int32_t _length) const
4695 { return doLastIndexOf(c, start, _length); }
4696 
4697 inline int32_t
4698 UnicodeString::lastIndexOf(UChar32 c,
4699  int32_t start,
4700  int32_t _length) const {
4701  return doLastIndexOf(c, start, _length);
4702 }
4703 
4704 inline int32_t
4705 UnicodeString::lastIndexOf(char16_t c) const
4706 { return doLastIndexOf(c, 0, length()); }
4707 
4708 inline int32_t
4709 UnicodeString::lastIndexOf(UChar32 c) const {
4710  return lastIndexOf(c, 0, length());
4711 }
4712 
4713 inline int32_t
4714 UnicodeString::lastIndexOf(char16_t c,
4715  int32_t start) const {
4716  pinIndex(start);
4717  return doLastIndexOf(c, start, length() - start);
4718 }
4719 
4720 inline int32_t
4721 UnicodeString::lastIndexOf(UChar32 c,
4722  int32_t start) const {
4723  pinIndex(start);
4724  return lastIndexOf(c, start, length() - start);
4725 }
4726 
4727 inline UBool
4728 UnicodeString::startsWith(const UnicodeString& text) const
4729 { return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4730 
4731 inline UBool
4732 UnicodeString::startsWith(const UnicodeString& srcText,
4733  int32_t srcStart,
4734  int32_t srcLength) const
4735 { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4736 
4737 inline UBool
4738 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4739  if(srcLength < 0) {
4740  srcLength = u_strlen(toUCharPtr(srcChars));
4741  }
4742  return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4743 }
4744 
4745 inline UBool
4746 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4747  if(srcLength < 0) {
4748  srcLength = u_strlen(toUCharPtr(srcChars));
4749  }
4750  return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4751 }
4752 
4753 inline UBool
4754 UnicodeString::endsWith(const UnicodeString& text) const
4755 { return doEqualsSubstring(length() - text.length(), text.length(),
4756  text, 0, text.length()); }
4757 
4758 inline UBool
4759 UnicodeString::endsWith(const UnicodeString& srcText,
4760  int32_t srcStart,
4761  int32_t srcLength) const {
4762  srcText.pinIndices(srcStart, srcLength);
4763  return doEqualsSubstring(length() - srcLength, srcLength,
4764  srcText, srcStart, srcLength);
4765 }
4766 
4767 inline UBool
4768 UnicodeString::endsWith(ConstChar16Ptr srcChars,
4769  int32_t srcLength) const {
4770  if(srcLength < 0) {
4771  srcLength = u_strlen(toUCharPtr(srcChars));
4772  }
4773  return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4774 }
4775 
4776 inline UBool
4777 UnicodeString::endsWith(const char16_t *srcChars,
4778  int32_t srcStart,
4779  int32_t srcLength) const {
4780  if(srcLength < 0) {
4781  srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4782  }
4783  return doEqualsSubstring(length() - srcLength, srcLength,
4784  srcChars, srcStart, srcLength);
4785 }
4786 
4787 //========================================
4788 // replace
4789 //========================================
4790 inline UnicodeString&
4791 UnicodeString::replace(int32_t start,
4792  int32_t _length,
4793  const UnicodeString& srcText)
4794 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4795 
4796 inline UnicodeString&
4797 UnicodeString::replace(int32_t start,
4798  int32_t _length,
4799  const UnicodeString& srcText,
4800  int32_t srcStart,
4801  int32_t srcLength)
4802 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4803 
4804 inline UnicodeString&
4805 UnicodeString::replace(int32_t start,
4806  int32_t _length,
4807  ConstChar16Ptr srcChars,
4808  int32_t srcLength)
4809 { return doReplace(start, _length, srcChars, 0, srcLength); }
4810 
4811 inline UnicodeString&
4812 UnicodeString::replace(int32_t start,
4813  int32_t _length,
4814  const char16_t *srcChars,
4815  int32_t srcStart,
4816  int32_t srcLength)
4817 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4818 
4819 inline UnicodeString&
4820 UnicodeString::replace(int32_t start,
4821  int32_t _length,
4822  char16_t srcChar)
4823 { return doReplace(start, _length, &srcChar, 0, 1); }
4824 
4825 inline UnicodeString&
4826 UnicodeString::replaceBetween(int32_t start,
4827  int32_t limit,
4828  const UnicodeString& srcText)
4829 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4830 
4831 inline UnicodeString&
4832 UnicodeString::replaceBetween(int32_t start,
4833  int32_t limit,
4834  const UnicodeString& srcText,
4835  int32_t srcStart,
4836  int32_t srcLimit)
4837 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4838 
4839 inline UnicodeString&
4840 UnicodeString::findAndReplace(const UnicodeString& oldText,
4841  const UnicodeString& newText)
4842 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4843  newText, 0, newText.length()); }
4844 
4845 inline UnicodeString&
4846 UnicodeString::findAndReplace(int32_t start,
4847  int32_t _length,
4848  const UnicodeString& oldText,
4849  const UnicodeString& newText)
4850 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4851  newText, 0, newText.length()); }
4852 
4853 // ============================
4854 // extract
4855 // ============================
4856 inline void
4857 UnicodeString::doExtract(int32_t start,
4858  int32_t _length,
4859  UnicodeString& target) const
4860 { target.replace(0, target.length(), *this, start, _length); }
4861 
4862 inline void
4863 UnicodeString::extract(int32_t start,
4864  int32_t _length,
4865  Char16Ptr target,
4866  int32_t targetStart) const
4867 { doExtract(start, _length, target, targetStart); }
4868 
4869 inline void
4870 UnicodeString::extract(int32_t start,
4871  int32_t _length,
4872  UnicodeString& target) const
4873 { doExtract(start, _length, target); }
4874 
4875 #if !UCONFIG_NO_CONVERSION
4876 
4877 inline int32_t
4878 UnicodeString::extract(int32_t start,
4879  int32_t _length,
4880  char *dst,
4881  const char *codepage) const
4882 
4883 {
4884  // This dstSize value will be checked explicitly
4885  return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
4886 }
4887 
4888 #endif
4889 
4890 inline void
4891 UnicodeString::extractBetween(int32_t start,
4892  int32_t limit,
4893  char16_t *dst,
4894  int32_t dstStart) const {
4895  pinIndex(start);
4896  pinIndex(limit);
4897  doExtract(start, limit - start, dst, dstStart);
4898 }
4899 
4900 inline UnicodeString
4901 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4902  return tempSubString(start, limit - start);
4903 }
4904 
4905 inline char16_t
4906 UnicodeString::doCharAt(int32_t offset) const
4907 {
4908  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
4909  return getArrayStart()[offset];
4910  } else {
4911  return kInvalidUChar;
4912  }
4913 }
4914 
4915 inline char16_t
4916 UnicodeString::charAt(int32_t offset) const
4917 { return doCharAt(offset); }
4918 
4919 inline char16_t
4920 UnicodeString::operator[] (int32_t offset) const
4921 { return doCharAt(offset); }
4922 
4923 inline UBool
4924 UnicodeString::isEmpty() const {
4925  // Arithmetic or logical right shift does not matter: only testing for 0.
4926  return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4927 }
4928 
4929 //========================================
4930 // Write implementation methods
4931 //========================================
4932 inline void
4933 UnicodeString::setZeroLength() {
4934  fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4935 }
4936 
4937 inline void
4938 UnicodeString::setShortLength(int32_t len) {
4939  // requires 0 <= len <= kMaxShortLength
4940  fUnion.fFields.fLengthAndFlags =
4941  static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4942 }
4943 
4944 inline void
4945 UnicodeString::setLength(int32_t len) {
4946  if(len <= kMaxShortLength) {
4947  setShortLength(len);
4948  } else {
4949  fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4950  fUnion.fFields.fLength = len;
4951  }
4952 }
4953 
4954 inline void
4955 UnicodeString::setToEmpty() {
4956  fUnion.fFields.fLengthAndFlags = kShortString;
4957 }
4958 
4959 inline void
4960 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4961  setLength(len);
4962  fUnion.fFields.fArray = array;
4963  fUnion.fFields.fCapacity = capacity;
4964 }
4965 
4966 inline UnicodeString&
4967 UnicodeString::operator= (char16_t ch)
4968 { return doReplace(0, length(), &ch, 0, 1); }
4969 
4970 inline UnicodeString&
4971 UnicodeString::operator= (UChar32 ch)
4972 { return replace(0, length(), ch); }
4973 
4974 inline UnicodeString&
4975 UnicodeString::setTo(const UnicodeString& srcText,
4976  int32_t srcStart,
4977  int32_t srcLength)
4978 {
4979  unBogus();
4980  return doReplace(0, length(), srcText, srcStart, srcLength);
4981 }
4982 
4983 inline UnicodeString&
4984 UnicodeString::setTo(const UnicodeString& srcText,
4985  int32_t srcStart)
4986 {
4987  unBogus();
4988  srcText.pinIndex(srcStart);
4989  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4990 }
4991 
4992 inline UnicodeString&
4993 UnicodeString::setTo(const UnicodeString& srcText)
4994 {
4995  return copyFrom(srcText);
4996 }
4997 
4998 inline UnicodeString&
4999 UnicodeString::setTo(const char16_t *srcChars,
5000  int32_t srcLength)
5001 {
5002  unBogus();
5003  return doReplace(0, length(), srcChars, 0, srcLength);
5004 }
5005 
5006 inline UnicodeString&
5007 UnicodeString::setTo(char16_t srcChar)
5008 {
5009  unBogus();
5010  return doReplace(0, length(), &srcChar, 0, 1);
5011 }
5012 
5013 inline UnicodeString&
5014 UnicodeString::setTo(UChar32 srcChar)
5015 {
5016  unBogus();
5017  return replace(0, length(), srcChar);
5018 }
5019 
5020 inline UnicodeString&
5021 UnicodeString::append(const UnicodeString& srcText,
5022  int32_t srcStart,
5023  int32_t srcLength)
5024 { return doAppend(srcText, srcStart, srcLength); }
5025 
5026 inline UnicodeString&
5027 UnicodeString::append(const UnicodeString& srcText)
5028 { return doAppend(srcText, 0, srcText.length()); }
5029 
5030 inline UnicodeString&
5031 UnicodeString::append(const char16_t *srcChars,
5032  int32_t srcStart,
5033  int32_t srcLength)
5034 { return doAppend(srcChars, srcStart, srcLength); }
5035 
5036 inline UnicodeString&
5037 UnicodeString::append(ConstChar16Ptr srcChars,
5038  int32_t srcLength)
5039 { return doAppend(srcChars, 0, srcLength); }
5040 
5041 inline UnicodeString&
5042 UnicodeString::append(char16_t srcChar)
5043 { return doAppend(&srcChar, 0, 1); }
5044 
5045 inline UnicodeString&
5046 UnicodeString::operator+= (char16_t ch)
5047 { return doAppend(&ch, 0, 1); }
5048 
5049 inline UnicodeString&
5050 UnicodeString::operator+= (UChar32 ch) {
5051  return append(ch);
5052 }
5053 
5054 inline UnicodeString&
5055 UnicodeString::operator+= (const UnicodeString& srcText)
5056 { return doAppend(srcText, 0, srcText.length()); }
5057 
5058 inline UnicodeString&
5059 UnicodeString::insert(int32_t start,
5060  const UnicodeString& srcText,
5061  int32_t srcStart,
5062  int32_t srcLength)
5063 { return doReplace(start, 0, srcText, srcStart, srcLength); }
5064 
5065 inline UnicodeString&
5066 UnicodeString::insert(int32_t start,
5067  const UnicodeString& srcText)
5068 { return doReplace(start, 0, srcText, 0, srcText.length()); }
5069 
5070 inline UnicodeString&
5071 UnicodeString::insert(int32_t start,
5072  const char16_t *srcChars,
5073  int32_t srcStart,
5074  int32_t srcLength)
5075 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
5076 
5077 inline UnicodeString&
5078 UnicodeString::insert(int32_t start,
5079  ConstChar16Ptr srcChars,
5080  int32_t srcLength)
5081 { return doReplace(start, 0, srcChars, 0, srcLength); }
5082 
5083 inline UnicodeString&
5084 UnicodeString::insert(int32_t start,
5085  char16_t srcChar)
5086 { return doReplace(start, 0, &srcChar, 0, 1); }
5087 
5088 inline UnicodeString&
5089 UnicodeString::insert(int32_t start,
5090  UChar32 srcChar)
5091 { return replace(start, 0, srcChar); }
5092 
5093 
5094 inline UnicodeString&
5095 UnicodeString::remove()
5096 {
5097  // remove() of a bogus string makes the string empty and non-bogus
5098  if(isBogus()) {
5099  setToEmpty();
5100  } else {
5101  setZeroLength();
5102  }
5103  return *this;
5104 }
5105 
5106 inline UnicodeString&
5107 UnicodeString::remove(int32_t start,
5108  int32_t _length)
5109 {
5110  if(start <= 0 && _length == INT32_MAX) {
5111  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
5112  return remove();
5113  }
5114  return doReplace(start, _length, nullptr, 0, 0);
5115 }
5116 
5117 inline UnicodeString&
5118 UnicodeString::removeBetween(int32_t start,
5119  int32_t limit)
5120 { return doReplace(start, limit - start, nullptr, 0, 0); }
5121 
5122 inline UnicodeString &
5123 UnicodeString::retainBetween(int32_t start, int32_t limit) {
5124  truncate(limit);
5125  return doReplace(0, start, nullptr, 0, 0);
5126 }
5127 
5128 inline UBool
5129 UnicodeString::truncate(int32_t targetLength)
5130 {
5131  if(isBogus() && targetLength == 0) {
5132  // truncate(0) of a bogus string makes the string empty and non-bogus
5133  unBogus();
5134  return false;
5135  } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
5136  setLength(targetLength);
5137  return true;
5138  } else {
5139  return false;
5140  }
5141 }
5142 
5143 inline UnicodeString&
5144 UnicodeString::reverse()
5145 { return doReverse(0, length()); }
5146 
5147 inline UnicodeString&
5148 UnicodeString::reverse(int32_t start,
5149  int32_t _length)
5150 { return doReverse(start, _length); }
5151 
5152 U_NAMESPACE_END
5153 
5154 #endif /* U_SHOW_CPLUSPLUS_API */
5155 
5156 #endif
C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
#define U_ALIASING_BARRIER(ptr)
Barrier for pointer anti-aliasing optimizations even across function boundaries.
Definition: char16ptr.h:37
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:106
A ByteSink can be filled with bytes.
Definition: bytestream.h:55
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:49
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:156
Records lengths of string edits but not replacement text.
Definition: edits.h:80
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:198
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition: rep.h:251
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition: rep.h:246
Implementation of ByteSink that writes to a "string".
Definition: bytestream.h:291
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:61
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:303
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChar...
UnicodeString(const UnicodeString &that)
Copy constructor.
void push_back(char16_t c)
Appends the code unit c to the UnicodeString object.
Definition: unistr.h:2386
void swap(UnicodeString &other) noexcept
Swap strings.
virtual char16_t getCharAt(int32_t offset) const override
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline agai...
bool operator==(const S &text) const
Equality operator.
Definition: unistr.h:355
virtual int32_t getLength() const override
Implement Replaceable::getLength() (see jitterbug 1027).
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UnicodeString(const wchar_t *text, int32_t textLength)
wchar_t * constructor.
Definition: unistr.h:3312
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text) override
Replace a substring of this object with the given text.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain...
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
UnicodeString & operator=(const S &src)
Assignment operator.
Definition: unistr.h:2022
UnicodeString & append(const S &src)
Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view ...
Definition: unistr.h:2358
UnicodeString & operator=(UnicodeString &&src) noexcept
Move assignment operator; might leave src in bogus state.
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString & toLower()
Convert the characters in this to lower case following the conventions of the default locale.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const UnicodeString &text)
Readonly-aliasing factory method.
Definition: unistr.h:3685
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a ...
Definition: unistr.h:316
bool operator!=(const S &text) const
Inequality operator.
Definition: unistr.h:388
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + length) into an array of characters in a specified c...
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with t...
virtual void copy(int32_t start, int32_t limit, int32_t dest) override
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const override
Copy the characters in the range [start, limit) into the UnicodeString target.
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the ran...
Definition: unistr.h:4797
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
Definition: unistr.h:1783
UnicodeString & toLower(const Locale &locale)
Convert the characters in this to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
int32_t length() const
Return the length of the UnicodeString object.
Definition: unistr.h:4275
unspecified_reverse_iterator rend() const
Definition: unistr.h:1955
virtual UChar32 getChar32At(int32_t offset) const override
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline ...
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + length) into an array of characters in the platform'...
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars...
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
Definition: unistr.h:3420
void setToBogus()
Make this UnicodeString object invalid.
friend void swap(UnicodeString &s1, UnicodeString &s2) noexcept
Non-member UnicodeString swap function.
Definition: unistr.h:2051
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text)
Constructor from text which is, or which is implicitly convertible to, a std::u16string_view or (if U...
Definition: unistr.h:3338
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count Unicode code points in the length char16_t code units of the string.
StringClass toUTF8String() const
Convert the UnicodeString to a UTF-8 string.
Definition: unistr.h:1802
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const S &text)
Readonly-aliasing factory method.
Definition: unistr.h:3662
UnicodeString & operator+=(const S &src)
Append operator.
Definition: unistr.h:2287
unspecified_iterator begin() const
Definition: unistr.h:1937
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
virtual UnicodeString * clone() const override
Clone this object, an instance of a subclass of Replaceable.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
UnicodeString(UnicodeString &&src) noexcept
Move constructor; might leave src in bogus state.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
Definition: unistr.h:3406
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UBool isBogus() const
Determine if this object contains a valid string.
Definition: unistr.h:4290
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UnicodeString(const uint16_t *text, int32_t textLength)
uint16_t * constructor.
Definition: unistr.h:3290
unspecified_reverse_iterator rbegin() const
Definition: unistr.h:1949
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
char16_t value_type
C++ boilerplate.
Definition: unistr.h:306
unspecified_iterator end() const
Definition: unistr.h:1943
virtual UBool hasMetaData() const override
Replaceable API.
UnicodeString & toUpper()
Convert the characters in this to UPPER CASE following the conventions of the default locale.
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
UnicodeString & trim()
Trims leading and trailing whitespace from this UnicodeString.
U_CAPI int32_t u_strlen(const UChar *s)
U_COMMON_API UnicodeString unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2)
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:346
UnicodeString operator+(const US &s1, const S &s2)
Creates a new UnicodeString from the concatenation of a UnicodeString and s2 which is,...
Definition: unistr.h:4179
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:837
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
Definition: ucnv_err.h:96
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
Definition: uconfig.h:358
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:449
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition: umachine.h:208
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:400
#define U_SIZEOF_UCHAR
Number of bytes in a UChar (always 2).
Definition: umachine.h:352
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:150
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
Definition: unistr.h:71
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:170
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
Definition: unistr.h:208
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:509
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:315