ICU 78.1  78.1
unistr.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1998-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File unistr.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 09/25/98 stephen Creation.
15 * 11/11/98 stephen Changed per 11/9 code review.
16 * 04/20/99 stephen Overhauled per 4/16 code review.
17 * 11/18/99 aliu Made to inherit from Replaceable. Added method
18 * handleReplaceBetween(); other methods unchanged.
19 * 06/25/01 grhoten Remove dependency on iostream.
20 ******************************************************************************
21 */
22 
23 #ifndef UNISTR_H
24 #define UNISTR_H
25 
31 #include "unicode/utypes.h"
32 
33 #if U_SHOW_CPLUSPLUS_API
34 
35 #include <cstddef>
36 #include <string_view>
37 #include "unicode/char16ptr.h"
38 #include "unicode/rep.h"
39 #include "unicode/std_string.h"
40 #include "unicode/stringpiece.h"
41 #include "unicode/bytestream.h"
42 
43 struct UConverter; // unicode/ucnv.h
44 
45 #ifndef USTRING_H
51 U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
52 #endif
53 
54 U_NAMESPACE_BEGIN
55 
56 #if !UCONFIG_NO_BREAK_ITERATION
57 class BreakIterator; // unicode/brkiter.h
58 #endif
59 class Edits;
60 
61 U_NAMESPACE_END
62 
63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
// Function type (not a pointer type) shared by the internal case-mapping
// workers: takes a case locale and option bits, a destination buffer with its
// capacity, a source buffer with its length, an optional icu::Edits recorder
// and a UErrorCode, and returns an int32_t.
// The BreakIterator parameter only exists when break iteration is compiled in;
// the opening #if below pairs with the #endif that follows the parameter.
70 typedef int32_t U_CALLCONV
71 UStringCaseMapper(int32_t caseLocale, uint32_t options,
72 #if !UCONFIG_NO_BREAK_ITERATION
73  icu::BreakIterator *iter,
74 #endif
75  char16_t *dest, int32_t destCapacity,
76  const char16_t *src, int32_t srcLength,
77  icu::Edits *edits,
78  UErrorCode &errorCode);
79 
80 U_NAMESPACE_BEGIN
81 
82 class Locale; // unicode/locid.h
83 class StringCharacterIterator;
84 class UnicodeStringAppendable; // unicode/appendable.h
85 
86 /* The <iostream> include has been moved to unicode/ustream.h */
87 
// Shorthand for the icu::UnicodeString::kInvariant enumerator, the tag value
// taken by the `enum EInvariant inv` parameters declared in this header.
98 #define US_INV icu::UnicodeString::kInvariant
99 
// UNICODE_STRING(cs, _length): builds an icu::UnicodeString from the string
// literal cs. The literal is turned into a UTF-16 literal by token-pasting a
// `u` prefix onto it (u ## cs) and is passed, together with `true` and
// _length, to the three-argument UnicodeString constructor.
// When U_CHAR16_IS_TYPEDEF is nonzero the pasted literal is additionally cast
// to const char16_t*.
120 #if !U_CHAR16_IS_TYPEDEF
121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
122 #else
123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
124 #endif
125 
// UNICODE_STRING with the length argument fixed to -1.
// NOTE(review): -1 presumably tells the constructor to determine the length
// of the terminated literal itself - confirm against the constructor docs.
135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136 
// UNISTR_FROM_CHAR_EXPLICIT: configurable `explicit` keyword.
// If the includer has not defined it already, it expands to `explicit` while
// one of the ICU library implementation macros is defined, and to nothing
// otherwise.
144 #ifndef UNISTR_FROM_CHAR_EXPLICIT
145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146  // Auto-"explicit" in ICU library code.
147 # define UNISTR_FROM_CHAR_EXPLICIT explicit
148 # else
149  // Empty by default for source code compatibility.
150 # define UNISTR_FROM_CHAR_EXPLICIT
151 # endif
152 #endif
153 
// UNISTR_FROM_STRING_EXPLICIT: configurable `explicit` keyword placed in front
// of the single-argument string-pointer constructors in this header.
// If the includer has not defined it already, it expands to `explicit` while
// one of the ICU library implementation macros is defined, and to nothing
// otherwise.
164 #ifndef UNISTR_FROM_STRING_EXPLICIT
165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166  // Auto-"explicit" in ICU library code.
167 # define UNISTR_FROM_STRING_EXPLICIT explicit
168 # else
169  // Empty by default for source code compatibility.
170 # define UNISTR_FROM_STRING_EXPLICIT
171 # endif
172 #endif
173 
// UNISTR_OBJECT_SIZE defaults to 64 unless the includer defined it beforehand.
// NOTE(review): presumably the intended sizeof(UnicodeString) in bytes; the
// code that consumes this value is not visible here - confirm.
207 #ifndef UNISTR_OBJECT_SIZE
208 # define UNISTR_OBJECT_SIZE 64
209 #endif
210 
303 {
304 public:
306  using value_type = char16_t;
307 
// Tag enum with the single enumerator kInvariant (also reachable through the
// US_INV macro). It is the type of the `inv` parameter on the char* extract()
// overload and on the (const char *, int32_t, EInvariant) constructor.
316  enum EInvariant {
321  kInvariant
322  };
323 
324  //========================================
325  // Read-only operations
326  //========================================
327 
328  /* Comparison - bitwise only - for international comparison use collation */
329 
337  inline bool operator== (const UnicodeString& text) const;
338 
// Equality against any type S for which ConvertibleToU16StringView<S> holds.
// text is converted with internal::toU16StringView(); the result is true only
// if this string is not bogus, its length equals the view's length, and
// doEquals() returns true for the view's data over that many code units.
354  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
355  inline bool operator==(const S &text) const {
356  std::u16string_view sv(internal::toU16StringView(text));
357  uint32_t len; // unsigned to avoid a compiler warning
358  return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
359  }
360 
368  inline bool operator!= (const UnicodeString& text) const;
369 
// Inequality against any type S for which ConvertibleToU16StringView<S> holds:
// the logical negation of operator==(text).
387  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
388  inline bool operator!=(const S &text) const {
389  return !operator==(text);
390  }
391 
399  inline UBool operator> (const UnicodeString& text) const;
400 
408  inline UBool operator< (const UnicodeString& text) const;
409 
417  inline UBool operator>= (const UnicodeString& text) const;
418 
426  inline UBool operator<= (const UnicodeString& text) const;
427 
439  inline int8_t compare(const UnicodeString& text) const;
440 
456  inline int8_t compare(int32_t start,
457  int32_t length,
458  const UnicodeString& text) const;
459 
477  inline int8_t compare(int32_t start,
478  int32_t length,
479  const UnicodeString& srcText,
480  int32_t srcStart,
481  int32_t srcLength) const;
482 
495  inline int8_t compare(ConstChar16Ptr srcChars,
496  int32_t srcLength) const;
497 
512  inline int8_t compare(int32_t start,
513  int32_t length,
514  const char16_t *srcChars) const;
515 
533  inline int8_t compare(int32_t start,
534  int32_t length,
535  const char16_t *srcChars,
536  int32_t srcStart,
537  int32_t srcLength) const;
538 
556  inline int8_t compareBetween(int32_t start,
557  int32_t limit,
558  const UnicodeString& srcText,
559  int32_t srcStart,
560  int32_t srcLimit) const;
561 
579  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
580 
600  inline int8_t compareCodePointOrder(int32_t start,
601  int32_t length,
602  const UnicodeString& srcText) const;
603 
625  inline int8_t compareCodePointOrder(int32_t start,
626  int32_t length,
627  const UnicodeString& srcText,
628  int32_t srcStart,
629  int32_t srcLength) const;
630 
649  inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
650  int32_t srcLength) const;
651 
671  inline int8_t compareCodePointOrder(int32_t start,
672  int32_t length,
673  const char16_t *srcChars) const;
674 
696  inline int8_t compareCodePointOrder(int32_t start,
697  int32_t length,
698  const char16_t *srcChars,
699  int32_t srcStart,
700  int32_t srcLength) const;
701 
723  inline int8_t compareCodePointOrderBetween(int32_t start,
724  int32_t limit,
725  const UnicodeString& srcText,
726  int32_t srcStart,
727  int32_t srcLimit) const;
728 
747  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
748 
769  inline int8_t caseCompare(int32_t start,
770  int32_t length,
771  const UnicodeString& srcText,
772  uint32_t options) const;
773 
796  inline int8_t caseCompare(int32_t start,
797  int32_t length,
798  const UnicodeString& srcText,
799  int32_t srcStart,
800  int32_t srcLength,
801  uint32_t options) const;
802 
822  inline int8_t caseCompare(ConstChar16Ptr srcChars,
823  int32_t srcLength,
824  uint32_t options) const;
825 
846  inline int8_t caseCompare(int32_t start,
847  int32_t length,
848  const char16_t *srcChars,
849  uint32_t options) const;
850 
873  inline int8_t caseCompare(int32_t start,
874  int32_t length,
875  const char16_t *srcChars,
876  int32_t srcStart,
877  int32_t srcLength,
878  uint32_t options) const;
879 
902  inline int8_t caseCompareBetween(int32_t start,
903  int32_t limit,
904  const UnicodeString& srcText,
905  int32_t srcStart,
906  int32_t srcLimit,
907  uint32_t options) const;
908 
916  inline UBool startsWith(const UnicodeString& text) const;
917 
928  inline UBool startsWith(const UnicodeString& srcText,
929  int32_t srcStart,
930  int32_t srcLength) const;
931 
940  inline UBool startsWith(ConstChar16Ptr srcChars,
941  int32_t srcLength) const;
942 
952  inline UBool startsWith(const char16_t *srcChars,
953  int32_t srcStart,
954  int32_t srcLength) const;
955 
963  inline UBool endsWith(const UnicodeString& text) const;
964 
975  inline UBool endsWith(const UnicodeString& srcText,
976  int32_t srcStart,
977  int32_t srcLength) const;
978 
987  inline UBool endsWith(ConstChar16Ptr srcChars,
988  int32_t srcLength) const;
989 
1000  inline UBool endsWith(const char16_t *srcChars,
1001  int32_t srcStart,
1002  int32_t srcLength) const;
1003 
1004 
1005  /* Searching - bitwise only */
1006 
1015  inline int32_t indexOf(const UnicodeString& text) const;
1016 
1026  inline int32_t indexOf(const UnicodeString& text,
1027  int32_t start) const;
1028 
1040  inline int32_t indexOf(const UnicodeString& text,
1041  int32_t start,
1042  int32_t length) const;
1043 
1060  inline int32_t indexOf(const UnicodeString& srcText,
1061  int32_t srcStart,
1062  int32_t srcLength,
1063  int32_t start,
1064  int32_t length) const;
1065 
1077  inline int32_t indexOf(const char16_t *srcChars,
1078  int32_t srcLength,
1079  int32_t start) const;
1080 
1093  inline int32_t indexOf(ConstChar16Ptr srcChars,
1094  int32_t srcLength,
1095  int32_t start,
1096  int32_t length) const;
1097 
1114  int32_t indexOf(const char16_t *srcChars,
1115  int32_t srcStart,
1116  int32_t srcLength,
1117  int32_t start,
1118  int32_t length) const;
1119 
1127  inline int32_t indexOf(char16_t c) const;
1128 
1137  inline int32_t indexOf(UChar32 c) const;
1138 
1147  inline int32_t indexOf(char16_t c,
1148  int32_t start) const;
1149 
1159  inline int32_t indexOf(UChar32 c,
1160  int32_t start) const;
1161 
1172  inline int32_t indexOf(char16_t c,
1173  int32_t start,
1174  int32_t length) const;
1175 
1187  inline int32_t indexOf(UChar32 c,
1188  int32_t start,
1189  int32_t length) const;
1190 
1199  inline int32_t lastIndexOf(const UnicodeString& text) const;
1200 
1210  inline int32_t lastIndexOf(const UnicodeString& text,
1211  int32_t start) const;
1212 
1224  inline int32_t lastIndexOf(const UnicodeString& text,
1225  int32_t start,
1226  int32_t length) const;
1227 
1244  inline int32_t lastIndexOf(const UnicodeString& srcText,
1245  int32_t srcStart,
1246  int32_t srcLength,
1247  int32_t start,
1248  int32_t length) const;
1249 
1260  inline int32_t lastIndexOf(const char16_t *srcChars,
1261  int32_t srcLength,
1262  int32_t start) const;
1263 
1276  inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1277  int32_t srcLength,
1278  int32_t start,
1279  int32_t length) const;
1280 
1297  int32_t lastIndexOf(const char16_t *srcChars,
1298  int32_t srcStart,
1299  int32_t srcLength,
1300  int32_t start,
1301  int32_t length) const;
1302 
1310  inline int32_t lastIndexOf(char16_t c) const;
1311 
1320  inline int32_t lastIndexOf(UChar32 c) const;
1321 
1330  inline int32_t lastIndexOf(char16_t c,
1331  int32_t start) const;
1332 
1342  inline int32_t lastIndexOf(UChar32 c,
1343  int32_t start) const;
1344 
1355  inline int32_t lastIndexOf(char16_t c,
1356  int32_t start,
1357  int32_t length) const;
1358 
1370  inline int32_t lastIndexOf(UChar32 c,
1371  int32_t start,
1372  int32_t length) const;
1373 
1374 
1375  /* Character access */
1376 
1385  inline char16_t charAt(int32_t offset) const;
1386 
1394  inline char16_t operator[] (int32_t offset) const;
1395 
1407  UChar32 char32At(int32_t offset) const;
1408 
1424  int32_t getChar32Start(int32_t offset) const;
1425 
1442  int32_t getChar32Limit(int32_t offset) const;
1443 
1494  int32_t moveIndex32(int32_t index, int32_t delta) const;
1495 
1496  /* Substring extraction */
1497 
1513  inline void extract(int32_t start,
1514  int32_t length,
1515  Char16Ptr dst,
1516  int32_t dstStart = 0) const;
1517 
1539  int32_t
1540  extract(Char16Ptr dest, int32_t destCapacity,
1541  UErrorCode &errorCode) const;
1542 
1552  inline void extract(int32_t start,
1553  int32_t length,
1554  UnicodeString& target) const;
1555 
1567  inline void extractBetween(int32_t start,
1568  int32_t limit,
1569  char16_t *dst,
1570  int32_t dstStart = 0) const;
1571 
1580  virtual void extractBetween(int32_t start,
1581  int32_t limit,
1582  UnicodeString& target) const override;
1583 
1605  int32_t extract(int32_t start,
1606  int32_t startLength,
1607  char *target,
1608  int32_t targetCapacity,
1609  enum EInvariant inv) const;
1610 
1611 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1612 
1632  int32_t extract(int32_t start,
1633  int32_t startLength,
1634  char *target,
1635  uint32_t targetLength) const;
1636 
1637 #endif
1638 
1639 #if !UCONFIG_NO_CONVERSION
1640 
1666  inline int32_t extract(int32_t start,
1667  int32_t startLength,
1668  char* target,
1669  const char* codepage = nullptr) const;
1670 
1700  int32_t extract(int32_t start,
1701  int32_t startLength,
1702  char *target,
1703  uint32_t targetLength,
1704  const char *codepage) const;
1705 
1723  int32_t extract(char *dest, int32_t destCapacity,
1724  UConverter *cnv,
1725  UErrorCode &errorCode) const;
1726 
1727 #endif
1728 
1742  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1743 
1754  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1755 
1767  void toUTF8(ByteSink &sink) const;
1768 
// Writes this string as UTF-8 into the caller's string object: wraps `result`
// in a StringByteSink, runs toUTF8() on that sink, and returns `result`.
// length() is passed as the sink's second constructor argument.
// NOTE(review): presumably a size hint for the sink - confirm in
// unicode/bytestream.h.
1782  template<typename StringClass>
1783  StringClass &toUTF8String(StringClass &result) const {
1784  StringByteSink<StringClass> sbs(&result, length());
1785  toUTF8(sbs);
1786  return result;
1787  }
1788 
1789 #ifndef U_HIDE_DRAFT_API
// Returns this string as UTF-8 in a new StringClass: default-constructs the
// result, wraps it in a StringByteSink (second argument: length()), runs
// toUTF8() on the sink, and returns the result by value.
1801  template<typename StringClass>
1802  StringClass toUTF8String() const {
1803  StringClass result;
1804  StringByteSink<StringClass> sbs(&result, length());
1805  toUTF8(sbs);
1806  return result;
1807  }
1808 #endif // U_HIDE_DRAFT_API
1809 
1825  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1826 
1827  /* Length operations */
1828 
1837  inline int32_t length() const;
1838 
1852  int32_t
1853  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1854 
1878  UBool
1879  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1880 
1886  inline UBool isEmpty() const;
1887 
1897  inline int32_t getCapacity() const;
1898 
1899  /* Other operations */
1900 
1906  inline int32_t hashCode() const;
1907 
1920  inline UBool isBogus() const;
1921 
1922 #ifndef U_HIDE_DRAFT_API
1923 private:
1924  // These type aliases are private; there is no guarantee that they will remain
1925  // aliases to the same types in subsequent versions of ICU.
1926  // Note that whether `std::u16string_view::const_iterator` is a pointer or a
1927  // class that models contiguous_iterator is platform-dependent.
1928  using unspecified_iterator = std::u16string_view::const_iterator;
1929  using unspecified_reverse_iterator = std::u16string_view::const_reverse_iterator;
1930 
1931 public:
// Read-only iteration. Each accessor converts *this to a std::u16string_view
// and returns that view's begin() / end() / rbegin() / rend() iterator.
1937  unspecified_iterator begin() const { return std::u16string_view(*this).begin(); }
1943  unspecified_iterator end() const { return std::u16string_view(*this).end(); }
1949  unspecified_reverse_iterator rbegin() const { return std::u16string_view(*this).rbegin(); }
1955  unspecified_reverse_iterator rend() const { return std::u16string_view(*this).rend(); }
1956 #endif // U_HIDE_DRAFT_API
1957 
1958  //========================================
1959  // Write operations
1960  //========================================
1961 
1962  /* Assignment operations */
1963 
1983 
2010 
// Assignment from any type S for which ConvertibleToU16StringView<S> holds.
// Calls unBogus() first, then replaces the whole current content
// (start 0, length()) with the view of src via doReplace() and returns its
// result.
2021  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2022  inline UnicodeString &operator=(const S &src) {
2023  unBogus();
2024  return doReplace(0, length(), internal::toU16StringView(src));
2025  }
2026 
2036 
2042  void swap(UnicodeString &other) noexcept;
2043 
// Non-member swap declared as a friend inside the class; forwards to the
// member function s1.swap(s2). Declared noexcept.
2050  friend inline void U_EXPORT2
2051  swap(UnicodeString &s1, UnicodeString &s2) noexcept {
2052  s1.swap(s2);
2053  }
2054 
2062  inline UnicodeString& operator= (char16_t ch);
2063 
2071  inline UnicodeString& operator= (UChar32 ch);
2072 
2084  inline UnicodeString& setTo(const UnicodeString& srcText,
2085  int32_t srcStart);
2086 
2100  inline UnicodeString& setTo(const UnicodeString& srcText,
2101  int32_t srcStart,
2102  int32_t srcLength);
2103 
2112  inline UnicodeString& setTo(const UnicodeString& srcText);
2113 
2122  inline UnicodeString& setTo(const char16_t *srcChars,
2123  int32_t srcLength);
2124 
2133  inline UnicodeString& setTo(char16_t srcChar);
2134 
2143  inline UnicodeString& setTo(UChar32 srcChar);
2144 
2168  UnicodeString &setTo(UBool isTerminated,
2169  ConstChar16Ptr text,
2170  int32_t textLength);
2171 
2191  UnicodeString &setTo(char16_t *buffer,
2192  int32_t buffLength,
2193  int32_t buffCapacity);
2194 
2234  void setToBogus();
2235 
2243  UnicodeString& setCharAt(int32_t offset,
2244  char16_t ch);
2245 
2246 
2247  /* Append operations */
2248 
2256  inline UnicodeString& operator+= (char16_t ch);
2257 
2265  inline UnicodeString& operator+= (UChar32 ch);
2266 
2274  inline UnicodeString& operator+= (const UnicodeString& srcText);
2275 
// Append operator for any type S for which ConvertibleToU16StringView<S>
// holds: converts src with internal::toU16StringView() and returns the result
// of doAppend() on that view.
2286  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2287  inline UnicodeString& operator+=(const S &src) {
2288  return doAppend(internal::toU16StringView(src));
2289  }
2290 
2305  inline UnicodeString& append(const UnicodeString& srcText,
2306  int32_t srcStart,
2307  int32_t srcLength);
2308 
2316  inline UnicodeString& append(const UnicodeString& srcText);
2317 
2331  inline UnicodeString& append(const char16_t *srcChars,
2332  int32_t srcStart,
2333  int32_t srcLength);
2334 
2344  inline UnicodeString& append(ConstChar16Ptr srcChars,
2345  int32_t srcLength);
2346 
// append() for any type S for which ConvertibleToU16StringView<S> holds:
// converts src with internal::toU16StringView() and returns the result of
// doAppend() on that view (same body as the templated operator+=).
2357  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2358  inline UnicodeString& append(const S &src) {
2359  return doAppend(internal::toU16StringView(src));
2360  }
2361 
2368  inline UnicodeString& append(char16_t srcChar);
2369 
2377 
2378 #ifndef U_HIDE_DRAFT_API
// Container-style spelling of append(char16_t): calls append(c) and discards
// the returned reference.
2386  inline void push_back(char16_t c) { append(c); }
2387 #endif // U_HIDE_DRAFT_API
2388 
2389  /* Insert operations */
2390 
2404  inline UnicodeString& insert(int32_t start,
2405  const UnicodeString& srcText,
2406  int32_t srcStart,
2407  int32_t srcLength);
2408 
2417  inline UnicodeString& insert(int32_t start,
2418  const UnicodeString& srcText);
2419 
2433  inline UnicodeString& insert(int32_t start,
2434  const char16_t *srcChars,
2435  int32_t srcStart,
2436  int32_t srcLength);
2437 
2447  inline UnicodeString& insert(int32_t start,
2448  ConstChar16Ptr srcChars,
2449  int32_t srcLength);
2450 
2459  inline UnicodeString& insert(int32_t start,
2460  char16_t srcChar);
2461 
2470  inline UnicodeString& insert(int32_t start,
2471  UChar32 srcChar);
2472 
2473 
2474  /* Replace operations */
2475 
2493  inline UnicodeString& replace(int32_t start,
2494  int32_t length,
2495  const UnicodeString& srcText,
2496  int32_t srcStart,
2497  int32_t srcLength);
2498 
2511  inline UnicodeString& replace(int32_t start,
2512  int32_t length,
2513  const UnicodeString& srcText);
2514 
2532  inline UnicodeString& replace(int32_t start,
2533  int32_t length,
2534  const char16_t *srcChars,
2535  int32_t srcStart,
2536  int32_t srcLength);
2537 
2550  inline UnicodeString& replace(int32_t start,
2551  int32_t length,
2552  ConstChar16Ptr srcChars,
2553  int32_t srcLength);
2554 
2566  inline UnicodeString& replace(int32_t start,
2567  int32_t length,
2568  char16_t srcChar);
2569 
2581  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2582 
2592  inline UnicodeString& replaceBetween(int32_t start,
2593  int32_t limit,
2594  const UnicodeString& srcText);
2595 
2610  inline UnicodeString& replaceBetween(int32_t start,
2611  int32_t limit,
2612  const UnicodeString& srcText,
2613  int32_t srcStart,
2614  int32_t srcLimit);
2615 
2623  virtual void handleReplaceBetween(int32_t start,
2624  int32_t limit,
2625  const UnicodeString& text) override;
2626 
2632  virtual UBool hasMetaData() const override;
2633 
2647  virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2648 
2649  /* Search and replace operations */
2650 
2659  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2660  const UnicodeString& newText);
2661 
2673  inline UnicodeString& findAndReplace(int32_t start,
2674  int32_t length,
2675  const UnicodeString& oldText,
2676  const UnicodeString& newText);
2677 
// findAndReplace overload taking explicit sub-ranges: (start, length) on this
// string, (oldStart, oldLength) on oldText and (newStart, newLength) on
// newText. The opening line of the declaration is restored so the parameter
// list below is no longer dangling.
2695  UnicodeString& findAndReplace(int32_t start,
2696  int32_t length,
2697  const UnicodeString& oldText,
2698  int32_t oldStart,
2699  int32_t oldLength,
2700  const UnicodeString& newText,
2701  int32_t newStart,
2702  int32_t newLength);
2703 
2704 
2705  /* Remove operations */
2706 
2715  inline UnicodeString& remove();
2716 
2725  inline UnicodeString& remove(int32_t start,
2726  int32_t length = static_cast<int32_t>(INT32_MAX));
2727 
2736  inline UnicodeString& removeBetween(int32_t start,
2737  int32_t limit = static_cast<int32_t>(INT32_MAX));
2738 
2748  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2749 
2750  /* Length operations */
2751 
2763  UBool padLeading(int32_t targetLength,
2764  char16_t padChar = 0x0020);
2765 
2777  UBool padTrailing(int32_t targetLength,
2778  char16_t padChar = 0x0020);
2779 
2786  inline UBool truncate(int32_t targetLength);
2787 
2794 
2795  /* Miscellaneous operations */
2796 
2802  inline UnicodeString& reverse();
2803 
2812  inline UnicodeString& reverse(int32_t start,
2813  int32_t length);
2814 
2822 
2830  UnicodeString& toUpper(const Locale& locale);
2831 
2839 
2847  UnicodeString& toLower(const Locale& locale);
2848 
2849 #if !UCONFIG_NO_BREAK_ITERATION
2850 
2878 
2906  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2907 
2938  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2939 
2940 #endif
2941 
2955  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2956 
2957  //========================================
2958  // Access to the internal buffer
2959  //========================================
2960 
3004  char16_t *getBuffer(int32_t minCapacity);
3005 
3026  void releaseBuffer(int32_t newLength=-1);
3027 
3058  inline const char16_t *getBuffer() const;
3059 
3093  const char16_t *getTerminatedBuffer();
3094 
// Implicit conversion to std::u16string_view: the view is built from
// getBuffer() and length() (cast to the view's size_type).
3101  inline operator std::u16string_view() const {
3102  return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
3103  }
3104 
3105 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Implicit conversion to std::wstring_view: the view is built from
// getBuffer(), reinterpreted as const wchar_t *, and length().
// U_ALIASING_BARRIER(p), when that macro is defined, is applied to the pointer
// before the reinterpret_cast.
// The length uses static_cast, matching the std::u16string_view conversion
// operator in this class.
3115  inline operator std::wstring_view() const {
3116  const char16_t *p = getBuffer();
3117 #ifdef U_ALIASING_BARRIER
3118  U_ALIASING_BARRIER(p);
3119 #endif
3120  return { reinterpret_cast<const wchar_t *>(p), static_cast<std::wstring_view::size_type>(length()) };
3121  }
3122 #endif // U_SIZEOF_WCHAR_T
3123 
3124  //========================================
3125  // Constructors
3126  //========================================
3127 
3131  inline UnicodeString();
3132 
3144  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
3145 
3156 
3167 
3168 #ifdef U_HIDE_DRAFT_API
// Constructor from a const char16_t pointer; delegates to
// UnicodeString(text, -1).
// NOTE(review): -1 presumably means "text is NUL-terminated, compute the
// length" - confirm against the two-argument constructor's documentation.
3188  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
3189  UnicodeString(text, -1) {}
3190 #endif // U_HIDE_DRAFT_API
3191 
3192 #if !U_CHAR16_IS_TYPEDEF && \
3193  (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
// Constructor from a const uint16_t pointer; wraps the pointer in
// ConstChar16Ptr and delegates to the (pointer, length) constructor with
// length -1.
3213  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3214  UnicodeString(ConstChar16Ptr(text), -1) {}
3215 #endif
3216 
3217 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
// Constructor from a const wchar_t pointer; wraps the pointer in
// ConstChar16Ptr and delegates to the (pointer, length) constructor with
// length -1.
3238  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3239  UnicodeString(ConstChar16Ptr(text), -1) {}
3240 #endif
3241 
3252  UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3253 
3270  UnicodeString(const char16_t *text,
3271  int32_t textLength);
3272 
3273 #if !U_CHAR16_IS_TYPEDEF
// Constructor from a const uint16_t pointer plus length; wraps the pointer in
// ConstChar16Ptr and delegates to the (pointer, textLength) constructor.
3290  UnicodeString(const uint16_t *text, int32_t textLength) :
3291  UnicodeString(ConstChar16Ptr(text), textLength) {}
3292 #endif
3293 
3294 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Constructor from a const wchar_t pointer plus length; wraps the pointer in
// ConstChar16Ptr and delegates to the (pointer, textLength) constructor.
3312  UnicodeString(const wchar_t *text, int32_t textLength) :
3313  UnicodeString(ConstChar16Ptr(text), textLength) {}
3314 #endif
3315 
3323  inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3324 
// Constructor from any type S for which ConvertibleToU16StringView<S> holds.
// Sets fLengthAndFlags to kShortString and then appends the view produced by
// internal::toU16StringViewNullable(text) via doAppend().
// The constructor's signature line is restored so the template header and the
// body below form a complete definition.
// NOTE(review): UNISTR_FROM_STRING_EXPLICIT is assumed here by analogy with
// the sibling single-argument string constructors - confirm against upstream.
3337  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3338  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
3339  fUnion.fFields.fLengthAndFlags = kShortString;
3340  doAppend(internal::toU16StringViewNullable(text));
3341  }
3342 
3373  UnicodeString(UBool isTerminated,
3374  ConstChar16Ptr text,
3375  int32_t textLength);
3376 
3395  UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3396 
3397 #if !U_CHAR16_IS_TYPEDEF
// Constructor from a writable uint16_t buffer; wraps the pointer in Char16Ptr
// and delegates to the (char16_t *buffer, buffLength, buffCapacity)
// constructor.
3406  UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3407  UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3408 #endif
3409 
3410 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Constructor from a writable wchar_t buffer; wraps the pointer in Char16Ptr
// and delegates to the (char16_t *buffer, buffLength, buffCapacity)
// constructor.
3420  UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3421  UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3422 #endif
3423 
3432  inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3433 
3434 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3435 
3459  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3460 
3469  UnicodeString(const char *codepageData, int32_t dataLength);
3470 
3471 #endif
3472 
3473 #if !UCONFIG_NO_CONVERSION
3474 
3492  UnicodeString(const char *codepageData, const char *codepage);
3493 
3511  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3512 
// Constructor from byte data (src, srcLength) converted with the caller's
// UConverter; errors are reported through errorCode. The opening
// `UnicodeString(` of the declaration is restored so the parameter list below
// is no longer dangling.
3534  UnicodeString(
3535  const char *src, int32_t srcLength,
3536  UConverter *cnv,
3537  UErrorCode &errorCode);
3538 
3539 #endif
3540 
3573  UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3574 
3575 
3593 
3600  UnicodeString(UnicodeString &&src) noexcept;
3601 
3608  UnicodeString(const UnicodeString& src, int32_t srcStart);
3609 
3617  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3618 
3632  virtual UnicodeString *clone() const override;
3633 
3637  virtual ~UnicodeString();
3638 
3661  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3662  static inline UnicodeString readOnlyAlias(const S &text) {
3663  return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3664  }
3665 
// readOnlyAlias() overload for a UnicodeString argument: returns the result of
// the private readOnlyAliasFromUnicodeString() helper.
3685  static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
3686  return readOnlyAliasFromUnicodeString(text);
3687  }
3688 
3703 
3715  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3716 
3717  /* Miscellaneous operations */
3718 
3754 
3774  UChar32 unescapeAt(int32_t &offset) const;
3775 
3781  static UClassID U_EXPORT2 getStaticClassID();
3782 
3788  virtual UClassID getDynamicClassID() const override;
3789 
3790  //========================================
3791  // Implementation methods
3792  //========================================
3793 
3794 protected:
3799  virtual int32_t getLength() const override;
3800 
3806  virtual char16_t getCharAt(int32_t offset) const override;
3807 
3813  virtual UChar32 getChar32At(int32_t offset) const override;
3814 
3815 private:
3816  static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3817  static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
3818 
3819  // For char* constructors. Could be made public.
3820  UnicodeString &setToUTF8(StringPiece utf8);
3821  // For extract(char*).
3822  // We could make a toUTF8(target, capacity, errorCode) public but not
3823  // this version: New API will be cleaner if we make callers create substrings
3824  // rather than having start+length on every method,
3825  // and it should take a UErrorCode&.
3826  int32_t
3827  toUTF8(int32_t start, int32_t len,
3828  char *target, int32_t capacity) const;
3829 
// Convenience overload: forwards to doEquals(const char16_t *, int32_t) with
// text.getArrayStart() and the caller-supplied len. No length or bogus checks
// are made here.
3834  inline UBool doEquals(const UnicodeString &text, int32_t len) const {
3835  return doEquals(text.getArrayStart(), len);
3836  }
3837  UBool doEquals(const char16_t *text, int32_t len) const;
3838 
3839  inline UBool
3840  doEqualsSubstring(int32_t start,
3841  int32_t length,
3842  const UnicodeString& srcText,
3843  int32_t srcStart,
3844  int32_t srcLength) const;
3845 
3846  UBool doEqualsSubstring(int32_t start,
3847  int32_t length,
3848  const char16_t *srcChars,
3849  int32_t srcStart,
3850  int32_t srcLength) const;
3851 
3852  inline int8_t
3853  doCompare(int32_t start,
3854  int32_t length,
3855  const UnicodeString& srcText,
3856  int32_t srcStart,
3857  int32_t srcLength) const;
3858 
3859  int8_t doCompare(int32_t start,
3860  int32_t length,
3861  const char16_t *srcChars,
3862  int32_t srcStart,
3863  int32_t srcLength) const;
3864 
3865  inline int8_t
3866  doCompareCodePointOrder(int32_t start,
3867  int32_t length,
3868  const UnicodeString& srcText,
3869  int32_t srcStart,
3870  int32_t srcLength) const;
3871 
3872  int8_t doCompareCodePointOrder(int32_t start,
3873  int32_t length,
3874  const char16_t *srcChars,
3875  int32_t srcStart,
3876  int32_t srcLength) const;
3877 
3878  inline int8_t
3879  doCaseCompare(int32_t start,
3880  int32_t length,
3881  const UnicodeString &srcText,
3882  int32_t srcStart,
3883  int32_t srcLength,
3884  uint32_t options) const;
3885 
3886  int8_t
3887  doCaseCompare(int32_t start,
3888  int32_t length,
3889  const char16_t *srcChars,
3890  int32_t srcStart,
3891  int32_t srcLength,
3892  uint32_t options) const;
3893 
3894  int32_t doIndexOf(char16_t c,
3895  int32_t start,
3896  int32_t length) const;
3897 
3898  int32_t doIndexOf(UChar32 c,
3899  int32_t start,
3900  int32_t length) const;
3901 
3902  int32_t doLastIndexOf(char16_t c,
3903  int32_t start,
3904  int32_t length) const;
3905 
3906  int32_t doLastIndexOf(UChar32 c,
3907  int32_t start,
3908  int32_t length) const;
3909 
3910  void doExtract(int32_t start,
3911  int32_t length,
3912  char16_t *dst,
3913  int32_t dstStart) const;
3914 
3915  inline void doExtract(int32_t start,
3916  int32_t length,
3917  UnicodeString& target) const;
3918 
3919  inline char16_t doCharAt(int32_t offset) const;
3920 
3921  UnicodeString& doReplace(int32_t start,
3922  int32_t length,
3923  const UnicodeString& srcText,
3924  int32_t srcStart,
3925  int32_t srcLength);
3926 
3927  UnicodeString& doReplace(int32_t start,
3928  int32_t length,
3929  const char16_t *srcChars,
3930  int32_t srcStart,
3931  int32_t srcLength);
3932  UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3933 
3934  UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3935  UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3936  UnicodeString& doAppend(std::u16string_view src);
3937 
3938  UnicodeString& doReverse(int32_t start,
3939  int32_t length);
3940 
3941  // calculate hash code
3942  int32_t doHashCode() const;
3943 
3944  // get pointer to start of array
3945  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3946  inline char16_t* getArrayStart();
3947  inline const char16_t* getArrayStart() const;
3948 
3949  inline UBool hasShortLength() const;
3950  inline int32_t getShortLength() const;
3951 
3952  // A UnicodeString object (not necessarily its current buffer)
3953  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3954  inline UBool isWritable() const;
3955 
3956  // Is the current buffer writable?
3957  inline UBool isBufferWritable() const;
3958 
3959  // None of the following does releaseArray().
3960  inline void setZeroLength();
3961  inline void setShortLength(int32_t len);
3962  inline void setLength(int32_t len);
3963  inline void setToEmpty();
3964  inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3965 
3966  // allocate the array; result may be the stack buffer
3967  // sets refCount to 1 if appropriate
3968  // sets fArray, fCapacity, and flags
3969  // sets length to 0
3970  // returns boolean for success or failure
3971  UBool allocate(int32_t capacity);
3972 
3973  // release the array if owned
3974  void releaseArray();
3975 
3976  // turn a bogus string into an empty one
  // The inline setTo() overloads that go through doReplace() call this first.
3977  void unBogus();
3978 
3979  // implements assignment operator, copy constructor, and fastCopyFrom()
  // The inline setTo(const UnicodeString&) also forwards here with fastCopy left at its default.
3980  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3981 
3982  // Copies just the fields without memory management.
3983  void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3984 
3985  // Pin start and limit to acceptable values.
  // pinIndex() clamps start into [0, length()].
  // pinIndices() clamps start into [0, length()] and then length into
  // [0, length() - start]; both arguments are modified in place.
3986  inline void pinIndex(int32_t& start) const;
3987  inline void pinIndices(int32_t& start,
3988  int32_t& length) const;
3989 
  // The declarations in this section are compiled only when
  // UCONFIG_NO_CONVERSION is 0, i.e. when converter support is enabled.
3990 #if !UCONFIG_NO_CONVERSION
3991 
3992  /* Internal extract() using UConverter. */
  // NOTE(review): presumably returns the number of bytes written/needed,
  // like the public extract() overloads -- confirm in the implementation.
3993  int32_t doExtract(int32_t start, int32_t length,
3994  char *dest, int32_t destCapacity,
3995  UConverter *cnv,
3996  UErrorCode &errorCode) const;
3997 
3998  /*
3999  * Real constructor for converting from codepage data.
4000  * It assumes that it is called with !fRefCounted.
4001  *
4002  * If `codepage==0`, then the default converter
4003  * is used for the platform encoding.
4004  * If `codepage` is an empty string (`""`),
4005  * then a simple conversion is performed on the codepage-invariant
4006  * subset ("invariant characters") of the platform encoding. See utypes.h.
4007  */
4008  void doCodepageCreate(const char *codepageData,
4009  int32_t dataLength,
4010  const char *codepage);
4011 
4012  /*
4013  * Worker function for creating a UnicodeString from
4014  * a codepage string using a UConverter.
4015  */
  // Unlike the overload above, this one takes an already-opened converter
  // and reports failure through `status`.
4016  void
4017  doCodepageCreate(const char *codepageData,
4018  int32_t dataLength,
4019  UConverter *converter,
4020  UErrorCode &status);
4021 
4022 #endif
4023 
4024  /*
4025  * This function is called when write access to the array
4026  * is necessary.
4027  *
4028  * We need to make a copy of the array if
4029  * the buffer is read-only, or
4030  * the buffer is refCounted (shared), and refCount>1, or
4031  * the buffer is too small.
4032  *
4033  * Return false if memory could not be allocated.
4034  */
  // Every parameter has a default, so a bare cloneArrayIfNeeded() is a valid call.
  // NOTE(review): the -1 defaults presumably mean "keep the current capacity"
  // and pBufferToDelete presumably hands the old heap buffer back to the
  // caller instead of freeing it -- confirm against unistr.cpp.
4035  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
4036  int32_t growCapacity = -1,
4037  UBool doCopyArray = true,
4038  int32_t** pBufferToDelete = nullptr,
4039  UBool forceClone = false);
4040 
4046  UnicodeString &
4047  caseMap(int32_t caseLocale, uint32_t options,
4049  BreakIterator *iter,
4050 #endif
4051  UStringCaseMapper *stringCaseMapper);
4052 
4053  // ref counting
  // Only meaningful for buffers flagged kRefCounted (see the flag constants below).
  // isBufferWritable() treats a kRefCounted buffer as writable only when refCount()==1.
  // NOTE(review): removeRef() presumably returns the count after decrementing -- confirm.
4054  void addRef();
4055  int32_t removeRef();
4056  int32_t refCount() const;
4057 
4058  // constants
4059  enum {
  // Capacity of the in-object buffer, in char16_t units: the object size
  // minus one pointer (the vtable pointer) and minus 2 bytes (the int16_t
  // fLengthAndFlags), divided by the size of one code unit.
4065  US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
4066  kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
4067  kInvalidHashCode=0, // invalid hash code
4068  kEmptyHashCode=1, // hash code for empty string
4069 
4070  // bit flag values for fLengthAndFlags
4071  kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
4072  kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
4073  kRefCounted=4, // there is a refCount field before the characters in fArray
4074  kBufferIsReadonly=8,// do not write to this buffer
4075  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
4076  // and releaseBuffer(newLength) must be called
4077  kAllStorageFlags=0x1f,
4078 
  // The length occupies the bits above the five storage-flag bits.
  // A negative fLengthAndFlags means the real length is stored in fUnion.fFields.fLength.
4079  kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
4080  kLength1=1<<kLengthShift,
4081  kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
4082  kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
4083 
4084  // combined values for convenience
4085  kShortString=kUsingStackBuffer,
4086  kLongString=kRefCounted,
4087  kReadonlyAlias=kBufferIsReadonly,
4088  kWritableAlias=0
4089  };
4090 
4091  friend class UnicodeStringAppendable;
4092 
4093  union StackBufferOrFields; // forward declaration necessary before friend declaration
4094  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
4095 
4096  /*
4097  * The following are all the class fields that are stored
4098  * in each UnicodeString object.
4099  * Note that UnicodeString has virtual functions,
4100  * therefore there is an implicit vtable pointer
4101  * as the first real field.
4102  * The fields should be aligned such that no padding is necessary.
4103  * On 32-bit machines, the size should be 32 bytes,
4104  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
4105  *
4106  * We use a hack to achieve this.
4107  *
4108  * With at least some compilers, each of the following is forced to
4109  * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
4110  * rounded up with additional padding if the fields do not already fit that requirement:
4111  * - sizeof(class UnicodeString)
4112  * - offsetof(UnicodeString, fUnion)
4113  * - sizeof(fUnion)
4114  * - sizeof(fStackFields)
4115  *
4116  * We optimize for the longest possible internal buffer for short strings.
4117  * fUnion.fStackFields begins with 2 bytes for storage flags
4118  * and the length of relatively short strings,
4119  * followed by the buffer for short string contents.
4120  * There is no padding inside fStackFields.
4121  *
4122  * Heap-allocated and aliased strings use fUnion.fFields.
4123  * Both fStackFields and fFields must begin with the same fields for flags and short length,
4124  * that is, those must have the same memory offsets inside the object,
4125  * because the flags must be inspected in order to decide which half of fUnion is being used.
4126  * We assume that the compiler does not reorder the fields.
4127  *
4128  * (Padding at the end of fFields is ok:
4129  * As long as it is no larger than fStackFields, it is not wasted space.)
4130  *
4131  * For some of the history of the UnicodeString class fields layout, see
4132  * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
4133  * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
4134  * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
4135  */
4136  // (implicit) *vtable;
4137  union StackBufferOrFields {
4138  // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
4139  // Each struct of the union must begin with fLengthAndFlags.
  // Note: the inline accessors in this header read the flags through
  // fFields.fLengthAndFlags even when the stack buffer is in use, relying on
  // both structs starting with that same field.
4140  struct {
4141  int16_t fLengthAndFlags; // bit fields: see constants above
4142  char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
4143  } fStackFields;
4144  struct {
4145  int16_t fLengthAndFlags; // bit fields: see constants above
4146  int32_t fLength; // number of characters in fArray if >kMaxShortLength (i.e. fLengthAndFlags<0); else undefined
4147  int32_t fCapacity; // capacity of fArray (in char16_ts)
4148  // array pointer last to minimize padding for machines with P128 data model
4149  // or pointer sizes that are not a power of 2
4150  char16_t *fArray; // the Unicode data
4151  } fFields;
4152  } fUnion;
4153 };
4154 
4163 U_COMMON_API UnicodeString U_EXPORT2
4165
// Concatenation with any right-hand operand that satisfies
// ConvertibleToU16StringView<S>; the enable_if_t default template argument
// removes this overload for all other types.
// s2 is converted with internal::toU16StringView() and the actual work is
// done by unistr_internalConcat().
// NOTE(review): in this listing unistr_internalConcat is declared only after
// this template, so the call appears to rely on its second argument being
// type-dependent (lookup deferred to instantiation). Keep the conversion
// nested inside the call unless an earlier declaration exists -- confirm.
4176 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
4177 inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
4178  return unistr_internalConcat(s1, internal::toU16StringView(s2));
4179 }
4180 
// Internal, exported helper used by the templated operator+ in this header:
// takes the left operand as a UnicodeString and the right operand as a
// std::u16string_view and returns a new UnicodeString by value.
// Hidden entirely when U_FORCE_HIDE_INTERNAL_API is defined.
4181 #ifndef U_FORCE_HIDE_INTERNAL_API
4183 U_COMMON_API UnicodeString U_EXPORT2
4184 unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
4185 #endif
4186 
4187 //========================================
4188 // Inline members
4189 //========================================
4190 
4191 //========================================
4192 // Privates
4193 //========================================
4194 
4195 inline void
4196 UnicodeString::pinIndex(int32_t& start) const
4197 {
4198  // pin index
4199  if(start < 0) {
4200  start = 0;
4201  } else if(start > length()) {
4202  start = length();
4203  }
4204 }
4205 
4206 inline void
4207 UnicodeString::pinIndices(int32_t& start,
4208  int32_t& _length) const
4209 {
4210  // pin indices
4211  int32_t len = length();
4212  if(start < 0) {
4213  start = 0;
4214  } else if(start > len) {
4215  start = len;
4216  }
4217  if(_length < 0) {
4218  _length = 0;
4219  } else if(_length > (len - start)) {
4220  _length = (len - start);
4221  }
4222 }
4223 
4224 inline char16_t*
4225 UnicodeString::getArrayStart() {
4226  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4227  fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4228 }
4229 
4230 inline const char16_t*
4231 UnicodeString::getArrayStart() const {
4232  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4233  fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4234 }
4235 
4236 //========================================
4237 // Default constructor
4238 //========================================
4239 
4240 inline
4241 UnicodeString::UnicodeString() {
4242  fUnion.fStackFields.fLengthAndFlags=kShortString;
4243 }
4244 
4245 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
4246  fUnion.fStackFields.fLengthAndFlags=kShortString;
4247 }
4248 
4249 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
4250  fUnion.fStackFields.fLengthAndFlags=kShortString;
4251 }
4252 
4253 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
4254  fUnion.fStackFields.fLengthAndFlags=kShortString;
4255 }
4256 
4257 //========================================
4258 // Read-only implementation methods
4259 //========================================
4260 inline UBool
4261 UnicodeString::hasShortLength() const {
4262  return fUnion.fFields.fLengthAndFlags>=0;
4263 }
4264 
4265 inline int32_t
4266 UnicodeString::getShortLength() const {
4267  // fLengthAndFlags must be non-negative -> short length >= 0
4268  // and arithmetic or logical shift does not matter.
4269  return fUnion.fFields.fLengthAndFlags>>kLengthShift;
4270 }
4271 
4272 inline int32_t
4273 UnicodeString::length() const {
4274  return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
4275 }
4276 
4277 inline int32_t
4278 UnicodeString::getCapacity() const {
4279  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4280  US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
4281 }
4282 
4283 inline int32_t
4284 UnicodeString::hashCode() const
4285 { return doHashCode(); }
4286 
4287 inline UBool
4288 UnicodeString::isBogus() const
4289 { return fUnion.fFields.fLengthAndFlags & kIsBogus; }
4290 
4291 inline UBool
4292 UnicodeString::isWritable() const
4293 { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
4294 
4295 inline UBool
4296 UnicodeString::isBufferWritable() const
4297 {
4298  return
4299  !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4300  (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
4301 }
4302 
4303 inline const char16_t *
4304 UnicodeString::getBuffer() const {
4305  if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4306  return nullptr;
4307  } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4308  return fUnion.fStackFields.fBuffer;
4309  } else {
4310  return fUnion.fFields.fArray;
4311  }
4312 }
4313 
4314 //========================================
4315 // Read-only alias methods
4316 //========================================
4317 inline int8_t
4318 UnicodeString::doCompare(int32_t start,
4319  int32_t thisLength,
4320  const UnicodeString& srcText,
4321  int32_t srcStart,
4322  int32_t srcLength) const
4323 {
4324  if(srcText.isBogus()) {
4325  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4326  } else {
4327  srcText.pinIndices(srcStart, srcLength);
4328  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4329  }
4330 }
4331 
4332 inline UBool
4333 UnicodeString::doEqualsSubstring(int32_t start,
4334  int32_t thisLength,
4335  const UnicodeString& srcText,
4336  int32_t srcStart,
4337  int32_t srcLength) const
4338 {
4339  if(srcText.isBogus()) {
4340  return isBogus();
4341  } else {
4342  srcText.pinIndices(srcStart, srcLength);
4343  return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4344  }
4345 }
4346 
4347 inline bool
4349 {
4350  if(isBogus()) {
4351  return text.isBogus();
4352  } else {
4353  int32_t len = length(), textLength = text.length();
4354  return !text.isBogus() && len == textLength && doEquals(text, len);
4355  }
4356 }
4357 
4358 inline bool
4360 { return (! operator==(text)); }
4361 
4362 inline UBool
4363 UnicodeString::operator> (const UnicodeString& text) const
4364 { return doCompare(0, length(), text, 0, text.length()) == 1; }
4365 
4366 inline UBool
4367 UnicodeString::operator< (const UnicodeString& text) const
4368 { return doCompare(0, length(), text, 0, text.length()) == -1; }
4369 
4370 inline UBool
4371 UnicodeString::operator>= (const UnicodeString& text) const
4372 { return doCompare(0, length(), text, 0, text.length()) != -1; }
4373 
4374 inline UBool
4375 UnicodeString::operator<= (const UnicodeString& text) const
4376 { return doCompare(0, length(), text, 0, text.length()) != 1; }
4377 
4378 inline int8_t
4379 UnicodeString::compare(const UnicodeString& text) const
4380 { return doCompare(0, length(), text, 0, text.length()); }
4381 
4382 inline int8_t
4383 UnicodeString::compare(int32_t start,
4384  int32_t _length,
4385  const UnicodeString& srcText) const
4386 { return doCompare(start, _length, srcText, 0, srcText.length()); }
4387 
4388 inline int8_t
4389 UnicodeString::compare(ConstChar16Ptr srcChars,
4390  int32_t srcLength) const
4391 { return doCompare(0, length(), srcChars, 0, srcLength); }
4392 
4393 inline int8_t
4394 UnicodeString::compare(int32_t start,
4395  int32_t _length,
4396  const UnicodeString& srcText,
4397  int32_t srcStart,
4398  int32_t srcLength) const
4399 { return doCompare(start, _length, srcText, srcStart, srcLength); }
4400 
4401 inline int8_t
4402 UnicodeString::compare(int32_t start,
4403  int32_t _length,
4404  const char16_t *srcChars) const
4405 { return doCompare(start, _length, srcChars, 0, _length); }
4406 
4407 inline int8_t
4408 UnicodeString::compare(int32_t start,
4409  int32_t _length,
4410  const char16_t *srcChars,
4411  int32_t srcStart,
4412  int32_t srcLength) const
4413 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
4414 
4415 inline int8_t
4416 UnicodeString::compareBetween(int32_t start,
4417  int32_t limit,
4418  const UnicodeString& srcText,
4419  int32_t srcStart,
4420  int32_t srcLimit) const
4421 { return doCompare(start, limit - start,
4422  srcText, srcStart, srcLimit - srcStart); }
4423 
4424 inline int8_t
4425 UnicodeString::doCompareCodePointOrder(int32_t start,
4426  int32_t thisLength,
4427  const UnicodeString& srcText,
4428  int32_t srcStart,
4429  int32_t srcLength) const
4430 {
4431  if(srcText.isBogus()) {
4432  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4433  } else {
4434  srcText.pinIndices(srcStart, srcLength);
4435  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4436  }
4437 }
4438 
4439 inline int8_t
4440 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4441 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4442 
4443 inline int8_t
4444 UnicodeString::compareCodePointOrder(int32_t start,
4445  int32_t _length,
4446  const UnicodeString& srcText) const
4447 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4448 
4449 inline int8_t
4450 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4451  int32_t srcLength) const
4452 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4453 
4454 inline int8_t
4455 UnicodeString::compareCodePointOrder(int32_t start,
4456  int32_t _length,
4457  const UnicodeString& srcText,
4458  int32_t srcStart,
4459  int32_t srcLength) const
4460 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4461 
4462 inline int8_t
4463 UnicodeString::compareCodePointOrder(int32_t start,
4464  int32_t _length,
4465  const char16_t *srcChars) const
4466 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4467 
4468 inline int8_t
4469 UnicodeString::compareCodePointOrder(int32_t start,
4470  int32_t _length,
4471  const char16_t *srcChars,
4472  int32_t srcStart,
4473  int32_t srcLength) const
4474 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4475 
4476 inline int8_t
4477 UnicodeString::compareCodePointOrderBetween(int32_t start,
4478  int32_t limit,
4479  const UnicodeString& srcText,
4480  int32_t srcStart,
4481  int32_t srcLimit) const
4482 { return doCompareCodePointOrder(start, limit - start,
4483  srcText, srcStart, srcLimit - srcStart); }
4484 
4485 inline int8_t
4486 UnicodeString::doCaseCompare(int32_t start,
4487  int32_t thisLength,
4488  const UnicodeString &srcText,
4489  int32_t srcStart,
4490  int32_t srcLength,
4491  uint32_t options) const
4492 {
4493  if(srcText.isBogus()) {
4494  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4495  } else {
4496  srcText.pinIndices(srcStart, srcLength);
4497  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4498  }
4499 }
4500 
4501 inline int8_t
4502 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4503  return doCaseCompare(0, length(), text, 0, text.length(), options);
4504 }
4505 
4506 inline int8_t
4507 UnicodeString::caseCompare(int32_t start,
4508  int32_t _length,
4509  const UnicodeString &srcText,
4510  uint32_t options) const {
4511  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4512 }
4513 
4514 inline int8_t
4515 UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4516  int32_t srcLength,
4517  uint32_t options) const {
4518  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4519 }
4520 
4521 inline int8_t
4522 UnicodeString::caseCompare(int32_t start,
4523  int32_t _length,
4524  const UnicodeString &srcText,
4525  int32_t srcStart,
4526  int32_t srcLength,
4527  uint32_t options) const {
4528  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4529 }
4530 
4531 inline int8_t
4532 UnicodeString::caseCompare(int32_t start,
4533  int32_t _length,
4534  const char16_t *srcChars,
4535  uint32_t options) const {
4536  return doCaseCompare(start, _length, srcChars, 0, _length, options);
4537 }
4538 
4539 inline int8_t
4540 UnicodeString::caseCompare(int32_t start,
4541  int32_t _length,
4542  const char16_t *srcChars,
4543  int32_t srcStart,
4544  int32_t srcLength,
4545  uint32_t options) const {
4546  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4547 }
4548 
4549 inline int8_t
4550 UnicodeString::caseCompareBetween(int32_t start,
4551  int32_t limit,
4552  const UnicodeString &srcText,
4553  int32_t srcStart,
4554  int32_t srcLimit,
4555  uint32_t options) const {
4556  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4557 }
4558 
4559 inline int32_t
4560 UnicodeString::indexOf(const UnicodeString& srcText,
4561  int32_t srcStart,
4562  int32_t srcLength,
4563  int32_t start,
4564  int32_t _length) const
4565 {
4566  if(!srcText.isBogus()) {
4567  srcText.pinIndices(srcStart, srcLength);
4568  if(srcLength > 0) {
4569  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4570  }
4571  }
4572  return -1;
4573 }
4574 
4575 inline int32_t
4576 UnicodeString::indexOf(const UnicodeString& text) const
4577 { return indexOf(text, 0, text.length(), 0, length()); }
4578 
4579 inline int32_t
4580 UnicodeString::indexOf(const UnicodeString& text,
4581  int32_t start) const {
4582  pinIndex(start);
4583  return indexOf(text, 0, text.length(), start, length() - start);
4584 }
4585 
4586 inline int32_t
4587 UnicodeString::indexOf(const UnicodeString& text,
4588  int32_t start,
4589  int32_t _length) const
4590 { return indexOf(text, 0, text.length(), start, _length); }
4591 
4592 inline int32_t
4593 UnicodeString::indexOf(const char16_t *srcChars,
4594  int32_t srcLength,
4595  int32_t start) const {
4596  pinIndex(start);
4597  return indexOf(srcChars, 0, srcLength, start, length() - start);
4598 }
4599 
4600 inline int32_t
4601 UnicodeString::indexOf(ConstChar16Ptr srcChars,
4602  int32_t srcLength,
4603  int32_t start,
4604  int32_t _length) const
4605 { return indexOf(srcChars, 0, srcLength, start, _length); }
4606 
4607 inline int32_t
4608 UnicodeString::indexOf(char16_t c,
4609  int32_t start,
4610  int32_t _length) const
4611 { return doIndexOf(c, start, _length); }
4612 
4613 inline int32_t
4614 UnicodeString::indexOf(UChar32 c,
4615  int32_t start,
4616  int32_t _length) const
4617 { return doIndexOf(c, start, _length); }
4618 
4619 inline int32_t
4620 UnicodeString::indexOf(char16_t c) const
4621 { return doIndexOf(c, 0, length()); }
4622 
4623 inline int32_t
4624 UnicodeString::indexOf(UChar32 c) const
4625 { return indexOf(c, 0, length()); }
4626 
4627 inline int32_t
4628 UnicodeString::indexOf(char16_t c,
4629  int32_t start) const {
4630  pinIndex(start);
4631  return doIndexOf(c, start, length() - start);
4632 }
4633 
4634 inline int32_t
4635 UnicodeString::indexOf(UChar32 c,
4636  int32_t start) const {
4637  pinIndex(start);
4638  return indexOf(c, start, length() - start);
4639 }
4640 
4641 inline int32_t
4642 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4643  int32_t srcLength,
4644  int32_t start,
4645  int32_t _length) const
4646 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4647 
4648 inline int32_t
4649 UnicodeString::lastIndexOf(const char16_t *srcChars,
4650  int32_t srcLength,
4651  int32_t start) const {
4652  pinIndex(start);
4653  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4654 }
4655 
4656 inline int32_t
4657 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4658  int32_t srcStart,
4659  int32_t srcLength,
4660  int32_t start,
4661  int32_t _length) const
4662 {
4663  if(!srcText.isBogus()) {
4664  srcText.pinIndices(srcStart, srcLength);
4665  if(srcLength > 0) {
4666  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4667  }
4668  }
4669  return -1;
4670 }
4671 
4672 inline int32_t
4673 UnicodeString::lastIndexOf(const UnicodeString& text,
4674  int32_t start,
4675  int32_t _length) const
4676 { return lastIndexOf(text, 0, text.length(), start, _length); }
4677 
4678 inline int32_t
4679 UnicodeString::lastIndexOf(const UnicodeString& text,
4680  int32_t start) const {
4681  pinIndex(start);
4682  return lastIndexOf(text, 0, text.length(), start, length() - start);
4683 }
4684 
4685 inline int32_t
4686 UnicodeString::lastIndexOf(const UnicodeString& text) const
4687 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4688 
4689 inline int32_t
4690 UnicodeString::lastIndexOf(char16_t c,
4691  int32_t start,
4692  int32_t _length) const
4693 { return doLastIndexOf(c, start, _length); }
4694 
4695 inline int32_t
4696 UnicodeString::lastIndexOf(UChar32 c,
4697  int32_t start,
4698  int32_t _length) const {
4699  return doLastIndexOf(c, start, _length);
4700 }
4701 
4702 inline int32_t
4703 UnicodeString::lastIndexOf(char16_t c) const
4704 { return doLastIndexOf(c, 0, length()); }
4705 
4706 inline int32_t
4707 UnicodeString::lastIndexOf(UChar32 c) const {
4708  return lastIndexOf(c, 0, length());
4709 }
4710 
4711 inline int32_t
4712 UnicodeString::lastIndexOf(char16_t c,
4713  int32_t start) const {
4714  pinIndex(start);
4715  return doLastIndexOf(c, start, length() - start);
4716 }
4717 
4718 inline int32_t
4719 UnicodeString::lastIndexOf(UChar32 c,
4720  int32_t start) const {
4721  pinIndex(start);
4722  return lastIndexOf(c, start, length() - start);
4723 }
4724 
4725 inline UBool
4726 UnicodeString::startsWith(const UnicodeString& text) const
4727 { return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4728 
4729 inline UBool
4730 UnicodeString::startsWith(const UnicodeString& srcText,
4731  int32_t srcStart,
4732  int32_t srcLength) const
4733 { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4734 
4735 inline UBool
4736 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4737  if(srcLength < 0) {
4738  srcLength = u_strlen(toUCharPtr(srcChars));
4739  }
4740  return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4741 }
4742 
4743 inline UBool
4744 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4745  if(srcLength < 0) {
4746  srcLength = u_strlen(toUCharPtr(srcChars));
4747  }
4748  return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4749 }
4750 
4751 inline UBool
4752 UnicodeString::endsWith(const UnicodeString& text) const
4753 { return doEqualsSubstring(length() - text.length(), text.length(),
4754  text, 0, text.length()); }
4755 
4756 inline UBool
4757 UnicodeString::endsWith(const UnicodeString& srcText,
4758  int32_t srcStart,
4759  int32_t srcLength) const {
4760  srcText.pinIndices(srcStart, srcLength);
4761  return doEqualsSubstring(length() - srcLength, srcLength,
4762  srcText, srcStart, srcLength);
4763 }
4764 
4765 inline UBool
4766 UnicodeString::endsWith(ConstChar16Ptr srcChars,
4767  int32_t srcLength) const {
4768  if(srcLength < 0) {
4769  srcLength = u_strlen(toUCharPtr(srcChars));
4770  }
4771  return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4772 }
4773 
4774 inline UBool
4775 UnicodeString::endsWith(const char16_t *srcChars,
4776  int32_t srcStart,
4777  int32_t srcLength) const {
4778  if(srcLength < 0) {
4779  srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4780  }
4781  return doEqualsSubstring(length() - srcLength, srcLength,
4782  srcChars, srcStart, srcLength);
4783 }
4784 
4785 //========================================
4786 // replace
4787 //========================================
4788 inline UnicodeString&
4789 UnicodeString::replace(int32_t start,
4790  int32_t _length,
4791  const UnicodeString& srcText)
4792 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4793 
4794 inline UnicodeString&
4795 UnicodeString::replace(int32_t start,
4796  int32_t _length,
4797  const UnicodeString& srcText,
4798  int32_t srcStart,
4799  int32_t srcLength)
4800 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4801 
4802 inline UnicodeString&
4803 UnicodeString::replace(int32_t start,
4804  int32_t _length,
4805  ConstChar16Ptr srcChars,
4806  int32_t srcLength)
4807 { return doReplace(start, _length, srcChars, 0, srcLength); }
4808 
4809 inline UnicodeString&
4810 UnicodeString::replace(int32_t start,
4811  int32_t _length,
4812  const char16_t *srcChars,
4813  int32_t srcStart,
4814  int32_t srcLength)
4815 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4816 
4817 inline UnicodeString&
4818 UnicodeString::replace(int32_t start,
4819  int32_t _length,
4820  char16_t srcChar)
4821 { return doReplace(start, _length, &srcChar, 0, 1); }
4822 
4823 inline UnicodeString&
4824 UnicodeString::replaceBetween(int32_t start,
4825  int32_t limit,
4826  const UnicodeString& srcText)
4827 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4828 
4829 inline UnicodeString&
4830 UnicodeString::replaceBetween(int32_t start,
4831  int32_t limit,
4832  const UnicodeString& srcText,
4833  int32_t srcStart,
4834  int32_t srcLimit)
4835 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4836 
4837 inline UnicodeString&
4838 UnicodeString::findAndReplace(const UnicodeString& oldText,
4839  const UnicodeString& newText)
4840 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4841  newText, 0, newText.length()); }
4842 
4843 inline UnicodeString&
4844 UnicodeString::findAndReplace(int32_t start,
4845  int32_t _length,
4846  const UnicodeString& oldText,
4847  const UnicodeString& newText)
4848 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4849  newText, 0, newText.length()); }
4850 
4851 // ============================
4852 // extract
4853 // ============================
4854 inline void
4855 UnicodeString::doExtract(int32_t start,
4856  int32_t _length,
4857  UnicodeString& target) const
4858 { target.replace(0, target.length(), *this, start, _length); }
4859 
4860 inline void
4861 UnicodeString::extract(int32_t start,
4862  int32_t _length,
4863  Char16Ptr target,
4864  int32_t targetStart) const
4865 { doExtract(start, _length, target, targetStart); }
4866 
4867 inline void
4868 UnicodeString::extract(int32_t start,
4869  int32_t _length,
4870  UnicodeString& target) const
4871 { doExtract(start, _length, target); }
4872 
4873 #if !UCONFIG_NO_CONVERSION
4874 
4875 inline int32_t
4876 UnicodeString::extract(int32_t start,
4877  int32_t _length,
4878  char *dst,
4879  const char *codepage) const
4880 
4881 {
4882  // This dstSize value will be checked explicitly
4883  return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
4884 }
4885 
4886 #endif
4887 
4888 inline void
4889 UnicodeString::extractBetween(int32_t start,
4890  int32_t limit,
4891  char16_t *dst,
4892  int32_t dstStart) const {
4893  pinIndex(start);
4894  pinIndex(limit);
4895  doExtract(start, limit - start, dst, dstStart);
4896 }
4897 
4898 inline UnicodeString
4899 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4900  return tempSubString(start, limit - start);
4901 }
4902 
4903 inline char16_t
4904 UnicodeString::doCharAt(int32_t offset) const
4905 {
4906  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
4907  return getArrayStart()[offset];
4908  } else {
4909  return kInvalidUChar;
4910  }
4911 }
4912 
4913 inline char16_t
4914 UnicodeString::charAt(int32_t offset) const
4915 { return doCharAt(offset); }
4916 
4917 inline char16_t
4918 UnicodeString::operator[] (int32_t offset) const
4919 { return doCharAt(offset); }
4920 
4921 inline UBool
4922 UnicodeString::isEmpty() const {
4923  // Arithmetic or logical right shift does not matter: only testing for 0.
4924  return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4925 }
4926 
4927 //========================================
4928 // Write implementation methods
4929 //========================================
4930 inline void
4931 UnicodeString::setZeroLength() {
4932  fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4933 }
4934 
4935 inline void
4936 UnicodeString::setShortLength(int32_t len) {
4937  // requires 0 <= len <= kMaxShortLength
4938  fUnion.fFields.fLengthAndFlags =
4939  static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4940 }
4941 
4942 inline void
4943 UnicodeString::setLength(int32_t len) {
4944  if(len <= kMaxShortLength) {
4945  setShortLength(len);
4946  } else {
4947  fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4948  fUnion.fFields.fLength = len;
4949  }
4950 }
4951 
4952 inline void
4953 UnicodeString::setToEmpty() {
4954  fUnion.fFields.fLengthAndFlags = kShortString;
4955 }
4956 
4957 inline void
4958 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4959  setLength(len);
4960  fUnion.fFields.fArray = array;
4961  fUnion.fFields.fCapacity = capacity;
4962 }
4963 
4964 inline UnicodeString&
4965 UnicodeString::operator= (char16_t ch)
4966 { return doReplace(0, length(), &ch, 0, 1); }
4967 
4968 inline UnicodeString&
4969 UnicodeString::operator= (UChar32 ch)
4970 { return replace(0, length(), ch); }
4971 
4972 inline UnicodeString&
4973 UnicodeString::setTo(const UnicodeString& srcText,
4974  int32_t srcStart,
4975  int32_t srcLength)
4976 {
4977  unBogus();
4978  return doReplace(0, length(), srcText, srcStart, srcLength);
4979 }
4980 
4981 inline UnicodeString&
4982 UnicodeString::setTo(const UnicodeString& srcText,
4983  int32_t srcStart)
4984 {
4985  unBogus();
4986  srcText.pinIndex(srcStart);
4987  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4988 }
4989 
4990 inline UnicodeString&
4991 UnicodeString::setTo(const UnicodeString& srcText)
4992 {
4993  return copyFrom(srcText);
4994 }
4995 
4996 inline UnicodeString&
4997 UnicodeString::setTo(const char16_t *srcChars,
4998  int32_t srcLength)
4999 {
5000  unBogus();
5001  return doReplace(0, length(), srcChars, 0, srcLength);
5002 }
5003 
5004 inline UnicodeString&
5005 UnicodeString::setTo(char16_t srcChar)
5006 {
5007  unBogus();
5008  return doReplace(0, length(), &srcChar, 0, 1);
5009 }
5010 
5011 inline UnicodeString&
5012 UnicodeString::setTo(UChar32 srcChar)
5013 {
5014  unBogus();
5015  return replace(0, length(), srcChar);
5016 }
5017 
5018 inline UnicodeString&
5019 UnicodeString::append(const UnicodeString& srcText,
5020  int32_t srcStart,
5021  int32_t srcLength)
5022 { return doAppend(srcText, srcStart, srcLength); }
5023 
5024 inline UnicodeString&
5025 UnicodeString::append(const UnicodeString& srcText)
5026 { return doAppend(srcText, 0, srcText.length()); }
5027 
5028 inline UnicodeString&
5029 UnicodeString::append(const char16_t *srcChars,
5030  int32_t srcStart,
5031  int32_t srcLength)
5032 { return doAppend(srcChars, srcStart, srcLength); }
5033 
5034 inline UnicodeString&
5035 UnicodeString::append(ConstChar16Ptr srcChars,
5036  int32_t srcLength)
5037 { return doAppend(srcChars, 0, srcLength); }
5038 
5039 inline UnicodeString&
5040 UnicodeString::append(char16_t srcChar)
5041 { return doAppend(&srcChar, 0, 1); }
5042 
5043 inline UnicodeString&
5044 UnicodeString::operator+= (char16_t ch)
5045 { return doAppend(&ch, 0, 1); }
5046 
5047 inline UnicodeString&
5048 UnicodeString::operator+= (UChar32 ch) {
5049  return append(ch);
5050 }
5051 
5052 inline UnicodeString&
5053 UnicodeString::operator+= (const UnicodeString& srcText)
5054 { return doAppend(srcText, 0, srcText.length()); }
5055 
5056 inline UnicodeString&
5057 UnicodeString::insert(int32_t start,
5058  const UnicodeString& srcText,
5059  int32_t srcStart,
5060  int32_t srcLength)
5061 { return doReplace(start, 0, srcText, srcStart, srcLength); }
5062 
5063 inline UnicodeString&
5064 UnicodeString::insert(int32_t start,
5065  const UnicodeString& srcText)
5066 { return doReplace(start, 0, srcText, 0, srcText.length()); }
5067 
5068 inline UnicodeString&
5069 UnicodeString::insert(int32_t start,
5070  const char16_t *srcChars,
5071  int32_t srcStart,
5072  int32_t srcLength)
5073 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
5074 
5075 inline UnicodeString&
5076 UnicodeString::insert(int32_t start,
5077  ConstChar16Ptr srcChars,
5078  int32_t srcLength)
5079 { return doReplace(start, 0, srcChars, 0, srcLength); }
5080 
5081 inline UnicodeString&
5082 UnicodeString::insert(int32_t start,
5083  char16_t srcChar)
5084 { return doReplace(start, 0, &srcChar, 0, 1); }
5085 
5086 inline UnicodeString&
5087 UnicodeString::insert(int32_t start,
5088  UChar32 srcChar)
5089 { return replace(start, 0, srcChar); }
5090 
5091 
5092 inline UnicodeString&
5093 UnicodeString::remove()
5094 {
5095  // remove() of a bogus string makes the string empty and non-bogus
5096  if(isBogus()) {
5097  setToEmpty();
5098  } else {
5099  setZeroLength();
5100  }
5101  return *this;
5102 }
5103 
5104 inline UnicodeString&
5105 UnicodeString::remove(int32_t start,
5106  int32_t _length)
5107 {
5108  if(start <= 0 && _length == INT32_MAX) {
5109  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
5110  return remove();
5111  }
5112  return doReplace(start, _length, nullptr, 0, 0);
5113 }
5114 
5115 inline UnicodeString&
5116 UnicodeString::removeBetween(int32_t start,
5117  int32_t limit)
5118 { return doReplace(start, limit - start, nullptr, 0, 0); }
5119 
5120 inline UnicodeString &
5121 UnicodeString::retainBetween(int32_t start, int32_t limit) {
5122  truncate(limit);
5123  return doReplace(0, start, nullptr, 0, 0);
5124 }
5125 
5126 inline UBool
5127 UnicodeString::truncate(int32_t targetLength)
5128 {
5129  if(isBogus() && targetLength == 0) {
5130  // truncate(0) of a bogus string makes the string empty and non-bogus
5131  unBogus();
5132  return false;
5133  } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
5134  setLength(targetLength);
5135  return true;
5136  } else {
5137  return false;
5138  }
5139 }
5140 
5141 inline UnicodeString&
5142 UnicodeString::reverse()
5143 { return doReverse(0, length()); }
5144 
5145 inline UnicodeString&
5146 UnicodeString::reverse(int32_t start,
5147  int32_t _length)
5148 { return doReverse(start, _length); }
5149 
5150 U_NAMESPACE_END
5151 
5152 #endif /* U_SHOW_CPLUSPLUS_API */
5153 
5154 #endif
C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
#define U_ALIASING_BARRIER(ptr)
Barrier for pointer anti-aliasing optimizations even across function boundaries.
Definition: char16ptr.h:37
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:106
A ByteSink can be filled with bytes.
Definition: bytestream.h:55
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:49
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:156
Records lengths of string edits but not replacement text.
Definition: edits.h:80
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:198
Replaceable is an abstract base class representing a string of characters that supports the replacement of a range of itself with a new string of characters.
Definition: rep.h:77
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition: rep.h:251
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition: rep.h:246
Implementation of ByteSink that writes to a "string".
Definition: bytestream.h:269
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:61
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:303
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChar...
UnicodeString(const UnicodeString &that)
Copy constructor.
void push_back(char16_t c)
Appends the code unit c to the UnicodeString object.
Definition: unistr.h:2386
void swap(UnicodeString &other) noexcept
Swap strings.
virtual char16_t getCharAt(int32_t offset) const override
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline agai...
bool operator==(const S &text) const
Equality operator.
Definition: unistr.h:355
virtual int32_t getLength() const override
Implement Replaceable::getLength() (see jitterbug 1027).
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UnicodeString(const wchar_t *text, int32_t textLength)
wchar_t * constructor.
Definition: unistr.h:3312
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text) override
Replace a substring of this object with the given text.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain...
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
UnicodeString & operator=(const S &src)
Assignment operator.
Definition: unistr.h:2022
UnicodeString & append(const S &src)
Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view ...
Definition: unistr.h:2358
UnicodeString & operator=(UnicodeString &&src) noexcept
Move assignment operator; might leave src in bogus state.
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString & toLower()
Convert the characters in this to lower case following the conventions of the default locale.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const UnicodeString &text)
Readonly-aliasing factory method.
Definition: unistr.h:3685
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a ...
Definition: unistr.h:316
bool operator!=(const S &text) const
Inequality operator.
Definition: unistr.h:388
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + length) into an array of characters in a specified c...
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with t...
virtual void copy(int32_t start, int32_t limit, int32_t dest) override
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const override
Copy the characters in the range [start, limit) into the UnicodeString target.
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the ran...
Definition: unistr.h:4795
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
Definition: unistr.h:1783
UnicodeString & toLower(const Locale &locale)
Convert the characters in this to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
int32_t length() const
Return the length of the UnicodeString object.
Definition: unistr.h:4273
unspecified_reverse_iterator rend() const
Definition: unistr.h:1955
virtual UChar32 getChar32At(int32_t offset) const override
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline ...
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + length) into an array of characters in the platform'...
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars...
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
Definition: unistr.h:3420
void setToBogus()
Make this UnicodeString object invalid.
friend void swap(UnicodeString &s1, UnicodeString &s2) noexcept
Non-member UnicodeString swap function.
Definition: unistr.h:2051
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text)
Constructor from text which is, or which is implicitly convertible to, a std::u16string_view or (if U...
Definition: unistr.h:3338
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count Unicode code points in the length char16_t code units of the string.
StringClass toUTF8String() const
Convert the UnicodeString to a UTF-8 string.
Definition: unistr.h:1802
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const S &text)
Readonly-aliasing factory method.
Definition: unistr.h:3662
UnicodeString & operator+=(const S &src)
Append operator.
Definition: unistr.h:2287
unspecified_iterator begin() const
Definition: unistr.h:1937
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
virtual UnicodeString * clone() const override
Clone this object, an instance of a subclass of Replaceable.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
UnicodeString(UnicodeString &&src) noexcept
Move constructor; might leave src in bogus state.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
Definition: unistr.h:3406
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UBool isBogus() const
Determine if this object contains a valid string.
Definition: unistr.h:4288
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UnicodeString(const uint16_t *text, int32_t textLength)
uint16_t * constructor.
Definition: unistr.h:3290
unspecified_reverse_iterator rbegin() const
Definition: unistr.h:1949
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
char16_t value_type
C++ boilerplate.
Definition: unistr.h:306
unspecified_iterator end() const
Definition: unistr.h:1943
virtual UBool hasMetaData() const override
Replaceable API.
UnicodeString & toUpper()
Convert the characters in this to UPPER CASE following the conventions of the default locale.
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
UnicodeString & trim()
Trims leading and trailing whitespace from this UnicodeString.
U_CAPI int32_t u_strlen(const UChar *s)
U_COMMON_API UnicodeString unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2)
U_COMMON_API UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:346
UnicodeString operator+(const UnicodeString &s1, const S &s2)
Creates a new UnicodeString from the concatenation of a UnicodeString and s2 which is,...
Definition: unistr.h:4177
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:837
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
Definition: ucnv_err.h:96
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
Definition: uconfig.h:358
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:449
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition: umachine.h:208
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:400
#define U_SIZEOF_UCHAR
Number of bytes in a UChar (always 2).
Definition: umachine.h:352
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:150
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
Definition: unistr.h:71
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:170
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
Definition: unistr.h:208
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:509
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:315