ICU 77.1  77.1
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
unistr.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 1998-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 *
9 * File unistr.h
10 *
11 * Modification History:
12 *
13 * Date Name Description
14 * 09/25/98 stephen Creation.
15 * 11/11/98 stephen Changed per 11/9 code review.
16 * 04/20/99 stephen Overhauled per 4/16 code review.
17 * 11/18/99 aliu Made to inherit from Replaceable. Added method
18 * handleReplaceBetween(); other methods unchanged.
19 * 06/25/01 grhoten Remove dependency on iostream.
20 ******************************************************************************
21 */
22 
23 #ifndef UNISTR_H
24 #define UNISTR_H
25 
31 #include "unicode/utypes.h"
32 
33 #if U_SHOW_CPLUSPLUS_API
34 
35 #include <cstddef>
36 #include <string_view>
37 #include "unicode/char16ptr.h"
38 #include "unicode/rep.h"
39 #include "unicode/std_string.h"
40 #include "unicode/stringpiece.h"
41 #include "unicode/bytestream.h"
42 
43 struct UConverter; // unicode/ucnv.h
44 
45 #ifndef USTRING_H
51 U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
52 #endif
53 
54 U_NAMESPACE_BEGIN
55 
56 #if !UCONFIG_NO_BREAK_ITERATION
57 class BreakIterator; // unicode/brkiter.h
58 #endif
59 class Edits;
60 
61 U_NAMESPACE_END
62 
63 // Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
// Function type for the string case-mapping workers that UnicodeString::caseMap()
// receives as its stringCaseMapper argument.
// Parameters: a case locale id and option bits, an optional BreakIterator (present
// only when break iteration is compiled in), a destination buffer with its capacity,
// the source text with its length, an optional Edits recorder, and an in/out UErrorCode.
// NOTE(review): the int32_t result is presumably the output length -- confirm.
70 typedef int32_t U_CALLCONV
71 UStringCaseMapper(int32_t caseLocale, uint32_t options,
// The guard below was missing from this listing (line 72), leaving the #endif unbalanced.
72 #if !UCONFIG_NO_BREAK_ITERATION
73  icu::BreakIterator *iter,
74 #endif
75  char16_t *dest, int32_t destCapacity,
76  const char16_t *src, int32_t srcLength,
77  icu::Edits *edits,
78  UErrorCode &errorCode);
79 
80 U_NAMESPACE_BEGIN
81 
82 class Locale; // unicode/locid.h
83 class StringCharacterIterator;
84 class UnicodeStringAppendable; // unicode/appendable.h
85 
86 /* The <iostream> include has been moved to unicode/ustream.h */
87 
98 #define US_INV icu::UnicodeString::kInvariant
99 
// UNICODE_STRING(cs, _length): builds an icu::UnicodeString from the literal cs.
// The macro pastes a u prefix onto cs (u ## cs), so cs must be a plain string literal,
// and forwards to the (true, text, _length) constructor.
// When U_CHAR16_IS_TYPEDEF is set, the pasted literal is additionally cast to
// const char16_t* before being passed on.
120 #if !U_CHAR16_IS_TYPEDEF
121 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
122 #else
123 # define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
124 #endif
125 
135 #define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136 
144 #ifndef UNISTR_FROM_CHAR_EXPLICIT
145 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146  // Auto-"explicit" in ICU library code.
147 # define UNISTR_FROM_CHAR_EXPLICIT explicit
148 # else
149  // Empty by default for source code compatibility.
150 # define UNISTR_FROM_CHAR_EXPLICIT
151 # endif
152 #endif
153 
164 #ifndef UNISTR_FROM_STRING_EXPLICIT
165 # if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166  // Auto-"explicit" in ICU library code.
167 # define UNISTR_FROM_STRING_EXPLICIT explicit
168 # else
169  // Empty by default for source code compatibility.
170 # define UNISTR_FROM_STRING_EXPLICIT
171 # endif
172 #endif
173 
207 #ifndef UNISTR_OBJECT_SIZE
208 # define UNISTR_OBJECT_SIZE 64
209 #endif
210 
296 {
297 public:
298 
// Tag enum with the single enumerator kInvariant (also reachable through the US_INV macro).
// It appears as the trailing parameter of one extract() overload and one constructor,
// where it selects the overload that works on "invariant characters".
307  enum EInvariant {
312  kInvariant
313  };
314 
315  //========================================
316  // Read-only operations
317  //========================================
318 
319  /* Comparison - bitwise only - for international comparison use collation */
320 
328  inline bool operator== (const UnicodeString& text) const;
329 
330 #ifndef U_HIDE_DRAFT_API
// Equality against any S for which ConvertibleToU16StringView<S> is true; the
// enable_if_t default template argument removes this overload for other types.
// text is viewed through internal::toU16StringView(). The result is true only if this
// string is not bogus, both lengths are equal, and doEquals() accepts the view's data.
346  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
347  inline bool operator==(const S &text) const {
348  std::u16string_view sv(internal::toU16StringView(text));
349  uint32_t len; // unsigned to avoid a compiler warning
// len is assigned inside the expression, so length() is only read once isBogus() is false.
350  return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
351  }
352 #endif // U_HIDE_DRAFT_API
353 
361  inline bool operator!= (const UnicodeString& text) const;
362 
363 #ifndef U_HIDE_DRAFT_API
// Inequality against any S for which ConvertibleToU16StringView<S> is true.
// Defined as the exact negation of operator==(text).
381  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
382  inline bool operator!=(const S &text) const {
383  return !operator==(text);
384  }
385 #endif // U_HIDE_DRAFT_API
386 
394  inline UBool operator> (const UnicodeString& text) const;
395 
403  inline UBool operator< (const UnicodeString& text) const;
404 
412  inline UBool operator>= (const UnicodeString& text) const;
413 
421  inline UBool operator<= (const UnicodeString& text) const;
422 
434  inline int8_t compare(const UnicodeString& text) const;
435 
451  inline int8_t compare(int32_t start,
452  int32_t length,
453  const UnicodeString& text) const;
454 
472  inline int8_t compare(int32_t start,
473  int32_t length,
474  const UnicodeString& srcText,
475  int32_t srcStart,
476  int32_t srcLength) const;
477 
490  inline int8_t compare(ConstChar16Ptr srcChars,
491  int32_t srcLength) const;
492 
507  inline int8_t compare(int32_t start,
508  int32_t length,
509  const char16_t *srcChars) const;
510 
528  inline int8_t compare(int32_t start,
529  int32_t length,
530  const char16_t *srcChars,
531  int32_t srcStart,
532  int32_t srcLength) const;
533 
551  inline int8_t compareBetween(int32_t start,
552  int32_t limit,
553  const UnicodeString& srcText,
554  int32_t srcStart,
555  int32_t srcLimit) const;
556 
574  inline int8_t compareCodePointOrder(const UnicodeString& text) const;
575 
595  inline int8_t compareCodePointOrder(int32_t start,
596  int32_t length,
597  const UnicodeString& srcText) const;
598 
620  inline int8_t compareCodePointOrder(int32_t start,
621  int32_t length,
622  const UnicodeString& srcText,
623  int32_t srcStart,
624  int32_t srcLength) const;
625 
644  inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
645  int32_t srcLength) const;
646 
666  inline int8_t compareCodePointOrder(int32_t start,
667  int32_t length,
668  const char16_t *srcChars) const;
669 
691  inline int8_t compareCodePointOrder(int32_t start,
692  int32_t length,
693  const char16_t *srcChars,
694  int32_t srcStart,
695  int32_t srcLength) const;
696 
718  inline int8_t compareCodePointOrderBetween(int32_t start,
719  int32_t limit,
720  const UnicodeString& srcText,
721  int32_t srcStart,
722  int32_t srcLimit) const;
723 
742  inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
743 
764  inline int8_t caseCompare(int32_t start,
765  int32_t length,
766  const UnicodeString& srcText,
767  uint32_t options) const;
768 
791  inline int8_t caseCompare(int32_t start,
792  int32_t length,
793  const UnicodeString& srcText,
794  int32_t srcStart,
795  int32_t srcLength,
796  uint32_t options) const;
797 
817  inline int8_t caseCompare(ConstChar16Ptr srcChars,
818  int32_t srcLength,
819  uint32_t options) const;
820 
841  inline int8_t caseCompare(int32_t start,
842  int32_t length,
843  const char16_t *srcChars,
844  uint32_t options) const;
845 
868  inline int8_t caseCompare(int32_t start,
869  int32_t length,
870  const char16_t *srcChars,
871  int32_t srcStart,
872  int32_t srcLength,
873  uint32_t options) const;
874 
897  inline int8_t caseCompareBetween(int32_t start,
898  int32_t limit,
899  const UnicodeString& srcText,
900  int32_t srcStart,
901  int32_t srcLimit,
902  uint32_t options) const;
903 
911  inline UBool startsWith(const UnicodeString& text) const;
912 
923  inline UBool startsWith(const UnicodeString& srcText,
924  int32_t srcStart,
925  int32_t srcLength) const;
926 
935  inline UBool startsWith(ConstChar16Ptr srcChars,
936  int32_t srcLength) const;
937 
947  inline UBool startsWith(const char16_t *srcChars,
948  int32_t srcStart,
949  int32_t srcLength) const;
950 
958  inline UBool endsWith(const UnicodeString& text) const;
959 
970  inline UBool endsWith(const UnicodeString& srcText,
971  int32_t srcStart,
972  int32_t srcLength) const;
973 
982  inline UBool endsWith(ConstChar16Ptr srcChars,
983  int32_t srcLength) const;
984 
995  inline UBool endsWith(const char16_t *srcChars,
996  int32_t srcStart,
997  int32_t srcLength) const;
998 
999 
1000  /* Searching - bitwise only */
1001 
1010  inline int32_t indexOf(const UnicodeString& text) const;
1011 
1021  inline int32_t indexOf(const UnicodeString& text,
1022  int32_t start) const;
1023 
1035  inline int32_t indexOf(const UnicodeString& text,
1036  int32_t start,
1037  int32_t length) const;
1038 
1055  inline int32_t indexOf(const UnicodeString& srcText,
1056  int32_t srcStart,
1057  int32_t srcLength,
1058  int32_t start,
1059  int32_t length) const;
1060 
1072  inline int32_t indexOf(const char16_t *srcChars,
1073  int32_t srcLength,
1074  int32_t start) const;
1075 
1088  inline int32_t indexOf(ConstChar16Ptr srcChars,
1089  int32_t srcLength,
1090  int32_t start,
1091  int32_t length) const;
1092 
1109  int32_t indexOf(const char16_t *srcChars,
1110  int32_t srcStart,
1111  int32_t srcLength,
1112  int32_t start,
1113  int32_t length) const;
1114 
1122  inline int32_t indexOf(char16_t c) const;
1123 
1132  inline int32_t indexOf(UChar32 c) const;
1133 
1142  inline int32_t indexOf(char16_t c,
1143  int32_t start) const;
1144 
1154  inline int32_t indexOf(UChar32 c,
1155  int32_t start) const;
1156 
1167  inline int32_t indexOf(char16_t c,
1168  int32_t start,
1169  int32_t length) const;
1170 
1182  inline int32_t indexOf(UChar32 c,
1183  int32_t start,
1184  int32_t length) const;
1185 
1194  inline int32_t lastIndexOf(const UnicodeString& text) const;
1195 
1205  inline int32_t lastIndexOf(const UnicodeString& text,
1206  int32_t start) const;
1207 
1219  inline int32_t lastIndexOf(const UnicodeString& text,
1220  int32_t start,
1221  int32_t length) const;
1222 
1239  inline int32_t lastIndexOf(const UnicodeString& srcText,
1240  int32_t srcStart,
1241  int32_t srcLength,
1242  int32_t start,
1243  int32_t length) const;
1244 
1255  inline int32_t lastIndexOf(const char16_t *srcChars,
1256  int32_t srcLength,
1257  int32_t start) const;
1258 
1271  inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1272  int32_t srcLength,
1273  int32_t start,
1274  int32_t length) const;
1275 
1292  int32_t lastIndexOf(const char16_t *srcChars,
1293  int32_t srcStart,
1294  int32_t srcLength,
1295  int32_t start,
1296  int32_t length) const;
1297 
1305  inline int32_t lastIndexOf(char16_t c) const;
1306 
1315  inline int32_t lastIndexOf(UChar32 c) const;
1316 
1325  inline int32_t lastIndexOf(char16_t c,
1326  int32_t start) const;
1327 
1337  inline int32_t lastIndexOf(UChar32 c,
1338  int32_t start) const;
1339 
1350  inline int32_t lastIndexOf(char16_t c,
1351  int32_t start,
1352  int32_t length) const;
1353 
1365  inline int32_t lastIndexOf(UChar32 c,
1366  int32_t start,
1367  int32_t length) const;
1368 
1369 
1370  /* Character access */
1371 
1380  inline char16_t charAt(int32_t offset) const;
1381 
1389  inline char16_t operator[] (int32_t offset) const;
1390 
1402  UChar32 char32At(int32_t offset) const;
1403 
1419  int32_t getChar32Start(int32_t offset) const;
1420 
1437  int32_t getChar32Limit(int32_t offset) const;
1438 
1489  int32_t moveIndex32(int32_t index, int32_t delta) const;
1490 
1491  /* Substring extraction */
1492 
1508  inline void extract(int32_t start,
1509  int32_t length,
1510  Char16Ptr dst,
1511  int32_t dstStart = 0) const;
1512 
1534  int32_t
1535  extract(Char16Ptr dest, int32_t destCapacity,
1536  UErrorCode &errorCode) const;
1537 
1547  inline void extract(int32_t start,
1548  int32_t length,
1549  UnicodeString& target) const;
1550 
1562  inline void extractBetween(int32_t start,
1563  int32_t limit,
1564  char16_t *dst,
1565  int32_t dstStart = 0) const;
1566 
1575  virtual void extractBetween(int32_t start,
1576  int32_t limit,
1577  UnicodeString& target) const override;
1578 
1600  int32_t extract(int32_t start,
1601  int32_t startLength,
1602  char *target,
1603  int32_t targetCapacity,
1604  enum EInvariant inv) const;
1605 
1606 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1607 
1627  int32_t extract(int32_t start,
1628  int32_t startLength,
1629  char *target,
1630  uint32_t targetLength) const;
1631 
1632 #endif
1633 
1634 #if !UCONFIG_NO_CONVERSION
1635 
1661  inline int32_t extract(int32_t start,
1662  int32_t startLength,
1663  char* target,
1664  const char* codepage = nullptr) const;
1665 
1695  int32_t extract(int32_t start,
1696  int32_t startLength,
1697  char *target,
1698  uint32_t targetLength,
1699  const char *codepage) const;
1700 
1718  int32_t extract(char *dest, int32_t destCapacity,
1719  UConverter *cnv,
1720  UErrorCode &errorCode) const;
1721 
1722 #endif
1723 
1737  UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1738 
1749  inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1750 
1762  void toUTF8(ByteSink &sink) const;
1763 
// Writes this string as UTF-8 into result and returns result.
// A StringByteSink wrapping &result is handed to toUTF8(ByteSink&).
// NOTE(review): the second StringByteSink argument, length(), is presumably a
// capacity hint in UTF-16 code units -- confirm against the StringByteSink docs.
1776  template<typename StringClass>
1777  StringClass &toUTF8String(StringClass &result) const {
1778  StringByteSink<StringClass> sbs(&result, length());
1779  toUTF8(sbs);
1780  return result;
1781  }
1782 
1798  int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1799 
1800  /* Length operations */
1801 
1810  inline int32_t length() const;
1811 
1825  int32_t
1826  countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1827 
1851  UBool
1852  hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1853 
1859  inline UBool isEmpty() const;
1860 
1870  inline int32_t getCapacity() const;
1871 
1872  /* Other operations */
1873 
1879  inline int32_t hashCode() const;
1880 
1893  inline UBool isBogus() const;
1894 
1895  //========================================
1896  // Write operations
1897  //========================================
1898 
1899  /* Assignment operations */
1900 
1920 
1947 
1948 #ifndef U_HIDE_DRAFT_API
// Assignment from any S for which ConvertibleToU16StringView<S> is true.
// unBogus() first turns a bogus string into an empty one; then the whole current
// range [0, length()) is replaced with the view obtained from src.
1959  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
1960  inline UnicodeString &operator=(const S &src) {
1961  unBogus();
1962  return doReplace(0, length(), internal::toU16StringView(src));
1963  }
1964 #endif // U_HIDE_DRAFT_API
1965 
1975 
1981  void swap(UnicodeString &other) noexcept;
1982 
// Non-member swap for two UnicodeString objects, declared noexcept.
// It simply forwards to the member function s1.swap(s2).
1989  friend inline void U_EXPORT2
1990  swap(UnicodeString &s1, UnicodeString &s2) noexcept {
1991  s1.swap(s2);
1992  }
1993 
2001  inline UnicodeString& operator= (char16_t ch);
2002 
2010  inline UnicodeString& operator= (UChar32 ch);
2011 
2023  inline UnicodeString& setTo(const UnicodeString& srcText,
2024  int32_t srcStart);
2025 
2039  inline UnicodeString& setTo(const UnicodeString& srcText,
2040  int32_t srcStart,
2041  int32_t srcLength);
2042 
2051  inline UnicodeString& setTo(const UnicodeString& srcText);
2052 
2061  inline UnicodeString& setTo(const char16_t *srcChars,
2062  int32_t srcLength);
2063 
2072  inline UnicodeString& setTo(char16_t srcChar);
2073 
2082  inline UnicodeString& setTo(UChar32 srcChar);
2083 
2107  UnicodeString &setTo(UBool isTerminated,
2108  ConstChar16Ptr text,
2109  int32_t textLength);
2110 
2130  UnicodeString &setTo(char16_t *buffer,
2131  int32_t buffLength,
2132  int32_t buffCapacity);
2133 
2173  void setToBogus();
2174 
2182  UnicodeString& setCharAt(int32_t offset,
2183  char16_t ch);
2184 
2185 
2186  /* Append operations */
2187 
2195  inline UnicodeString& operator+= (char16_t ch);
2196 
2204  inline UnicodeString& operator+= (UChar32 ch);
2205 
2213  inline UnicodeString& operator+= (const UnicodeString& srcText);
2214 
2215 #ifndef U_HIDE_DRAFT_API
// Append operator for any S for which ConvertibleToU16StringView<S> is true.
// src is viewed through internal::toU16StringView() and passed to doAppend();
// the reference doAppend() returns is returned unchanged.
2226  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2227  inline UnicodeString& operator+=(const S &src) {
2228  return doAppend(internal::toU16StringView(src));
2229  }
2230 #endif // U_HIDE_DRAFT_API
2231 
2246  inline UnicodeString& append(const UnicodeString& srcText,
2247  int32_t srcStart,
2248  int32_t srcLength);
2249 
2257  inline UnicodeString& append(const UnicodeString& srcText);
2258 
2272  inline UnicodeString& append(const char16_t *srcChars,
2273  int32_t srcStart,
2274  int32_t srcLength);
2275 
2285  inline UnicodeString& append(ConstChar16Ptr srcChars,
2286  int32_t srcLength);
2287 
2288 #ifndef U_HIDE_DRAFT_API
// append() for any S for which ConvertibleToU16StringView<S> is true.
// The body is the same as the templated operator+=: src is viewed through
// internal::toU16StringView() and passed to doAppend().
2299  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2300  inline UnicodeString& append(const S &src) {
2301  return doAppend(internal::toU16StringView(src));
2302  }
2303 #endif // U_HIDE_DRAFT_API
2304 
2311  inline UnicodeString& append(char16_t srcChar);
2312 
2320 
2321 
2322  /* Insert operations */
2323 
2337  inline UnicodeString& insert(int32_t start,
2338  const UnicodeString& srcText,
2339  int32_t srcStart,
2340  int32_t srcLength);
2341 
2350  inline UnicodeString& insert(int32_t start,
2351  const UnicodeString& srcText);
2352 
2366  inline UnicodeString& insert(int32_t start,
2367  const char16_t *srcChars,
2368  int32_t srcStart,
2369  int32_t srcLength);
2370 
2380  inline UnicodeString& insert(int32_t start,
2381  ConstChar16Ptr srcChars,
2382  int32_t srcLength);
2383 
2392  inline UnicodeString& insert(int32_t start,
2393  char16_t srcChar);
2394 
2403  inline UnicodeString& insert(int32_t start,
2404  UChar32 srcChar);
2405 
2406 
2407  /* Replace operations */
2408 
2426  inline UnicodeString& replace(int32_t start,
2427  int32_t length,
2428  const UnicodeString& srcText,
2429  int32_t srcStart,
2430  int32_t srcLength);
2431 
2444  inline UnicodeString& replace(int32_t start,
2445  int32_t length,
2446  const UnicodeString& srcText);
2447 
2465  inline UnicodeString& replace(int32_t start,
2466  int32_t length,
2467  const char16_t *srcChars,
2468  int32_t srcStart,
2469  int32_t srcLength);
2470 
2483  inline UnicodeString& replace(int32_t start,
2484  int32_t length,
2485  ConstChar16Ptr srcChars,
2486  int32_t srcLength);
2487 
2499  inline UnicodeString& replace(int32_t start,
2500  int32_t length,
2501  char16_t srcChar);
2502 
2514  UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2515 
2525  inline UnicodeString& replaceBetween(int32_t start,
2526  int32_t limit,
2527  const UnicodeString& srcText);
2528 
2543  inline UnicodeString& replaceBetween(int32_t start,
2544  int32_t limit,
2545  const UnicodeString& srcText,
2546  int32_t srcStart,
2547  int32_t srcLimit);
2548 
2556  virtual void handleReplaceBetween(int32_t start,
2557  int32_t limit,
2558  const UnicodeString& text) override;
2559 
2565  virtual UBool hasMetaData() const override;
2566 
2580  virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2581 
2582  /* Search and replace operations */
2583 
2592  inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2593  const UnicodeString& newText);
2594 
2606  inline UnicodeString& findAndReplace(int32_t start,
2607  int32_t length,
2608  const UnicodeString& oldText,
2609  const UnicodeString& newText);
2610 
// Most general findAndReplace() overload: in addition to the [start, start+length)
// range of this string it takes explicit sub-ranges of oldText and newText.
// NOTE(review): presumably replaces each occurrence of
// oldText[oldStart, oldStart+oldLength) inside the given range with
// newText[newStart, newStart+newLength) and returns *this -- confirm against the ICU API docs.
// The first line of this declaration (listing line 2628) was missing from this
// listing and is restored here.
2628  UnicodeString& findAndReplace(int32_t start,
2629  int32_t length,
2630  const UnicodeString& oldText,
2631  int32_t oldStart,
2632  int32_t oldLength,
2633  const UnicodeString& newText,
2634  int32_t newStart,
2635  int32_t newLength);
2636 
2637 
2638  /* Remove operations */
2639 
2648  inline UnicodeString& remove();
2649 
2658  inline UnicodeString& remove(int32_t start,
2659  int32_t length = static_cast<int32_t>(INT32_MAX));
2660 
2669  inline UnicodeString& removeBetween(int32_t start,
2670  int32_t limit = static_cast<int32_t>(INT32_MAX));
2671 
2681  inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2682 
2683  /* Length operations */
2684 
2696  UBool padLeading(int32_t targetLength,
2697  char16_t padChar = 0x0020);
2698 
2710  UBool padTrailing(int32_t targetLength,
2711  char16_t padChar = 0x0020);
2712 
2719  inline UBool truncate(int32_t targetLength);
2720 
2727 
2728  /* Miscellaneous operations */
2729 
2735  inline UnicodeString& reverse();
2736 
2745  inline UnicodeString& reverse(int32_t start,
2746  int32_t length);
2747 
2755 
2763  UnicodeString& toUpper(const Locale& locale);
2764 
2772 
2780  UnicodeString& toLower(const Locale& locale);
2781 
2782 #if !UCONFIG_NO_BREAK_ITERATION
2783 
2811 
2839  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2840 
2871  UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2872 
2873 #endif
2874 
2888  UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2889 
2890  //========================================
2891  // Access to the internal buffer
2892  //========================================
2893 
2937  char16_t *getBuffer(int32_t minCapacity);
2938 
2959  void releaseBuffer(int32_t newLength=-1);
2960 
2991  inline const char16_t *getBuffer() const;
2992 
3026  const char16_t *getTerminatedBuffer();
3027 
3028 #ifndef U_HIDE_DRAFT_API
// Implicit conversion to std::u16string_view.
// The view is built from getBuffer() and length(), so it refers to this string's
// own storage rather than a copy.
// NOTE(review): the view presumably becomes invalid once the string is modified
// or destroyed -- confirm.
3035  inline operator std::u16string_view() const {
3036  return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
3037  }
3038 
3039 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Implicit conversion to std::wstring_view; the surrounding #if only compiles it
// when wchar_t is 2 bytes (or for Doxygen).
// The view is built from getBuffer() reinterpreted as const wchar_t* and length().
// U_ALIASING_BARRIER(p), when defined, is applied to the pointer before the
// reinterpret_cast.
// NOTE(review): the view presumably becomes invalid once the string is modified
// or destroyed -- confirm.
3049  inline operator std::wstring_view() const {
3050  const char16_t *p = getBuffer();
3051 #ifdef U_ALIASING_BARRIER
3052  U_ALIASING_BARRIER(p);
3053 #endif
// static_cast instead of a C-style cast, matching the u16string_view conversion.
3054  return { reinterpret_cast<const wchar_t *>(p), static_cast<std::wstring_view::size_type>(length()) };
3055  }
3056 #endif // U_SIZEOF_WCHAR_T
3057 #endif // U_HIDE_DRAFT_API
3058 
3059  //========================================
3060  // Constructors
3061  //========================================
3062 
3066  inline UnicodeString();
3067 
3079  UnicodeString(int32_t capacity, UChar32 c, int32_t count);
3080 
3091 
3102 
3103 #ifdef U_HIDE_DRAFT_API
// Constructor from a const char16_t* pointer; compiled only when U_HIDE_DRAFT_API is defined.
// Delegates to UnicodeString(text, -1).
// NOTE(review): -1 presumably means "text is NUL-terminated, compute the length" -- confirm.
3123  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
3124  UnicodeString(text, -1) {}
3125 #endif // U_HIDE_DRAFT_API
3126 
3127 #if !U_CHAR16_IS_TYPEDEF && \
3128  (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
// Constructor from a const uint16_t* pointer.
// Wraps text in ConstChar16Ptr and delegates to the (pointer, length) constructor with length -1.
3148  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text) :
3149  UnicodeString(ConstChar16Ptr(text), -1) {}
3150 #endif
3151 
3152 #if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
// Constructor from a const wchar_t* pointer (guarded so it only exists for 2-byte wchar_t).
// Wraps text in ConstChar16Ptr and delegates to the (pointer, length) constructor with length -1.
3173  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3174  UnicodeString(ConstChar16Ptr(text), -1) {}
3175 #endif
3176 
3187  UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3188 
3205  UnicodeString(const char16_t *text,
3206  int32_t textLength);
3207 
3208 #if !U_CHAR16_IS_TYPEDEF
// Constructor from a const uint16_t* pointer plus an explicit length.
// Wraps text in ConstChar16Ptr and delegates to the (pointer, length) constructor,
// passing textLength through unchanged.
3225  UnicodeString(const uint16_t *text, int32_t textLength) :
3226  UnicodeString(ConstChar16Ptr(text), textLength) {}
3227 #endif
3228 
3229 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Constructor from a const wchar_t* pointer plus an explicit length
// (guarded so it only exists for 2-byte wchar_t).
// Wraps text in ConstChar16Ptr and delegates, passing textLength through unchanged.
3247  UnicodeString(const wchar_t *text, int32_t textLength) :
3248  UnicodeString(ConstChar16Ptr(text), textLength) {}
3249 #endif
3250 
3258  inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3259 
3260 #ifndef U_HIDE_DRAFT_API
// Constructor from any S for which ConvertibleToU16StringView<S> is true.
// The object is first set up with the kShortString flag, then the text -- viewed
// through internal::toU16StringViewNullable() -- is appended with doAppend().
// NOTE(review): kShortString presumably selects the empty in-object buffer state -- confirm.
3273  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
// The signature line (listing line 3274) was missing from this listing and is restored here.
3274  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text) {
3275  fUnion.fFields.fLengthAndFlags = kShortString;
3276  doAppend(internal::toU16StringViewNullable(text));
3277  }
3278 #endif // U_HIDE_DRAFT_API
3279 
3310  UnicodeString(UBool isTerminated,
3311  ConstChar16Ptr text,
3312  int32_t textLength);
3313 
3332  UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3333 
3334 #if !U_CHAR16_IS_TYPEDEF
// Constructor from a non-const uint16_t* buffer with its length and capacity.
// Wraps buffer in Char16Ptr and delegates to the char16_t* buffer constructor,
// passing buffLength and buffCapacity through unchanged.
3343  UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3344  UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3345 #endif
3346 
3347 #if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Constructor from a non-const wchar_t* buffer with its length and capacity
// (guarded so it only exists for 2-byte wchar_t).
// Wraps buffer in Char16Ptr and delegates to the char16_t* buffer constructor.
3357  UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3358  UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3359 #endif
3360 
3369  inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3370 
3371 #if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3372 
3396  UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData);
3397 
3406  UnicodeString(const char *codepageData, int32_t dataLength);
3407 
3408 #endif
3409 
3410 #if !UCONFIG_NO_CONVERSION
3411 
3429  UnicodeString(const char *codepageData, const char *codepage);
3430 
3448  UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3449 
// Constructor from char data using a caller-supplied UConverter; compiled only
// when conversion is enabled (!UCONFIG_NO_CONVERSION).
// Takes the source bytes with their length, the converter, and an in/out UErrorCode.
// NOTE(review): srcLength of -1 presumably means NUL-terminated input -- confirm.
// The opening line of this declaration (listing line 3471) was missing from this
// listing and is restored here.
3471  UnicodeString(
3472  const char *src, int32_t srcLength,
3473  UConverter *cnv,
3474  UErrorCode &errorCode);
3475 
3476 #endif
3477 
3510  UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3511 
3512 
3530 
3537  UnicodeString(UnicodeString &&src) noexcept;
3538 
3545  UnicodeString(const UnicodeString& src, int32_t srcStart);
3546 
3554  UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3555 
3569  virtual UnicodeString *clone() const override;
3570 
3574  virtual ~UnicodeString();
3575 
3576 #ifndef U_HIDE_DRAFT_API
// Static factory for any S for which ConvertibleToU16StringView<S> is true.
// text is viewed through internal::toU16StringView() and handed to the private
// helper readOnlyAliasFromU16StringView(); that helper's result is returned.
// NOTE(review): the result presumably aliases text's storage instead of copying it,
// so text would have to outlive the returned string -- confirm.
3599  template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3600  static inline UnicodeString readOnlyAlias(const S &text) {
3601  return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3602  }
3603 
// Static factory overload taking another UnicodeString.
// Forwards to the private helper readOnlyAliasFromUnicodeString() and returns its result.
// NOTE(review): the result presumably aliases text's buffer instead of copying it -- confirm.
3623  static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
3624  return readOnlyAliasFromUnicodeString(text);
3625  }
3626 #endif // U_HIDE_DRAFT_API
3627 
3642 
3654  static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3655 
3656  /* Miscellaneous operations */
3657 
3693 
3713  UChar32 unescapeAt(int32_t &offset) const;
3714 
3720  static UClassID U_EXPORT2 getStaticClassID();
3721 
3727  virtual UClassID getDynamicClassID() const override;
3728 
3729  //========================================
3730  // Implementation methods
3731  //========================================
3732 
3733 protected:
3738  virtual int32_t getLength() const override;
3739 
3745  virtual char16_t getCharAt(int32_t offset) const override;
3746 
3752  virtual UChar32 getChar32At(int32_t offset) const override;
3753 
3754 private:
3755  static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3756  static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
3757 
3758  // For char* constructors. Could be made public.
3759  UnicodeString &setToUTF8(StringPiece utf8);
3760  // For extract(char*).
3761  // We could make a toUTF8(target, capacity, errorCode) public but not
3762  // this version: New API will be cleaner if we make callers create substrings
3763  // rather than having start+length on every method,
3764  // and it should take a UErrorCode&.
3765  int32_t
3766  toUTF8(int32_t start, int32_t len,
3767  char *target, int32_t capacity) const;
3768 
// Private helper: compares this string with text over len code units by forwarding
// text.getArrayStart() to the (const char16_t *, int32_t) overload of doEquals().
// len is taken as given; this wrapper does not check it against either string's length.
3773  inline UBool doEquals(const UnicodeString &text, int32_t len) const {
3774  return doEquals(text.getArrayStart(), len);
3775  }
3776  UBool doEquals(const char16_t *text, int32_t len) const;
3777 
3778  inline UBool
3779  doEqualsSubstring(int32_t start,
3780  int32_t length,
3781  const UnicodeString& srcText,
3782  int32_t srcStart,
3783  int32_t srcLength) const;
3784 
3785  UBool doEqualsSubstring(int32_t start,
3786  int32_t length,
3787  const char16_t *srcChars,
3788  int32_t srcStart,
3789  int32_t srcLength) const;
3790 
3791  inline int8_t
3792  doCompare(int32_t start,
3793  int32_t length,
3794  const UnicodeString& srcText,
3795  int32_t srcStart,
3796  int32_t srcLength) const;
3797 
3798  int8_t doCompare(int32_t start,
3799  int32_t length,
3800  const char16_t *srcChars,
3801  int32_t srcStart,
3802  int32_t srcLength) const;
3803 
3804  inline int8_t
3805  doCompareCodePointOrder(int32_t start,
3806  int32_t length,
3807  const UnicodeString& srcText,
3808  int32_t srcStart,
3809  int32_t srcLength) const;
3810 
3811  int8_t doCompareCodePointOrder(int32_t start,
3812  int32_t length,
3813  const char16_t *srcChars,
3814  int32_t srcStart,
3815  int32_t srcLength) const;
3816 
3817  inline int8_t
3818  doCaseCompare(int32_t start,
3819  int32_t length,
3820  const UnicodeString &srcText,
3821  int32_t srcStart,
3822  int32_t srcLength,
3823  uint32_t options) const;
3824 
3825  int8_t
3826  doCaseCompare(int32_t start,
3827  int32_t length,
3828  const char16_t *srcChars,
3829  int32_t srcStart,
3830  int32_t srcLength,
3831  uint32_t options) const;
3832 
3833  int32_t doIndexOf(char16_t c,
3834  int32_t start,
3835  int32_t length) const;
3836 
3837  int32_t doIndexOf(UChar32 c,
3838  int32_t start,
3839  int32_t length) const;
3840 
3841  int32_t doLastIndexOf(char16_t c,
3842  int32_t start,
3843  int32_t length) const;
3844 
3845  int32_t doLastIndexOf(UChar32 c,
3846  int32_t start,
3847  int32_t length) const;
3848 
3849  void doExtract(int32_t start,
3850  int32_t length,
3851  char16_t *dst,
3852  int32_t dstStart) const;
3853 
3854  inline void doExtract(int32_t start,
3855  int32_t length,
3856  UnicodeString& target) const;
3857 
3858  inline char16_t doCharAt(int32_t offset) const;
3859 
3860  UnicodeString& doReplace(int32_t start,
3861  int32_t length,
3862  const UnicodeString& srcText,
3863  int32_t srcStart,
3864  int32_t srcLength);
3865 
3866  UnicodeString& doReplace(int32_t start,
3867  int32_t length,
3868  const char16_t *srcChars,
3869  int32_t srcStart,
3870  int32_t srcLength);
3871  UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3872 
3873  UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3874  UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3875  UnicodeString& doAppend(std::u16string_view src);
3876 
3877  UnicodeString& doReverse(int32_t start,
3878  int32_t length);
3879 
3880  // calculate hash code
3881  int32_t doHashCode() const;
3882 
3883  // get pointer to start of array
3884  // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3885  inline char16_t* getArrayStart();
3886  inline const char16_t* getArrayStart() const;
3887 
3888  inline UBool hasShortLength() const;
3889  inline int32_t getShortLength() const;
3890 
3891  // A UnicodeString object (not necessarily its current buffer)
3892  // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3893  inline UBool isWritable() const;
3894 
3895  // Is the current buffer writable?
3896  inline UBool isBufferWritable() const;
3897 
3898  // None of the following does releaseArray().
3899  inline void setZeroLength();
3900  inline void setShortLength(int32_t len);
3901  inline void setLength(int32_t len);
3902  inline void setToEmpty();
3903  inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3904 
3905  // allocate the array; result may be the stack buffer
3906  // sets refCount to 1 if appropriate
3907  // sets fArray, fCapacity, and flags
3908  // sets length to 0
3909  // returns boolean for success or failure
3910  UBool allocate(int32_t capacity);
3911 
3912  // release the array if owned
3913  void releaseArray();
3914 
3915  // turn a bogus string into an empty one
3916  void unBogus();
3917 
3918  // implements assignment operator, copy constructor, and fastCopyFrom()
3919  UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3920 
3921  // Copies just the fields without memory management.
3922  void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3923 
3924  // Pin start and limit to acceptable values.
3925  inline void pinIndex(int32_t& start) const;
3926  inline void pinIndices(int32_t& start,
3927  int32_t& length) const;
3928 
3929 #if !UCONFIG_NO_CONVERSION
3930 
3931  /* Internal extract() using UConverter. */
      // NOTE(review): start/length presumably select the source range and dest/destCapacity
      // describe the output byte buffer; the result is presumably the output length — confirm.
3932  int32_t doExtract(int32_t start, int32_t length,
3933  char *dest, int32_t destCapacity,
3934  UConverter *cnv,
3935  UErrorCode &errorCode) const;
3936 
3937  /*
3938  * Real constructor for converting from codepage data.
3939  * It assumes that it is called with !fRefCounted.
3940  *
3941  * If `codepage==0`, then the default converter
3942  * is used for the platform encoding.
3943  * If `codepage` is an empty string (`""`),
3944  * then a simple conversion is performed on the codepage-invariant
3945  * subset ("invariant characters") of the platform encoding. See utypes.h.
3946  */
3947  void doCodepageCreate(const char *codepageData,
3948  int32_t dataLength,
3949  const char *codepage);
3950 
3951  /*
3952  * Worker function for creating a UnicodeString from
3953  * a codepage string using a UConverter.
      * This overload takes an already-opened converter and reports failures via status.
3954  */
3955  void
3956  doCodepageCreate(const char *codepageData,
3957  int32_t dataLength,
3958  UConverter *converter,
3959  UErrorCode &status);
3960 
3961 #endif // !UCONFIG_NO_CONVERSION
3962 
3963  /*
3964  * This function is called when write access to the array
3965  * is necessary.
3966  *
3967  * We need to make a copy of the array if
3968  * the buffer is read-only, or
3969  * the buffer is refCounted (shared), and refCount>1, or
3970  * the buffer is too small.
3971  *
3972  * Return false if memory could not be allocated.
      *
      * NOTE(review): the -1 defaults for newCapacity/growCapacity presumably mean
      * "keep the current capacity" / "no extra growth", and pBufferToDelete presumably
      * hands the old heap buffer back to the caller — confirm against the implementation.
3973  */
3974  UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3975  int32_t growCapacity = -1,
3976  UBool doCopyArray = true,
3977  int32_t** pBufferToDelete = nullptr,
3978  UBool forceClone = false);
3979 
3985  UnicodeString &
3986  caseMap(int32_t caseLocale, uint32_t options,
3988  BreakIterator *iter,
3989 #endif
3990  UStringCaseMapper *stringCaseMapper);
3991 
3992  // ref counting
      // (per the kRefCounted flag comment, the count is stored before the characters in fArray;
      //  isBufferWritable() treats a ref-counted buffer as writable only when refCount()==1)
3993  void addRef();
3994  int32_t removeRef();
3995  int32_t refCount() const;
3996 
3997  // constants
3998  enum {
      // Capacity of the in-object buffer in char16_t units: the object size minus
      // sizeof(void*) (the vtable pointer) and the 2-byte fLengthAndFlags,
      // divided by the size of one code unit.
4004  US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
4005  kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
4006  kInvalidHashCode=0, // invalid hash code
4007  kEmptyHashCode=1, // hash code for empty string
4008 
4009  // bit flag values for fLengthAndFlags
4010  kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
4011  kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
4012  kRefCounted=4, // there is a refCount field before the characters in fArray
4013  kBufferIsReadonly=8,// do not write to this buffer
4014  kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
4015  // and releaseBuffer(newLength) must be called
4016  kAllStorageFlags=0x1f, // mask covering the five flag bits above
4017 
4018  kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
4019  kLength1=1<<kLengthShift, // a short length of 1, expressed in fLengthAndFlags units
4020  kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
4021  kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
4022 
4023  // combined values for convenience
4024  kShortString=kUsingStackBuffer,
4025  kLongString=kRefCounted,
4026  kReadonlyAlias=kBufferIsReadonly,
4027  kWritableAlias=0
4028  };
4029 
4030  friend class UnicodeStringAppendable; // NOTE(review): presumably needs the private write helpers — confirm
4031 
4032  union StackBufferOrFields; // forward declaration necessary before friend declaration
4033  friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
4034 
4035  /*
4036  * The following are all the class fields that are stored
4037  * in each UnicodeString object.
4038  * Note that UnicodeString has virtual functions,
4039  * therefore there is an implicit vtable pointer
4040  * as the first real field.
4041  * The fields should be aligned such that no padding is necessary.
4042  * On 32-bit machines, the size should be 32 bytes,
4043  * on 64-bit machines (8-byte pointers), it should be 40 bytes.
4044  *
4045  * We use a hack to achieve this.
4046  *
4047  * With at least some compilers, each of the following is forced to
4048  * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
4049  * rounded up with additional padding if the fields do not already fit that requirement:
4050  * - sizeof(class UnicodeString)
4051  * - offsetof(UnicodeString, fUnion)
4052  * - sizeof(fUnion)
4053  * - sizeof(fStackFields)
4054  *
4055  * We optimize for the longest possible internal buffer for short strings.
4056  * fUnion.fStackFields begins with 2 bytes for storage flags
4057  * and the length of relatively short strings,
4058  * followed by the buffer for short string contents.
4059  * There is no padding inside fStackFields.
4060  *
4061  * Heap-allocated and aliased strings use fUnion.fFields.
4062  * Both fStackFields and fFields must begin with the same fields for flags and short length,
4063  * that is, those must have the same memory offsets inside the object,
4064  * because the flags must be inspected in order to decide which half of fUnion is being used.
4065  * We assume that the compiler does not reorder the fields.
4066  *
4067  * (Padding at the end of fFields is ok:
4068  * As long as it is no larger than fStackFields, it is not wasted space.)
4069  *
4070  * For some of the history of the UnicodeString class fields layout, see
4071  * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
4072  * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
4073  * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
4074  */
4075  // (implicit) *vtable;
4076  union StackBufferOrFields {
4077  // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
4078  // Each struct of the union must begin with fLengthAndFlags.
4079  struct {
4080  int16_t fLengthAndFlags; // bit fields: see constants above
4081  char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
4082  } fStackFields;
4083  struct {
4084  int16_t fLengthAndFlags; // bit fields: see constants above
4085  int32_t fLength; // number of characters in fArray if >kMaxShortLength (1023); else undefined
      // (setLength() writes fLength only on the kLengthIsLarge path; shorter lengths live in fLengthAndFlags)
4086  int32_t fCapacity; // capacity of fArray (in char16_ts)
4087  // array pointer last to minimize padding for machines with P128 data model
4088  // or pointer sizes that are not a power of 2
4089  char16_t *fArray; // the Unicode data
4090  } fFields;
4091  } fUnion;
4092 };
4093 
4102 U_COMMON_API UnicodeString U_EXPORT2
4104 
4105 #ifndef U_HIDE_DRAFT_API
4116 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
4117 inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
4118  return unistr_internalConcat(s1, internal::toU16StringView(s2));
4119 }
4120 #endif // U_HIDE_DRAFT_API
4121 
4122 #ifndef U_FORCE_HIDE_INTERNAL_API
4124 U_COMMON_API UnicodeString U_EXPORT2
4125 unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2); // internal helper called by the templated operator+; hidden when U_FORCE_HIDE_INTERNAL_API is defined
4126 #endif
4127 
4128 //========================================
4129 // Inline members
4130 //========================================
4131 
4132 //========================================
4133 // Privates
4134 //========================================
4135 
4136 inline void
4137 UnicodeString::pinIndex(int32_t& start) const
4138 {
4139  // pin index
4140  if(start < 0) {
4141  start = 0;
4142  } else if(start > length()) {
4143  start = length();
4144  }
4145 }
4146 
4147 inline void
4148 UnicodeString::pinIndices(int32_t& start,
4149  int32_t& _length) const
4150 {
4151  // pin indices
4152  int32_t len = length();
4153  if(start < 0) {
4154  start = 0;
4155  } else if(start > len) {
4156  start = len;
4157  }
4158  if(_length < 0) {
4159  _length = 0;
4160  } else if(_length > (len - start)) {
4161  _length = (len - start);
4162  }
4163 }
4164 
4165 inline char16_t*
4166 UnicodeString::getArrayStart() {
4167  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4168  fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4169 }
4170 
4171 inline const char16_t*
4172 UnicodeString::getArrayStart() const {
4173  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4174  fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4175 }
4176 
4177 //========================================
4178 // Default constructor
4179 //========================================
4180 
4181 inline
4182 UnicodeString::UnicodeString() {
4183  fUnion.fStackFields.fLengthAndFlags=kShortString;
4184 }
4185 
4186 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
4187  fUnion.fStackFields.fLengthAndFlags=kShortString;
4188 }
4189 
4190 inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
4191  fUnion.fStackFields.fLengthAndFlags=kShortString;
4192 }
4193 
4194 inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
4195  fUnion.fStackFields.fLengthAndFlags=kShortString;
4196 }
4197 
4198 //========================================
4199 // Read-only implementation methods
4200 //========================================
4201 inline UBool
4202 UnicodeString::hasShortLength() const {
4203  return fUnion.fFields.fLengthAndFlags>=0;
4204 }
4205 
4206 inline int32_t
4207 UnicodeString::getShortLength() const {
4208  // fLengthAndFlags must be non-negative -> short length >= 0
4209  // and arithmetic or logical shift does not matter.
4210  return fUnion.fFields.fLengthAndFlags>>kLengthShift;
4211 }
4212 
4213 inline int32_t
4214 UnicodeString::length() const {
4215  return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
4216 }
4217 
4218 inline int32_t
4219 UnicodeString::getCapacity() const {
4220  return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4221  US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
4222 }
4223 
4224 inline int32_t
4225 UnicodeString::hashCode() const
4226 { return doHashCode(); }
4227 
4228 inline UBool
4229 UnicodeString::isBogus() const
4230 { return fUnion.fFields.fLengthAndFlags & kIsBogus; }
4231 
4232 inline UBool
4233 UnicodeString::isWritable() const
4234 { return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
4235 
4236 inline UBool
4237 UnicodeString::isBufferWritable() const
4238 {
4239  return
4240  !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4241  (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
4242 }
4243 
4244 inline const char16_t *
4245 UnicodeString::getBuffer() const {
4246  if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4247  return nullptr;
4248  } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4249  return fUnion.fStackFields.fBuffer;
4250  } else {
4251  return fUnion.fFields.fArray;
4252  }
4253 }
4254 
4255 //========================================
4256 // Read-only alias methods
4257 //========================================
4258 inline int8_t
4259 UnicodeString::doCompare(int32_t start,
4260  int32_t thisLength,
4261  const UnicodeString& srcText,
4262  int32_t srcStart,
4263  int32_t srcLength) const
4264 {
4265  if(srcText.isBogus()) {
4266  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4267  } else {
4268  srcText.pinIndices(srcStart, srcLength);
4269  return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4270  }
4271 }
4272 
4273 inline UBool
4274 UnicodeString::doEqualsSubstring(int32_t start,
4275  int32_t thisLength,
4276  const UnicodeString& srcText,
4277  int32_t srcStart,
4278  int32_t srcLength) const
4279 {
4280  if(srcText.isBogus()) {
4281  return isBogus();
4282  } else {
4283  srcText.pinIndices(srcStart, srcLength);
4284  return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4285  }
4286 }
4287 
4288 inline bool
4290 {
4291  if(isBogus()) {
4292  return text.isBogus();
4293  } else {
4294  int32_t len = length(), textLength = text.length();
4295  return !text.isBogus() && len == textLength && doEquals(text, len);
4296  }
4297 }
4298 
4299 inline bool
4301 { return (! operator==(text)); }
4302 
4303 inline UBool
4304 UnicodeString::operator> (const UnicodeString& text) const
4305 { return doCompare(0, length(), text, 0, text.length()) == 1; }
4306 
4307 inline UBool
4308 UnicodeString::operator< (const UnicodeString& text) const
4309 { return doCompare(0, length(), text, 0, text.length()) == -1; }
4310 
4311 inline UBool
4312 UnicodeString::operator>= (const UnicodeString& text) const
4313 { return doCompare(0, length(), text, 0, text.length()) != -1; }
4314 
4315 inline UBool
4316 UnicodeString::operator<= (const UnicodeString& text) const
4317 { return doCompare(0, length(), text, 0, text.length()) != 1; }
4318 
4319 inline int8_t
4320 UnicodeString::compare(const UnicodeString& text) const
4321 { return doCompare(0, length(), text, 0, text.length()); }
4322 
4323 inline int8_t
4324 UnicodeString::compare(int32_t start,
4325  int32_t _length,
4326  const UnicodeString& srcText) const
4327 { return doCompare(start, _length, srcText, 0, srcText.length()); }
4328 
4329 inline int8_t
4330 UnicodeString::compare(ConstChar16Ptr srcChars,
4331  int32_t srcLength) const
4332 { return doCompare(0, length(), srcChars, 0, srcLength); }
4333 
4334 inline int8_t
4335 UnicodeString::compare(int32_t start,
4336  int32_t _length,
4337  const UnicodeString& srcText,
4338  int32_t srcStart,
4339  int32_t srcLength) const
4340 { return doCompare(start, _length, srcText, srcStart, srcLength); }
4341 
4342 inline int8_t
4343 UnicodeString::compare(int32_t start,
4344  int32_t _length,
4345  const char16_t *srcChars) const
4346 { return doCompare(start, _length, srcChars, 0, _length); }
4347 
4348 inline int8_t
4349 UnicodeString::compare(int32_t start,
4350  int32_t _length,
4351  const char16_t *srcChars,
4352  int32_t srcStart,
4353  int32_t srcLength) const
4354 { return doCompare(start, _length, srcChars, srcStart, srcLength); }
4355 
4356 inline int8_t
4357 UnicodeString::compareBetween(int32_t start,
4358  int32_t limit,
4359  const UnicodeString& srcText,
4360  int32_t srcStart,
4361  int32_t srcLimit) const
4362 { return doCompare(start, limit - start,
4363  srcText, srcStart, srcLimit - srcStart); }
4364 
4365 inline int8_t
4366 UnicodeString::doCompareCodePointOrder(int32_t start,
4367  int32_t thisLength,
4368  const UnicodeString& srcText,
4369  int32_t srcStart,
4370  int32_t srcLength) const
4371 {
4372  if(srcText.isBogus()) {
4373  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4374  } else {
4375  srcText.pinIndices(srcStart, srcLength);
4376  return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4377  }
4378 }
4379 
4380 inline int8_t
4381 UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4382 { return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4383 
4384 inline int8_t
4385 UnicodeString::compareCodePointOrder(int32_t start,
4386  int32_t _length,
4387  const UnicodeString& srcText) const
4388 { return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4389 
4390 inline int8_t
4391 UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4392  int32_t srcLength) const
4393 { return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4394 
4395 inline int8_t
4396 UnicodeString::compareCodePointOrder(int32_t start,
4397  int32_t _length,
4398  const UnicodeString& srcText,
4399  int32_t srcStart,
4400  int32_t srcLength) const
4401 { return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4402 
4403 inline int8_t
4404 UnicodeString::compareCodePointOrder(int32_t start,
4405  int32_t _length,
4406  const char16_t *srcChars) const
4407 { return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4408 
4409 inline int8_t
4410 UnicodeString::compareCodePointOrder(int32_t start,
4411  int32_t _length,
4412  const char16_t *srcChars,
4413  int32_t srcStart,
4414  int32_t srcLength) const
4415 { return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4416 
4417 inline int8_t
4418 UnicodeString::compareCodePointOrderBetween(int32_t start,
4419  int32_t limit,
4420  const UnicodeString& srcText,
4421  int32_t srcStart,
4422  int32_t srcLimit) const
4423 { return doCompareCodePointOrder(start, limit - start,
4424  srcText, srcStart, srcLimit - srcStart); }
4425 
4426 inline int8_t
4427 UnicodeString::doCaseCompare(int32_t start,
4428  int32_t thisLength,
4429  const UnicodeString &srcText,
4430  int32_t srcStart,
4431  int32_t srcLength,
4432  uint32_t options) const
4433 {
4434  if(srcText.isBogus()) {
4435  return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4436  } else {
4437  srcText.pinIndices(srcStart, srcLength);
4438  return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4439  }
4440 }
4441 
4442 inline int8_t
4443 UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4444  return doCaseCompare(0, length(), text, 0, text.length(), options);
4445 }
4446 
4447 inline int8_t
4448 UnicodeString::caseCompare(int32_t start,
4449  int32_t _length,
4450  const UnicodeString &srcText,
4451  uint32_t options) const {
4452  return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4453 }
4454 
4455 inline int8_t
4456 UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4457  int32_t srcLength,
4458  uint32_t options) const {
4459  return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4460 }
4461 
4462 inline int8_t
4463 UnicodeString::caseCompare(int32_t start,
4464  int32_t _length,
4465  const UnicodeString &srcText,
4466  int32_t srcStart,
4467  int32_t srcLength,
4468  uint32_t options) const {
4469  return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4470 }
4471 
4472 inline int8_t
4473 UnicodeString::caseCompare(int32_t start,
4474  int32_t _length,
4475  const char16_t *srcChars,
4476  uint32_t options) const {
4477  return doCaseCompare(start, _length, srcChars, 0, _length, options);
4478 }
4479 
4480 inline int8_t
4481 UnicodeString::caseCompare(int32_t start,
4482  int32_t _length,
4483  const char16_t *srcChars,
4484  int32_t srcStart,
4485  int32_t srcLength,
4486  uint32_t options) const {
4487  return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4488 }
4489 
4490 inline int8_t
4491 UnicodeString::caseCompareBetween(int32_t start,
4492  int32_t limit,
4493  const UnicodeString &srcText,
4494  int32_t srcStart,
4495  int32_t srcLimit,
4496  uint32_t options) const {
4497  return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4498 }
4499 
4500 inline int32_t
4501 UnicodeString::indexOf(const UnicodeString& srcText,
4502  int32_t srcStart,
4503  int32_t srcLength,
4504  int32_t start,
4505  int32_t _length) const
4506 {
4507  if(!srcText.isBogus()) {
4508  srcText.pinIndices(srcStart, srcLength);
4509  if(srcLength > 0) {
4510  return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4511  }
4512  }
4513  return -1;
4514 }
4515 
4516 inline int32_t
4517 UnicodeString::indexOf(const UnicodeString& text) const
4518 { return indexOf(text, 0, text.length(), 0, length()); }
4519 
4520 inline int32_t
4521 UnicodeString::indexOf(const UnicodeString& text,
4522  int32_t start) const {
4523  pinIndex(start);
4524  return indexOf(text, 0, text.length(), start, length() - start);
4525 }
4526 
4527 inline int32_t
4528 UnicodeString::indexOf(const UnicodeString& text,
4529  int32_t start,
4530  int32_t _length) const
4531 { return indexOf(text, 0, text.length(), start, _length); }
4532 
4533 inline int32_t
4534 UnicodeString::indexOf(const char16_t *srcChars,
4535  int32_t srcLength,
4536  int32_t start) const {
4537  pinIndex(start);
4538  return indexOf(srcChars, 0, srcLength, start, length() - start);
4539 }
4540 
4541 inline int32_t
4542 UnicodeString::indexOf(ConstChar16Ptr srcChars,
4543  int32_t srcLength,
4544  int32_t start,
4545  int32_t _length) const
4546 { return indexOf(srcChars, 0, srcLength, start, _length); }
4547 
4548 inline int32_t
4549 UnicodeString::indexOf(char16_t c,
4550  int32_t start,
4551  int32_t _length) const
4552 { return doIndexOf(c, start, _length); }
4553 
4554 inline int32_t
4555 UnicodeString::indexOf(UChar32 c,
4556  int32_t start,
4557  int32_t _length) const
4558 { return doIndexOf(c, start, _length); }
4559 
4560 inline int32_t
4561 UnicodeString::indexOf(char16_t c) const
4562 { return doIndexOf(c, 0, length()); }
4563 
4564 inline int32_t
4565 UnicodeString::indexOf(UChar32 c) const
4566 { return indexOf(c, 0, length()); }
4567 
4568 inline int32_t
4569 UnicodeString::indexOf(char16_t c,
4570  int32_t start) const {
4571  pinIndex(start);
4572  return doIndexOf(c, start, length() - start);
4573 }
4574 
4575 inline int32_t
4576 UnicodeString::indexOf(UChar32 c,
4577  int32_t start) const {
4578  pinIndex(start);
4579  return indexOf(c, start, length() - start);
4580 }
4581 
4582 inline int32_t
4583 UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4584  int32_t srcLength,
4585  int32_t start,
4586  int32_t _length) const
4587 { return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4588 
4589 inline int32_t
4590 UnicodeString::lastIndexOf(const char16_t *srcChars,
4591  int32_t srcLength,
4592  int32_t start) const {
4593  pinIndex(start);
4594  return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4595 }
4596 
4597 inline int32_t
4598 UnicodeString::lastIndexOf(const UnicodeString& srcText,
4599  int32_t srcStart,
4600  int32_t srcLength,
4601  int32_t start,
4602  int32_t _length) const
4603 {
4604  if(!srcText.isBogus()) {
4605  srcText.pinIndices(srcStart, srcLength);
4606  if(srcLength > 0) {
4607  return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4608  }
4609  }
4610  return -1;
4611 }
4612 
4613 inline int32_t
4614 UnicodeString::lastIndexOf(const UnicodeString& text,
4615  int32_t start,
4616  int32_t _length) const
4617 { return lastIndexOf(text, 0, text.length(), start, _length); }
4618 
4619 inline int32_t
4620 UnicodeString::lastIndexOf(const UnicodeString& text,
4621  int32_t start) const {
4622  pinIndex(start);
4623  return lastIndexOf(text, 0, text.length(), start, length() - start);
4624 }
4625 
4626 inline int32_t
4627 UnicodeString::lastIndexOf(const UnicodeString& text) const
4628 { return lastIndexOf(text, 0, text.length(), 0, length()); }
4629 
4630 inline int32_t
4631 UnicodeString::lastIndexOf(char16_t c,
4632  int32_t start,
4633  int32_t _length) const
4634 { return doLastIndexOf(c, start, _length); }
4635 
4636 inline int32_t
4637 UnicodeString::lastIndexOf(UChar32 c,
4638  int32_t start,
4639  int32_t _length) const {
4640  return doLastIndexOf(c, start, _length);
4641 }
4642 
4643 inline int32_t
4644 UnicodeString::lastIndexOf(char16_t c) const
4645 { return doLastIndexOf(c, 0, length()); }
4646 
4647 inline int32_t
4648 UnicodeString::lastIndexOf(UChar32 c) const {
4649  return lastIndexOf(c, 0, length());
4650 }
4651 
4652 inline int32_t
4653 UnicodeString::lastIndexOf(char16_t c,
4654  int32_t start) const {
4655  pinIndex(start);
4656  return doLastIndexOf(c, start, length() - start);
4657 }
4658 
4659 inline int32_t
4660 UnicodeString::lastIndexOf(UChar32 c,
4661  int32_t start) const {
4662  pinIndex(start);
4663  return lastIndexOf(c, start, length() - start);
4664 }
4665 
4666 inline UBool
4667 UnicodeString::startsWith(const UnicodeString& text) const
4668 { return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4669 
4670 inline UBool
4671 UnicodeString::startsWith(const UnicodeString& srcText,
4672  int32_t srcStart,
4673  int32_t srcLength) const
4674 { return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4675 
4676 inline UBool
4677 UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4678  if(srcLength < 0) {
4679  srcLength = u_strlen(toUCharPtr(srcChars));
4680  }
4681  return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4682 }
4683 
4684 inline UBool
4685 UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4686  if(srcLength < 0) {
4687  srcLength = u_strlen(toUCharPtr(srcChars));
4688  }
4689  return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4690 }
4691 
4692 inline UBool
4693 UnicodeString::endsWith(const UnicodeString& text) const
4694 { return doEqualsSubstring(length() - text.length(), text.length(),
4695  text, 0, text.length()); }
4696 
4697 inline UBool
4698 UnicodeString::endsWith(const UnicodeString& srcText,
4699  int32_t srcStart,
4700  int32_t srcLength) const {
4701  srcText.pinIndices(srcStart, srcLength);
4702  return doEqualsSubstring(length() - srcLength, srcLength,
4703  srcText, srcStart, srcLength);
4704 }
4705 
4706 inline UBool
4707 UnicodeString::endsWith(ConstChar16Ptr srcChars,
4708  int32_t srcLength) const {
4709  if(srcLength < 0) {
4710  srcLength = u_strlen(toUCharPtr(srcChars));
4711  }
4712  return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4713 }
4714 
4715 inline UBool
4716 UnicodeString::endsWith(const char16_t *srcChars,
4717  int32_t srcStart,
4718  int32_t srcLength) const {
4719  if(srcLength < 0) {
4720  srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4721  }
4722  return doEqualsSubstring(length() - srcLength, srcLength,
4723  srcChars, srcStart, srcLength);
4724 }
4725 
4726 //========================================
4727 // replace
4728 //========================================
4729 inline UnicodeString&
4730 UnicodeString::replace(int32_t start,
4731  int32_t _length,
4732  const UnicodeString& srcText)
4733 { return doReplace(start, _length, srcText, 0, srcText.length()); }
4734 
4735 inline UnicodeString&
4736 UnicodeString::replace(int32_t start,
4737  int32_t _length,
4738  const UnicodeString& srcText,
4739  int32_t srcStart,
4740  int32_t srcLength)
4741 { return doReplace(start, _length, srcText, srcStart, srcLength); }
4742 
4743 inline UnicodeString&
4744 UnicodeString::replace(int32_t start,
4745  int32_t _length,
4746  ConstChar16Ptr srcChars,
4747  int32_t srcLength)
4748 { return doReplace(start, _length, srcChars, 0, srcLength); }
4749 
4750 inline UnicodeString&
4751 UnicodeString::replace(int32_t start,
4752  int32_t _length,
4753  const char16_t *srcChars,
4754  int32_t srcStart,
4755  int32_t srcLength)
4756 { return doReplace(start, _length, srcChars, srcStart, srcLength); }
4757 
4758 inline UnicodeString&
4759 UnicodeString::replace(int32_t start,
4760  int32_t _length,
4761  char16_t srcChar)
4762 { return doReplace(start, _length, &srcChar, 0, 1); }
4763 
4764 inline UnicodeString&
4765 UnicodeString::replaceBetween(int32_t start,
4766  int32_t limit,
4767  const UnicodeString& srcText)
4768 { return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4769 
4770 inline UnicodeString&
4771 UnicodeString::replaceBetween(int32_t start,
4772  int32_t limit,
4773  const UnicodeString& srcText,
4774  int32_t srcStart,
4775  int32_t srcLimit)
4776 { return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4777 
4778 inline UnicodeString&
4779 UnicodeString::findAndReplace(const UnicodeString& oldText,
4780  const UnicodeString& newText)
4781 { return findAndReplace(0, length(), oldText, 0, oldText.length(),
4782  newText, 0, newText.length()); }
4783 
4784 inline UnicodeString&
4785 UnicodeString::findAndReplace(int32_t start,
4786  int32_t _length,
4787  const UnicodeString& oldText,
4788  const UnicodeString& newText)
4789 { return findAndReplace(start, _length, oldText, 0, oldText.length(),
4790  newText, 0, newText.length()); }
4791 
4792 // ============================
4793 // extract
4794 // ============================
4795 inline void
4796 UnicodeString::doExtract(int32_t start,
4797  int32_t _length,
4798  UnicodeString& target) const
4799 { target.replace(0, target.length(), *this, start, _length); }
4800 
4801 inline void
4802 UnicodeString::extract(int32_t start,
4803  int32_t _length,
4804  Char16Ptr target,
4805  int32_t targetStart) const
4806 { doExtract(start, _length, target, targetStart); }
4807 
4808 inline void
4809 UnicodeString::extract(int32_t start,
4810  int32_t _length,
4811  UnicodeString& target) const
4812 { doExtract(start, _length, target); }
4813 
4814 #if !UCONFIG_NO_CONVERSION
4815 
4816 inline int32_t
4817 UnicodeString::extract(int32_t start,
4818  int32_t _length,
4819  char *dst,
4820  const char *codepage) const
4821 
4822 {
4823  // This dstSize value will be checked explicitly
4824  return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
4825 }
4826 
4827 #endif
4828 
4829 inline void
4830 UnicodeString::extractBetween(int32_t start,
4831  int32_t limit,
4832  char16_t *dst,
4833  int32_t dstStart) const {
4834  pinIndex(start);
4835  pinIndex(limit);
4836  doExtract(start, limit - start, dst, dstStart);
4837 }
4838 
4839 inline UnicodeString
4840 UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4841  return tempSubString(start, limit - start);
4842 }
4843 
4844 inline char16_t
4845 UnicodeString::doCharAt(int32_t offset) const
4846 {
4847  if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
4848  return getArrayStart()[offset];
4849  } else {
4850  return kInvalidUChar;
4851  }
4852 }
4853 
4854 inline char16_t
4855 UnicodeString::charAt(int32_t offset) const
4856 { return doCharAt(offset); }
4857 
4858 inline char16_t
4859 UnicodeString::operator[] (int32_t offset) const
4860 { return doCharAt(offset); }
4861 
4862 inline UBool
4863 UnicodeString::isEmpty() const {
4864  // Arithmetic or logical right shift does not matter: only testing for 0.
4865  return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4866 }
4867 
4868 //========================================
4869 // Write implementation methods
4870 //========================================
4871 inline void
4872 UnicodeString::setZeroLength() {
4873  fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4874 }
4875 
4876 inline void
4877 UnicodeString::setShortLength(int32_t len) {
4878  // requires 0 <= len <= kMaxShortLength
4879  fUnion.fFields.fLengthAndFlags =
4880  static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4881 }
4882 
4883 inline void
4884 UnicodeString::setLength(int32_t len) {
4885  if(len <= kMaxShortLength) {
4886  setShortLength(len);
4887  } else {
4888  fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4889  fUnion.fFields.fLength = len;
4890  }
4891 }
4892 
4893 inline void
4894 UnicodeString::setToEmpty() {
4895  fUnion.fFields.fLengthAndFlags = kShortString;
4896 }
4897 
4898 inline void
4899 UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4900  setLength(len);
4901  fUnion.fFields.fArray = array;
4902  fUnion.fFields.fCapacity = capacity;
4903 }
4904 
4905 inline UnicodeString&
4906 UnicodeString::operator= (char16_t ch)
4907 { return doReplace(0, length(), &ch, 0, 1); }
4908 
4909 inline UnicodeString&
4910 UnicodeString::operator= (UChar32 ch)
4911 { return replace(0, length(), ch); }
4912 
4913 inline UnicodeString&
4914 UnicodeString::setTo(const UnicodeString& srcText,
4915  int32_t srcStart,
4916  int32_t srcLength)
4917 {
4918  unBogus();
4919  return doReplace(0, length(), srcText, srcStart, srcLength);
4920 }
4921 
4922 inline UnicodeString&
4923 UnicodeString::setTo(const UnicodeString& srcText,
4924  int32_t srcStart)
4925 {
4926  unBogus();
4927  srcText.pinIndex(srcStart);
4928  return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4929 }
4930 
4931 inline UnicodeString&
4932 UnicodeString::setTo(const UnicodeString& srcText)
4933 {
4934  return copyFrom(srcText);
4935 }
4936 
4937 inline UnicodeString&
4938 UnicodeString::setTo(const char16_t *srcChars,
4939  int32_t srcLength)
4940 {
4941  unBogus();
4942  return doReplace(0, length(), srcChars, 0, srcLength);
4943 }
4944 
4945 inline UnicodeString&
4946 UnicodeString::setTo(char16_t srcChar)
4947 {
4948  unBogus();
4949  return doReplace(0, length(), &srcChar, 0, 1);
4950 }
4951 
4952 inline UnicodeString&
4953 UnicodeString::setTo(UChar32 srcChar)
4954 {
4955  unBogus();
4956  return replace(0, length(), srcChar);
4957 }
4958 
4959 inline UnicodeString&
4960 UnicodeString::append(const UnicodeString& srcText,
4961  int32_t srcStart,
4962  int32_t srcLength)
4963 { return doAppend(srcText, srcStart, srcLength); }
4964 
4965 inline UnicodeString&
4966 UnicodeString::append(const UnicodeString& srcText)
4967 { return doAppend(srcText, 0, srcText.length()); }
4968 
4969 inline UnicodeString&
4970 UnicodeString::append(const char16_t *srcChars,
4971  int32_t srcStart,
4972  int32_t srcLength)
4973 { return doAppend(srcChars, srcStart, srcLength); }
4974 
4975 inline UnicodeString&
4976 UnicodeString::append(ConstChar16Ptr srcChars,
4977  int32_t srcLength)
4978 { return doAppend(srcChars, 0, srcLength); }
4979 
4980 inline UnicodeString&
4981 UnicodeString::append(char16_t srcChar)
4982 { return doAppend(&srcChar, 0, 1); }
4983 
4984 inline UnicodeString&
4985 UnicodeString::operator+= (char16_t ch)
4986 { return doAppend(&ch, 0, 1); }
4987 
4988 inline UnicodeString&
4989 UnicodeString::operator+= (UChar32 ch) {
4990  return append(ch);
4991 }
4992 
4993 inline UnicodeString&
4994 UnicodeString::operator+= (const UnicodeString& srcText)
4995 { return doAppend(srcText, 0, srcText.length()); }
4996 
4997 inline UnicodeString&
4998 UnicodeString::insert(int32_t start,
4999  const UnicodeString& srcText,
5000  int32_t srcStart,
5001  int32_t srcLength)
5002 { return doReplace(start, 0, srcText, srcStart, srcLength); }
5003 
5004 inline UnicodeString&
5005 UnicodeString::insert(int32_t start,
5006  const UnicodeString& srcText)
5007 { return doReplace(start, 0, srcText, 0, srcText.length()); }
5008 
5009 inline UnicodeString&
5010 UnicodeString::insert(int32_t start,
5011  const char16_t *srcChars,
5012  int32_t srcStart,
5013  int32_t srcLength)
5014 { return doReplace(start, 0, srcChars, srcStart, srcLength); }
5015 
5016 inline UnicodeString&
5017 UnicodeString::insert(int32_t start,
5018  ConstChar16Ptr srcChars,
5019  int32_t srcLength)
5020 { return doReplace(start, 0, srcChars, 0, srcLength); }
5021 
5022 inline UnicodeString&
5023 UnicodeString::insert(int32_t start,
5024  char16_t srcChar)
5025 { return doReplace(start, 0, &srcChar, 0, 1); }
5026 
5027 inline UnicodeString&
5028 UnicodeString::insert(int32_t start,
5029  UChar32 srcChar)
5030 { return replace(start, 0, srcChar); }
5031 
5032 
5033 inline UnicodeString&
5034 UnicodeString::remove()
5035 {
5036  // remove() of a bogus string makes the string empty and non-bogus
5037  if(isBogus()) {
5038  setToEmpty();
5039  } else {
5040  setZeroLength();
5041  }
5042  return *this;
5043 }
5044 
5045 inline UnicodeString&
5046 UnicodeString::remove(int32_t start,
5047  int32_t _length)
5048 {
5049  if(start <= 0 && _length == INT32_MAX) {
5050  // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
5051  return remove();
5052  }
5053  return doReplace(start, _length, nullptr, 0, 0);
5054 }
5055 
5056 inline UnicodeString&
5057 UnicodeString::removeBetween(int32_t start,
5058  int32_t limit)
5059 { return doReplace(start, limit - start, nullptr, 0, 0); }
5060 
5061 inline UnicodeString &
5062 UnicodeString::retainBetween(int32_t start, int32_t limit) {
5063  truncate(limit);
5064  return doReplace(0, start, nullptr, 0, 0);
5065 }
5066 
5067 inline UBool
5068 UnicodeString::truncate(int32_t targetLength)
5069 {
5070  if(isBogus() && targetLength == 0) {
5071  // truncate(0) of a bogus string makes the string empty and non-bogus
5072  unBogus();
5073  return false;
5074  } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
5075  setLength(targetLength);
5076  return true;
5077  } else {
5078  return false;
5079  }
5080 }
5081 
5082 inline UnicodeString&
5083 UnicodeString::reverse()
5084 { return doReverse(0, length()); }
5085 
5086 inline UnicodeString&
5087 UnicodeString::reverse(int32_t start,
5088  int32_t _length)
5089 { return doReverse(start, _length); }
5090 
5091 U_NAMESPACE_END
5092 
5093 #endif /* U_SHOW_CPLUSPLUS_API */
5094 
5095 #endif
C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
#define U_ALIASING_BARRIER(ptr)
Barrier for pointer anti-aliasing optimizations even across function boundaries.
Definition: char16ptr.h:37
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:108
A ByteSink can be filled with bytes.
Definition: bytestream.h:53
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:49
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:156
Records lengths of string edits but not replacement text.
Definition: edits.h:80
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition: rep.h:251
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition: rep.h:246
Implementation of ByteSink that writes to a "string".
Definition: bytestream.h:267
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:61
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChar...
UnicodeString(const UnicodeString &that)
Copy constructor.
void swap(UnicodeString &other) noexcept
Swap strings.
virtual char16_t getCharAt(int32_t offset) const override
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline agai...
bool operator==(const S &text) const
Equality operator.
Definition: unistr.h:347
virtual int32_t getLength() const override
Implement Replaceable::getLength() (see jitterbug 1027).
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UnicodeString(const wchar_t *text, int32_t textLength)
wchar_t * constructor.
Definition: unistr.h:3247
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text) override
Replace a substring of this object with the given text.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain...
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
UnicodeString & operator=(const S &src)
Assignment operator.
Definition: unistr.h:1960
UnicodeString & append(const S &src)
Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view ...
Definition: unistr.h:2300
UnicodeString & operator=(UnicodeString &&src) noexcept
Move assignment operator; might leave src in bogus state.
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString & toLower()
Convert the characters in this to lower case following the conventions of the default locale.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const UnicodeString &text)
Readonly-aliasing factory method.
Definition: unistr.h:3623
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a ...
Definition: unistr.h:307
bool operator!=(const S &text) const
Inequality operator.
Definition: unistr.h:382
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + length) into an array of characters in a specified c...
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with t...
virtual void copy(int32_t start, int32_t limit, int32_t dest) override
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const override
Copy the characters in the range [start, limit) into the UnicodeString target.
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the ran...
Definition: unistr.h:4736
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
Definition: unistr.h:1777
UnicodeString & toLower(const Locale &locale)
Convert the characters in this to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
int32_t length() const
Return the length of the UnicodeString object.
Definition: unistr.h:4214
virtual UChar32 getChar32At(int32_t offset) const override
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline ...
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + length) into an array of characters in the platform'...
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars...
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
Definition: unistr.h:3357
void setToBogus()
Make this UnicodeString object invalid.
friend void swap(UnicodeString &s1, UnicodeString &s2) noexcept
Non-member UnicodeString swap function.
Definition: unistr.h:1990
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text)
Constructor from text which is, or which is implicitly convertible to, a std::u16string_view or (if U...
Definition: unistr.h:3274
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count Unicode code points in the length char16_t code units of the string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const S &text)
Readonly-aliasing factory method.
Definition: unistr.h:3600
UnicodeString & operator+=(const S &src)
Append operator.
Definition: unistr.h:2227
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
virtual UnicodeString * clone() const override
Clone this object, an instance of a subclass of Replaceable.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
UnicodeString(UnicodeString &&src) noexcept
Move constructor; might leave src in bogus state.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
Definition: unistr.h:3343
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UBool isBogus() const
Determine if this object contains a valid string.
Definition: unistr.h:4229
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UnicodeString(const uint16_t *text, int32_t textLength)
uint16_t * constructor.
Definition: unistr.h:3225
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
virtual UBool hasMetaData() const override
Replaceable API.
UnicodeString & toUpper()
Convert the characters in this to UPPER CASE following the conventions of the default locale.
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
UnicodeString & trim()
Trims leading and trailing whitespace from this UnicodeString.
U_CAPI int32_t u_strlen(const UChar *s)
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
U_COMMON_API UnicodeString unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2)
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:346
UnicodeString operator+(const UnicodeString &s1, const S &s2)
Creates a new UnicodeString from the concatenation of a UnicodeString and s2 which is,...
Definition: unistr.h:4117
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:846
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
Definition: ucnv_err.h:96
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
Definition: uconfig.h:358
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition: umachine.h:186
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
#define U_SIZEOF_UCHAR
Number of bytes in a UChar (always 2).
Definition: umachine.h:330
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:150
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
Definition: unistr.h:71
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:170
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
Definition: unistr.h:208
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:315