ICU 76.1 76.1
Loading...
Searching...
No Matches
unistr.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1998-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File unistr.h
10*
11* Modification History:
12*
13* Date Name Description
14* 09/25/98 stephen Creation.
15* 11/11/98 stephen Changed per 11/9 code review.
16* 04/20/99 stephen Overhauled per 4/16 code review.
17* 11/18/99 aliu Made to inherit from Replaceable. Added method
18* handleReplaceBetween(); other methods unchanged.
19* 06/25/01 grhoten Remove dependency on iostream.
20******************************************************************************
21*/
22
23#ifndef UNISTR_H
24#define UNISTR_H
25
31#include "unicode/utypes.h"
32
33#if U_SHOW_CPLUSPLUS_API
34
35#include <cstddef>
36#include <string_view>
37#include "unicode/char16ptr.h"
38#include "unicode/rep.h"
39#include "unicode/std_string.h"
40#include "unicode/stringpiece.h"
41#include "unicode/bytestream.h"
42
43struct UConverter; // unicode/ucnv.h
44
45#ifndef USTRING_H
51U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
52#endif
53
54U_NAMESPACE_BEGIN
55
56#if !UCONFIG_NO_BREAK_ITERATION
57class BreakIterator; // unicode/brkiter.h
58#endif
59class Edits;
60
61U_NAMESPACE_END
62
63// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
70typedef int32_t U_CALLCONV
71UStringCaseMapper(int32_t caseLocale, uint32_t options,
74#endif
75 char16_t *dest, int32_t destCapacity,
76 const char16_t *src, int32_t srcLength,
77 icu::Edits *edits,
78 UErrorCode &errorCode);
79
80U_NAMESPACE_BEGIN
81
82class Locale; // unicode/locid.h
83class StringCharacterIterator;
84class UnicodeStringAppendable; // unicode/appendable.h
85
86/* The <iostream> include has been moved to unicode/ustream.h */
87
// Shorthand for the icu::UnicodeString::kInvariant enum constant.
98#define US_INV icu::UnicodeString::kInvariant
99
// UNICODE_STRING(cs, _length): builds an icu::UnicodeString from the string
// literal cs by token-pasting a `u` prefix onto it (yielding a UTF-16 literal)
// and passing `true`, that literal, and _length to a UnicodeString constructor.
120#if !U_CHAR16_IS_TYPEDEF
121# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
122#else
// U_CHAR16_IS_TYPEDEF is set: the pasted literal is additionally cast to const char16_t*.
123# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
124#endif
125
// UNICODE_STRING_SIMPLE(cs): UNICODE_STRING with _length fixed to -1
// (presumably "length determined from the NUL terminator" -- confirm against the constructor docs).
135#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136
// UNISTR_FROM_CHAR_EXPLICIT: expands to `explicit` when building ICU library code
// (any of the U_*_IMPLEMENTATION macros below is defined) and to nothing otherwise.
// A definition supplied before this header is included takes precedence.
144#ifndef UNISTR_FROM_CHAR_EXPLICIT
145# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146 // Auto-"explicit" in ICU library code.
147# define UNISTR_FROM_CHAR_EXPLICIT explicit
148# else
149 // Empty by default for source code compatibility.
150# define UNISTR_FROM_CHAR_EXPLICIT
151# endif
152#endif
153
// UNISTR_FROM_STRING_EXPLICIT: expands to `explicit` when building ICU library code
// (any of the U_*_IMPLEMENTATION macros below is defined) and to nothing otherwise.
// A definition supplied before this header is included takes precedence.
// It is applied to the pointer-taking UnicodeString constructors declared in this header.
164#ifndef UNISTR_FROM_STRING_EXPLICIT
165# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166 // Auto-"explicit" in ICU library code.
167# define UNISTR_FROM_STRING_EXPLICIT explicit
168# else
169 // Empty by default for source code compatibility.
170# define UNISTR_FROM_STRING_EXPLICIT
171# endif
172#endif
173
// UNISTR_OBJECT_SIZE: byte count from which US_STACKBUF_SIZE (the in-object
// buffer capacity) is derived. Defaults to 64 unless defined before this header.
207#ifndef UNISTR_OBJECT_SIZE
208# define UNISTR_OBJECT_SIZE 64
209#endif
210
296{
297public:
298
312 kInvariant
313 };
314
315 //========================================
316 // Read-only operations
317 //========================================
318
319 /* Comparison - bitwise only - for international comparison use collation */
320
328 inline bool operator== (const UnicodeString& text) const;
329
330#ifndef U_HIDE_DRAFT_API
346 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
347 inline bool operator==(const S &text) const {
348 std::u16string_view sv(internal::toU16StringView(text));
349 uint32_t len; // unsigned to avoid a compiler warning
350 return !isBogus() && (len = length()) == sv.length() && doEquals(sv.data(), len);
351 }
352#endif // U_HIDE_DRAFT_API
353
361 inline bool operator!= (const UnicodeString& text) const;
362
363#ifndef U_HIDE_DRAFT_API
381 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
382 inline bool operator!=(const S &text) const {
383 return !operator==(text);
384 }
385#endif // U_HIDE_DRAFT_API
386
394 inline UBool operator> (const UnicodeString& text) const;
395
403 inline UBool operator< (const UnicodeString& text) const;
404
412 inline UBool operator>= (const UnicodeString& text) const;
413
421 inline UBool operator<= (const UnicodeString& text) const;
422
434 inline int8_t compare(const UnicodeString& text) const;
435
451 inline int8_t compare(int32_t start,
452 int32_t length,
453 const UnicodeString& text) const;
454
472 inline int8_t compare(int32_t start,
473 int32_t length,
474 const UnicodeString& srcText,
476 int32_t srcLength) const;
477
490 inline int8_t compare(ConstChar16Ptr srcChars,
491 int32_t srcLength) const;
492
507 inline int8_t compare(int32_t start,
508 int32_t length,
509 const char16_t *srcChars) const;
510
528 inline int8_t compare(int32_t start,
529 int32_t length,
530 const char16_t *srcChars,
532 int32_t srcLength) const;
533
551 inline int8_t compareBetween(int32_t start,
552 int32_t limit,
553 const UnicodeString& srcText,
555 int32_t srcLimit) const;
556
574 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
575
595 inline int8_t compareCodePointOrder(int32_t start,
596 int32_t length,
597 const UnicodeString& srcText) const;
598
620 inline int8_t compareCodePointOrder(int32_t start,
621 int32_t length,
622 const UnicodeString& srcText,
624 int32_t srcLength) const;
625
644 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
645 int32_t srcLength) const;
646
666 inline int8_t compareCodePointOrder(int32_t start,
667 int32_t length,
668 const char16_t *srcChars) const;
669
691 inline int8_t compareCodePointOrder(int32_t start,
692 int32_t length,
693 const char16_t *srcChars,
695 int32_t srcLength) const;
696
718 inline int8_t compareCodePointOrderBetween(int32_t start,
719 int32_t limit,
720 const UnicodeString& srcText,
722 int32_t srcLimit) const;
723
742 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
743
764 inline int8_t caseCompare(int32_t start,
765 int32_t length,
766 const UnicodeString& srcText,
767 uint32_t options) const;
768
791 inline int8_t caseCompare(int32_t start,
792 int32_t length,
793 const UnicodeString& srcText,
796 uint32_t options) const;
797
817 inline int8_t caseCompare(ConstChar16Ptr srcChars,
819 uint32_t options) const;
820
841 inline int8_t caseCompare(int32_t start,
842 int32_t length,
843 const char16_t *srcChars,
844 uint32_t options) const;
845
868 inline int8_t caseCompare(int32_t start,
869 int32_t length,
870 const char16_t *srcChars,
873 uint32_t options) const;
874
897 inline int8_t caseCompareBetween(int32_t start,
898 int32_t limit,
899 const UnicodeString& srcText,
902 uint32_t options) const;
903
911 inline UBool startsWith(const UnicodeString& text) const;
912
923 inline UBool startsWith(const UnicodeString& srcText,
925 int32_t srcLength) const;
926
935 inline UBool startsWith(ConstChar16Ptr srcChars,
936 int32_t srcLength) const;
937
947 inline UBool startsWith(const char16_t *srcChars,
949 int32_t srcLength) const;
950
958 inline UBool endsWith(const UnicodeString& text) const;
959
970 inline UBool endsWith(const UnicodeString& srcText,
972 int32_t srcLength) const;
973
982 inline UBool endsWith(ConstChar16Ptr srcChars,
983 int32_t srcLength) const;
984
995 inline UBool endsWith(const char16_t *srcChars,
997 int32_t srcLength) const;
998
999
1000 /* Searching - bitwise only */
1001
1010 inline int32_t indexOf(const UnicodeString& text) const;
1011
1021 inline int32_t indexOf(const UnicodeString& text,
1022 int32_t start) const;
1023
1035 inline int32_t indexOf(const UnicodeString& text,
1036 int32_t start,
1037 int32_t length) const;
1038
1055 inline int32_t indexOf(const UnicodeString& srcText,
1058 int32_t start,
1059 int32_t length) const;
1060
1072 inline int32_t indexOf(const char16_t *srcChars,
1074 int32_t start) const;
1075
1088 inline int32_t indexOf(ConstChar16Ptr srcChars,
1090 int32_t start,
1091 int32_t length) const;
1092
1109 int32_t indexOf(const char16_t *srcChars,
1112 int32_t start,
1113 int32_t length) const;
1114
1122 inline int32_t indexOf(char16_t c) const;
1123
1132 inline int32_t indexOf(UChar32 c) const;
1133
1142 inline int32_t indexOf(char16_t c,
1143 int32_t start) const;
1144
1154 inline int32_t indexOf(UChar32 c,
1155 int32_t start) const;
1156
1167 inline int32_t indexOf(char16_t c,
1168 int32_t start,
1169 int32_t length) const;
1170
1182 inline int32_t indexOf(UChar32 c,
1183 int32_t start,
1184 int32_t length) const;
1185
1194 inline int32_t lastIndexOf(const UnicodeString& text) const;
1195
1205 inline int32_t lastIndexOf(const UnicodeString& text,
1206 int32_t start) const;
1207
1219 inline int32_t lastIndexOf(const UnicodeString& text,
1220 int32_t start,
1221 int32_t length) const;
1222
1239 inline int32_t lastIndexOf(const UnicodeString& srcText,
1242 int32_t start,
1243 int32_t length) const;
1244
1255 inline int32_t lastIndexOf(const char16_t *srcChars,
1257 int32_t start) const;
1258
1271 inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1273 int32_t start,
1274 int32_t length) const;
1275
1295 int32_t start,
1296 int32_t length) const;
1297
1305 inline int32_t lastIndexOf(char16_t c) const;
1306
1315 inline int32_t lastIndexOf(UChar32 c) const;
1316
1325 inline int32_t lastIndexOf(char16_t c,
1326 int32_t start) const;
1327
1337 inline int32_t lastIndexOf(UChar32 c,
1338 int32_t start) const;
1339
1350 inline int32_t lastIndexOf(char16_t c,
1351 int32_t start,
1352 int32_t length) const;
1353
1365 inline int32_t lastIndexOf(UChar32 c,
1366 int32_t start,
1367 int32_t length) const;
1368
1369
1370 /* Character access */
1371
1380 inline char16_t charAt(int32_t offset) const;
1381
1389 inline char16_t operator[] (int32_t offset) const;
1390
1402 UChar32 char32At(int32_t offset) const;
1403
1420
1438
1489 int32_t moveIndex32(int32_t index, int32_t delta) const;
1490
1491 /* Substring extraction */
1492
1508 inline void extract(int32_t start,
1509 int32_t length,
1510 Char16Ptr dst,
1511 int32_t dstStart = 0) const;
1512
1534 int32_t
1536 UErrorCode &errorCode) const;
1537
1547 inline void extract(int32_t start,
1548 int32_t length,
1549 UnicodeString& target) const;
1550
1562 inline void extractBetween(int32_t start,
1563 int32_t limit,
1564 char16_t *dst,
1565 int32_t dstStart = 0) const;
1566
1575 virtual void extractBetween(int32_t start,
1576 int32_t limit,
1577 UnicodeString& target) const override;
1578
1602 char *target,
1604 enum EInvariant inv) const;
1605
1606#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1607
1629 char *target,
1630 uint32_t targetLength) const;
1631
1632#endif
1633
1634#if !UCONFIG_NO_CONVERSION
1635
1661 inline int32_t extract(int32_t start,
1663 char* target,
1664 const char* codepage = nullptr) const;
1665
1697 char *target,
1699 const char *codepage) const;
1700
1719 UConverter *cnv,
1720 UErrorCode &errorCode) const;
1721
1722#endif
1723
1738
1749 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1750
1762 void toUTF8(ByteSink &sink) const;
1763
1776 template<typename StringClass>
1779 toUTF8(sbs);
1780 return result;
1781 }
1782
1798 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1799
1800 /* Length operations */
1801
1810 inline int32_t length() const;
1811
1825 int32_t
1826 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1827
1851 UBool
1852 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1853
1859 inline UBool isEmpty() const;
1860
1870 inline int32_t getCapacity() const;
1871
1872 /* Other operations */
1873
1879 inline int32_t hashCode() const;
1880
1893 inline UBool isBogus() const;
1894
1895 //========================================
1896 // Write operations
1897 //========================================
1898
1899 /* Assignment operations */
1900
1920
1947
1948#ifndef U_HIDE_DRAFT_API
1959 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
1960 inline UnicodeString &operator=(const S &src) {
1961 unBogus();
1962 return doReplace(0, length(), internal::toU16StringView(src));
1963 }
1964#endif // U_HIDE_DRAFT_API
1965
1975
1981 void swap(UnicodeString &other) noexcept;
1982
1989 friend inline void U_EXPORT2
1991 s1.swap(s2);
1992 }
1993
2001 inline UnicodeString& operator= (char16_t ch);
2002
2011
2023 inline UnicodeString& setTo(const UnicodeString& srcText,
2025
2039 inline UnicodeString& setTo(const UnicodeString& srcText,
2042
2051 inline UnicodeString& setTo(const UnicodeString& srcText);
2052
2061 inline UnicodeString& setTo(const char16_t *srcChars,
2063
2072 inline UnicodeString& setTo(char16_t srcChar);
2073
2082 inline UnicodeString& setTo(UChar32 srcChar);
2083
2108 ConstChar16Ptr text,
2109 int32_t textLength);
2110
2130 UnicodeString &setTo(char16_t *buffer,
2133
2174
2183 char16_t ch);
2184
2185
2186 /* Append operations */
2187
2195 inline UnicodeString& operator+= (char16_t ch);
2196
2205
2214
2215#ifndef U_HIDE_DRAFT_API
2226 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2227 inline UnicodeString& operator+=(const S &src) {
2228 return doAppend(internal::toU16StringView(src));
2229 }
2230#endif // U_HIDE_DRAFT_API
2231
2246 inline UnicodeString& append(const UnicodeString& srcText,
2249
2257 inline UnicodeString& append(const UnicodeString& srcText);
2258
2272 inline UnicodeString& append(const char16_t *srcChars,
2275
2285 inline UnicodeString& append(ConstChar16Ptr srcChars,
2287
2288#ifndef U_HIDE_DRAFT_API
2299 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
2300 inline UnicodeString& append(const S &src) {
2301 return doAppend(internal::toU16StringView(src));
2302 }
2303#endif // U_HIDE_DRAFT_API
2304
2311 inline UnicodeString& append(char16_t srcChar);
2312
2320
2321
2322 /* Insert operations */
2323
2337 inline UnicodeString& insert(int32_t start,
2338 const UnicodeString& srcText,
2341
2350 inline UnicodeString& insert(int32_t start,
2351 const UnicodeString& srcText);
2352
2366 inline UnicodeString& insert(int32_t start,
2367 const char16_t *srcChars,
2370
2380 inline UnicodeString& insert(int32_t start,
2383
2392 inline UnicodeString& insert(int32_t start,
2393 char16_t srcChar);
2394
2403 inline UnicodeString& insert(int32_t start,
2405
2406
2407 /* Replace operations */
2408
2426 inline UnicodeString& replace(int32_t start,
2427 int32_t length,
2428 const UnicodeString& srcText,
2431
2444 inline UnicodeString& replace(int32_t start,
2445 int32_t length,
2446 const UnicodeString& srcText);
2447
2465 inline UnicodeString& replace(int32_t start,
2466 int32_t length,
2467 const char16_t *srcChars,
2470
2483 inline UnicodeString& replace(int32_t start,
2484 int32_t length,
2487
2499 inline UnicodeString& replace(int32_t start,
2500 int32_t length,
2501 char16_t srcChar);
2502
2515
2525 inline UnicodeString& replaceBetween(int32_t start,
2526 int32_t limit,
2527 const UnicodeString& srcText);
2528
2543 inline UnicodeString& replaceBetween(int32_t start,
2544 int32_t limit,
2545 const UnicodeString& srcText,
2548
2556 virtual void handleReplaceBetween(int32_t start,
2557 int32_t limit,
2558 const UnicodeString& text) override;
2559
2565 virtual UBool hasMetaData() const override;
2566
2580 virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2581
2582 /* Search and replace operations */
2583
2592 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2593 const UnicodeString& newText);
2594
2606 inline UnicodeString& findAndReplace(int32_t start,
2607 int32_t length,
2608 const UnicodeString& oldText,
2609 const UnicodeString& newText);
2610
2629 int32_t length,
2630 const UnicodeString& oldText,
2632 int32_t oldLength,
2633 const UnicodeString& newText,
2635 int32_t newLength);
2636
2637
2638 /* Remove operations */
2639
2648 inline UnicodeString& remove();
2649
2658 inline UnicodeString& remove(int32_t start,
2659 int32_t length = static_cast<int32_t>(INT32_MAX));
2660
2669 inline UnicodeString& removeBetween(int32_t start,
2670 int32_t limit = static_cast<int32_t>(INT32_MAX));
2671
2681 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2682
2683 /* Length operations */
2684
2697 char16_t padChar = 0x0020);
2698
2711 char16_t padChar = 0x0020);
2712
2719 inline UBool truncate(int32_t targetLength);
2720
2727
2728 /* Miscellaneous operations */
2729
2735 inline UnicodeString& reverse();
2736
2745 inline UnicodeString& reverse(int32_t start,
2746 int32_t length);
2747
2755
2764
2772
2781
2782#if !UCONFIG_NO_BREAK_ITERATION
2783
2811
2840
2872
2873#endif
2874
2888 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2889
2890 //========================================
2891 // Access to the internal buffer
2892 //========================================
2893
2938
2959 void releaseBuffer(int32_t newLength=-1);
2960
2991 inline const char16_t *getBuffer() const;
2992
3026 const char16_t *getTerminatedBuffer();
3027
3028#ifndef U_HIDE_DRAFT_API
3035 inline operator std::u16string_view() const {
3036 return {getBuffer(), static_cast<std::u16string_view::size_type>(length())};
3037 }
3038
3039#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3049 inline operator std::wstring_view() const {
3050 const char16_t *p = getBuffer();
3051#ifdef U_ALIASING_BARRIER
3053#endif
3054 return { reinterpret_cast<const wchar_t *>(p), (std::wstring_view::size_type)length() };
3055 }
3056#endif // U_SIZEOF_WCHAR_T
3057#endif // U_HIDE_DRAFT_API
3058
3059 //========================================
3060 // Constructors
3061 //========================================
3062
3066 inline UnicodeString();
3067
3080
3091
3102
3103#ifdef U_HIDE_DRAFT_API
// Constructs from a char16_t pointer by delegating to the (text, textLength)
// constructor with textLength = -1. Compiled only when U_HIDE_DRAFT_API is defined
// (see the enclosing #ifdef).
3123 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text) :
3124 UnicodeString(text, -1) {}
3125#endif // U_HIDE_DRAFT_API
3126
3127#if !U_CHAR16_IS_TYPEDEF && \
3128 (defined(U_HIDE_DRAFT_API) || (defined(_LIBCPP_VERSION) && _LIBCPP_VERSION >= 180000))
3149 UnicodeString(ConstChar16Ptr(text), -1) {}
3150#endif
3151
3152#if defined(U_HIDE_DRAFT_API) && (U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN))
// Constructs from a wchar_t pointer: wraps it in ConstChar16Ptr and delegates to
// another UnicodeString constructor with length -1. Compiled only when
// U_HIDE_DRAFT_API is defined and wchar_t is 2 bytes (or for Doxygen).
3173 UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text) :
3174 UnicodeString(ConstChar16Ptr(text), -1) {}
3175#endif
3176
3187 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3188
3205 UnicodeString(const char16_t *text,
3206 int32_t textLength);
3207
3208#if !U_CHAR16_IS_TYPEDEF
// Constructs from a uint16_t pointer plus length: wraps the pointer in ConstChar16Ptr
// and delegates to another UnicodeString constructor. Available only when
// char16_t is not a typedef (!U_CHAR16_IS_TYPEDEF).
3225 UnicodeString(const uint16_t *text, int32_t textLength) :
3226 UnicodeString(ConstChar16Ptr(text), textLength) {}
3227#endif
3228
3229#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
// Constructs from a wchar_t pointer plus length: wraps the pointer in ConstChar16Ptr
// and delegates to another UnicodeString constructor. Available only when
// wchar_t is 2 bytes (U_SIZEOF_WCHAR_T==2) or for Doxygen.
3247 UnicodeString(const wchar_t *text, int32_t textLength) :
3248 UnicodeString(ConstChar16Ptr(text), textLength) {}
3249#endif
3250
3258 inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3259
3260#ifndef U_HIDE_DRAFT_API
3273 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3275 fUnion.fFields.fLengthAndFlags = kShortString;
3276 doAppend(internal::toU16StringViewNullable(text));
3277 }
3278#endif // U_HIDE_DRAFT_API
3279
3311 ConstChar16Ptr text,
3312 int32_t textLength);
3313
3333
3334#if !U_CHAR16_IS_TYPEDEF
3345#endif
3346
3347#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3359#endif
3360
3369 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3370
3371#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3372
3397
3407
3408#endif
3409
3410#if !UCONFIG_NO_CONVERSION
3411
3429 UnicodeString(const char *codepageData, const char *codepage);
3430
3449
3472 const char *src, int32_t srcLength,
3473 UConverter *cnv,
3474 UErrorCode &errorCode);
3475
3476#endif
3477
3510 UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3511
3512
3530
3538
3546
3555
3569 virtual UnicodeString *clone() const override;
3570
3575
3576#ifndef U_HIDE_DRAFT_API
3599 template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
3600 static inline UnicodeString readOnlyAlias(const S &text) {
3601 return readOnlyAliasFromU16StringView(internal::toU16StringView(text));
3602 }
3603
3623 static inline UnicodeString readOnlyAlias(const UnicodeString &text) {
3624 return readOnlyAliasFromUnicodeString(text);
3625 }
3626#endif // U_HIDE_DRAFT_API
3627
3642
3655
3656 /* Miscellaneous operations */
3657
3693
3714
3721
3727 virtual UClassID getDynamicClassID() const override;
3728
3729 //========================================
3730 // Implementation methods
3731 //========================================
3732
3733protected:
3738 virtual int32_t getLength() const override;
3739
3745 virtual char16_t getCharAt(int32_t offset) const override;
3746
3752 virtual UChar32 getChar32At(int32_t offset) const override;
3753
3754private:
3755 static UnicodeString readOnlyAliasFromU16StringView(std::u16string_view text);
3756 static UnicodeString readOnlyAliasFromUnicodeString(const UnicodeString &text);
3757
3758 // For char* constructors. Could be made public.
3759 UnicodeString &setToUTF8(StringPiece utf8);
3760 // For extract(char*).
3761 // We could make a toUTF8(target, capacity, errorCode) public but not
3762 // this version: New API will be cleaner if we make callers create substrings
3763 // rather than having start+length on every method,
3764 // and it should take a UErrorCode&.
3765 int32_t
3766 toUTF8(int32_t start, int32_t len,
3767 char *target, int32_t capacity) const;
3768
3773 inline UBool doEquals(const UnicodeString &text, int32_t len) const {
3774 return doEquals(text.getArrayStart(), len);
3775 }
3776 UBool doEquals(const char16_t *text, int32_t len) const;
3777
3778 inline UBool
3779 doEqualsSubstring(int32_t start,
3780 int32_t length,
3781 const UnicodeString& srcText,
3783 int32_t srcLength) const;
3784
3785 UBool doEqualsSubstring(int32_t start,
3786 int32_t length,
3787 const char16_t *srcChars,
3789 int32_t srcLength) const;
3790
3791 inline int8_t
3792 doCompare(int32_t start,
3793 int32_t length,
3794 const UnicodeString& srcText,
3796 int32_t srcLength) const;
3797
3798 int8_t doCompare(int32_t start,
3799 int32_t length,
3800 const char16_t *srcChars,
3802 int32_t srcLength) const;
3803
3804 inline int8_t
3805 doCompareCodePointOrder(int32_t start,
3806 int32_t length,
3807 const UnicodeString& srcText,
3809 int32_t srcLength) const;
3810
3811 int8_t doCompareCodePointOrder(int32_t start,
3812 int32_t length,
3813 const char16_t *srcChars,
3815 int32_t srcLength) const;
3816
3817 inline int8_t
3818 doCaseCompare(int32_t start,
3819 int32_t length,
3820 const UnicodeString &srcText,
3823 uint32_t options) const;
3824
3825 int8_t
3826 doCaseCompare(int32_t start,
3827 int32_t length,
3828 const char16_t *srcChars,
3831 uint32_t options) const;
3832
3833 int32_t doIndexOf(char16_t c,
3834 int32_t start,
3835 int32_t length) const;
3836
3837 int32_t doIndexOf(UChar32 c,
3838 int32_t start,
3839 int32_t length) const;
3840
3841 int32_t doLastIndexOf(char16_t c,
3842 int32_t start,
3843 int32_t length) const;
3844
3845 int32_t doLastIndexOf(UChar32 c,
3846 int32_t start,
3847 int32_t length) const;
3848
3849 void doExtract(int32_t start,
3850 int32_t length,
3851 char16_t *dst,
3852 int32_t dstStart) const;
3853
3854 inline void doExtract(int32_t start,
3855 int32_t length,
3856 UnicodeString& target) const;
3857
3858 inline char16_t doCharAt(int32_t offset) const;
3859
3860 UnicodeString& doReplace(int32_t start,
3861 int32_t length,
3862 const UnicodeString& srcText,
3865
3866 UnicodeString& doReplace(int32_t start,
3867 int32_t length,
3868 const char16_t *srcChars,
3871 UnicodeString& doReplace(int32_t start, int32_t length, std::u16string_view src);
3872
3874 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3875 UnicodeString& doAppend(std::u16string_view src);
3876
3877 UnicodeString& doReverse(int32_t start,
3878 int32_t length);
3879
3880 // calculate hash code
3881 int32_t doHashCode() const;
3882
3883 // get pointer to start of array
3884 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3885 inline char16_t* getArrayStart();
3886 inline const char16_t* getArrayStart() const;
3887
3888 inline UBool hasShortLength() const;
3889 inline int32_t getShortLength() const;
3890
3891 // A UnicodeString object (not necessarily its current buffer)
3892 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3893 inline UBool isWritable() const;
3894
3895 // Is the current buffer writable?
3896 inline UBool isBufferWritable() const;
3897
3898 // None of the following does releaseArray().
3899 inline void setZeroLength();
3900 inline void setShortLength(int32_t len);
3901 inline void setLength(int32_t len);
3902 inline void setToEmpty();
3903 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3904
3905 // allocate the array; result may be the stack buffer
3906 // sets refCount to 1 if appropriate
3907 // sets fArray, fCapacity, and flags
3908 // sets length to 0
3909 // returns boolean for success or failure
3910 UBool allocate(int32_t capacity);
3911
3912 // release the array if owned
3913 void releaseArray();
3914
3915 // turn a bogus string into an empty one
3916 void unBogus();
3917
3918 // implements assignment operator, copy constructor, and fastCopyFrom()
3919 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3920
3921 // Copies just the fields without memory management.
3922 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3923
3924 // Pin start and limit to acceptable values.
3925 inline void pinIndex(int32_t& start) const;
3926 inline void pinIndices(int32_t& start,
3927 int32_t& length) const;
3928
3929#if !UCONFIG_NO_CONVERSION
3930
3931 /* Internal extract() using UConverter. */
3932 int32_t doExtract(int32_t start, int32_t length,
3933 char *dest, int32_t destCapacity,
3934 UConverter *cnv,
3935 UErrorCode &errorCode) const;
3936
3937 /*
3938 * Real constructor for converting from codepage data.
3939 * It assumes that it is called with !fRefCounted.
3940 *
3941 * If `codepage==0`, then the default converter
3942 * is used for the platform encoding.
3943 * If `codepage` is an empty string (`""`),
3944 * then a simple conversion is performed on the codepage-invariant
3945 * subset ("invariant characters") of the platform encoding. See utypes.h.
3946 */
3947 void doCodepageCreate(const char *codepageData,
3949 const char *codepage);
3950
3951 /*
3952 * Worker function for creating a UnicodeString from
3953 * a codepage string using a UConverter.
3954 */
3955 void
3956 doCodepageCreate(const char *codepageData,
3958 UConverter *converter,
3960
3961#endif
3962
3963 /*
3964 * This function is called when write access to the array
3965 * is necessary.
3966 *
3967 * We need to make a copy of the array if
3968 * the buffer is read-only, or
3969 * the buffer is refCounted (shared), and refCount>1, or
3970 * the buffer is too small.
3971 *
3972 * Return false if memory could not be allocated.
3973 */
3974 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3975 int32_t growCapacity = -1,
3976 UBool doCopyArray = true,
3977 int32_t** pBufferToDelete = nullptr,
3978 UBool forceClone = false);
3979
3986 caseMap(int32_t caseLocale, uint32_t options,
3989#endif
3991
3992 // ref counting
3993 void addRef();
3994 int32_t removeRef();
3995 int32_t refCount() const;
3996
3997 // constants
// Implementation constants: in-object buffer capacity, sentinel values, and the
// bit layout of fLengthAndFlags (five storage-flag bits below a signed short length).
3998 enum {
// Capacity of the in-object buffer in code units: UNISTR_OBJECT_SIZE minus one
// pointer (the implicit vtable pointer, per the field-layout notes further down)
// minus the 2-byte fLengthAndFlags, divided by the size of one code unit.
4004 US_STACKBUF_SIZE = static_cast<int32_t>(UNISTR_OBJECT_SIZE - sizeof(void*) - 2) / U_SIZEOF_UCHAR,
4005 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
4006 kInvalidHashCode=0, // invalid hash code
4007 kEmptyHashCode=1, // hash code for empty string
4008
4009 // bit flag values for fLengthAndFlags
4010 kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
4011 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
4012 kRefCounted=4, // there is a refCount field before the characters in fArray
4013 kBufferIsReadonly=8,// do not write to this buffer
4014 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
4015 // and releaseBuffer(newLength) must be called
4016 kAllStorageFlags=0x1f, // mask covering all five flag bits above (1|2|4|8|16)
4017
4018 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
4019 kLength1=1<<kLengthShift, // a short length of 1 as encoded in fLengthAndFlags
4020 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0); 0x3ff == 1023
4021 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
4022
4023 // combined values for convenience
4024 kShortString=kUsingStackBuffer,
4025 kLongString=kRefCounted,
4026 kReadonlyAlias=kBufferIsReadonly,
4027 kWritableAlias=0
4028 };
4029
4030 friend class UnicodeStringAppendable;
4031
4032 union StackBufferOrFields; // forward declaration necessary before friend declaration
4033 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
4034
4035 /*
4036 * The following are all the class fields that are stored
4037 * in each UnicodeString object.
4038 * Note that UnicodeString has virtual functions,
4039 * therefore there is an implicit vtable pointer
4040 * as the first real field.
4041 * The fields should be aligned such that no padding is necessary.
4042 * The total object size should be UNISTR_OBJECT_SIZE bytes (64 by default):
4043 * the vtable pointer plus fUnion, whose stack buffer is sized via US_STACKBUF_SIZE.
4044 *
4045 * We use a hack to achieve this.
4046 *
4047 * With at least some compilers, each of the following is forced to
4048 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
4049 * rounded up with additional padding if the fields do not already fit that requirement:
4050 * - sizeof(class UnicodeString)
4051 * - offsetof(UnicodeString, fUnion)
4052 * - sizeof(fUnion)
4053 * - sizeof(fStackFields)
4054 *
4055 * We optimize for the longest possible internal buffer for short strings.
4056 * fUnion.fStackFields begins with 2 bytes for storage flags
4057 * and the length of relatively short strings,
4058 * followed by the buffer for short string contents.
4059 * There is no padding inside fStackFields.
4060 *
4061 * Heap-allocated and aliased strings use fUnion.fFields.
4062 * Both fStackFields and fFields must begin with the same fields for flags and short length,
4063 * that is, those must have the same memory offsets inside the object,
4064 * because the flags must be inspected in order to decide which half of fUnion is being used.
4065 * We assume that the compiler does not reorder the fields.
4066 *
4067 * (Padding at the end of fFields is ok:
4068 * As long as it is no larger than fStackFields, it is not wasted space.)
4069 *
4070 * For some of the history of the UnicodeString class fields layout, see
4071 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
4072 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
4073 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
4074 */
4075 // (implicit) *vtable;
// Storage for the string: either an in-object buffer (fStackFields) or
// length/capacity/pointer fields describing an external array (fFields).
4076 union StackBufferOrFields {
4077 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
4078 // Each struct of the union must begin with fLengthAndFlags.
4079 struct {
4080 int16_t fLengthAndFlags; // bit fields: see constants above
4081 char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
4082 } fStackFields;
4083 struct {
4084 int16_t fLengthAndFlags; // bit fields: see constants above
4085 int32_t fLength; // number of characters in fArray when the short length in fLengthAndFlags is negative (kLengthIsLarge); else undefined
4086 int32_t fCapacity; // capacity of fArray (in char16_ts)
4087 // array pointer last to minimize padding for machines with P128 data model
4088 // or pointer sizes that are not a power of 2
4089 char16_t *fArray; // the Unicode data
4090 } fFields;
4091 } fUnion;
4092};
4093
4102U_COMMON_API UnicodeString U_EXPORT2
4104
4105#ifndef U_HIDE_DRAFT_API
4116template<typename S, typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
4117inline UnicodeString operator+(const UnicodeString &s1, const S &s2) {
4118 return unistr_internalConcat(s1, internal::toU16StringView(s2));
4119}
4120#endif // U_HIDE_DRAFT_API
4121
4122#ifndef U_FORCE_HIDE_INTERNAL_API
4124U_COMMON_API UnicodeString U_EXPORT2
4125unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2);
4126#endif
4127
4128//========================================
4129// Inline members
4130//========================================
4131
4132//========================================
4133// Privates
4134//========================================
4135
4136inline void
4137UnicodeString::pinIndex(int32_t& start) const
4138{
4139 // pin index
4140 if(start < 0) {
4141 start = 0;
4142 } else if(start > length()) {
4143 start = length();
4144 }
4145}
4146
4147inline void
4148UnicodeString::pinIndices(int32_t& start,
4149 int32_t& _length) const
4150{
4151 // pin indices
4152 int32_t len = length();
4153 if(start < 0) {
4154 start = 0;
4155 } else if(start > len) {
4156 start = len;
4157 }
4158 if(_length < 0) {
4159 _length = 0;
4160 } else if(_length > (len - start)) {
4161 _length = (len - start);
4162 }
4163}
4164
4165inline char16_t*
4166UnicodeString::getArrayStart() {
4167 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4168 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4169}
4170
4171inline const char16_t*
4172UnicodeString::getArrayStart() const {
4173 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4174 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
4175}
4176
4177//========================================
4178// Default constructor
4179//========================================
4180
4181inline
4182UnicodeString::UnicodeString() {
4183 fUnion.fStackFields.fLengthAndFlags=kShortString;
4184}
4185
4186inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
4187 fUnion.fStackFields.fLengthAndFlags=kShortString;
4188}
4189
4190inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
4191 fUnion.fStackFields.fLengthAndFlags=kShortString;
4192}
4193
4194inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
4195 fUnion.fStackFields.fLengthAndFlags=kShortString;
4196}
4197
4198//========================================
4199// Read-only implementation methods
4200//========================================
4201inline UBool
4202UnicodeString::hasShortLength() const {
4203 return fUnion.fFields.fLengthAndFlags>=0;
4204}
4205
4206inline int32_t
4207UnicodeString::getShortLength() const {
4208 // fLengthAndFlags must be non-negative -> short length >= 0
4209 // and arithmetic or logical shift does not matter.
4210 return fUnion.fFields.fLengthAndFlags>>kLengthShift;
4211}
4212
4213inline int32_t
4214UnicodeString::length() const {
4215 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
4216}
4217
4218inline int32_t
4219UnicodeString::getCapacity() const {
4220 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
4221 US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
4222}
4223
4224inline int32_t
4225UnicodeString::hashCode() const
4226{ return doHashCode(); }
4227
4228inline UBool
4229UnicodeString::isBogus() const
4230{ return fUnion.fFields.fLengthAndFlags & kIsBogus; }
4231
4232inline UBool
4233UnicodeString::isWritable() const
4234{ return !(fUnion.fFields.fLengthAndFlags & (kOpenGetBuffer | kIsBogus)); }
4235
4236inline UBool
4237UnicodeString::isBufferWritable() const
4238{
4239 return
4240 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
4241 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1);
4242}
4243
4244inline const char16_t *
4245UnicodeString::getBuffer() const {
4246 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
4247 return nullptr;
4248 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
4249 return fUnion.fStackFields.fBuffer;
4250 } else {
4251 return fUnion.fFields.fArray;
4252 }
4253}
4254
4255//========================================
4256// Read-only alias methods
4257//========================================
4258inline int8_t
4259UnicodeString::doCompare(int32_t start,
4261 const UnicodeString& srcText,
4263 int32_t srcLength) const
4264{
4265 if(srcText.isBogus()) {
4266 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4267 } else {
4268 srcText.pinIndices(srcStart, srcLength);
4269 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4270 }
4271}
4272
4273inline UBool
4274UnicodeString::doEqualsSubstring(int32_t start,
4275 int32_t thisLength,
4276 const UnicodeString& srcText,
4277 int32_t srcStart,
4278 int32_t srcLength) const
4279{
4280 if(srcText.isBogus()) {
4281 return isBogus();
4282 } else {
4283 srcText.pinIndices(srcStart, srcLength);
4284 return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4285 }
4286}
4287
4288inline bool
4289UnicodeString::operator== (const UnicodeString& text) const
4290{
4291 if(isBogus()) {
4292 return text.isBogus();
4293 } else {
4294 int32_t len = length(), textLength = text.length();
4295 return !text.isBogus() && len == textLength && doEquals(text, len);
4296 }
4297}
4298
4299inline bool
4300UnicodeString::operator!= (const UnicodeString& text) const
4301{ return (! operator==(text)); }
4302
4303inline UBool
4304UnicodeString::operator> (const UnicodeString& text) const
4305{ return doCompare(0, length(), text, 0, text.length()) == 1; }
4306
4307inline UBool
4308UnicodeString::operator< (const UnicodeString& text) const
4309{ return doCompare(0, length(), text, 0, text.length()) == -1; }
4310
4311inline UBool
4312UnicodeString::operator>= (const UnicodeString& text) const
4313{ return doCompare(0, length(), text, 0, text.length()) != -1; }
4314
4315inline UBool
4316UnicodeString::operator<= (const UnicodeString& text) const
4317{ return doCompare(0, length(), text, 0, text.length()) != 1; }
4318
4319inline int8_t
4320UnicodeString::compare(const UnicodeString& text) const
4321{ return doCompare(0, length(), text, 0, text.length()); }
4322
4323inline int8_t
4324UnicodeString::compare(int32_t start,
4326 const UnicodeString& srcText) const
4327{ return doCompare(start, _length, srcText, 0, srcText.length()); }
4328
4329inline int8_t
4330UnicodeString::compare(ConstChar16Ptr srcChars,
4331 int32_t srcLength) const
4332{ return doCompare(0, length(), srcChars, 0, srcLength); }
4333
4334inline int8_t
4335UnicodeString::compare(int32_t start,
4337 const UnicodeString& srcText,
4339 int32_t srcLength) const
4340{ return doCompare(start, _length, srcText, srcStart, srcLength); }
4341
4342inline int8_t
4343UnicodeString::compare(int32_t start,
4345 const char16_t *srcChars) const
4346{ return doCompare(start, _length, srcChars, 0, _length); }
4347
4348inline int8_t
4349UnicodeString::compare(int32_t start,
4351 const char16_t *srcChars,
4353 int32_t srcLength) const
4354{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4355
4356inline int8_t
4357UnicodeString::compareBetween(int32_t start,
4358 int32_t limit,
4359 const UnicodeString& srcText,
4361 int32_t srcLimit) const
4362{ return doCompare(start, limit - start,
4364
4365inline int8_t
4366UnicodeString::doCompareCodePointOrder(int32_t start,
4368 const UnicodeString& srcText,
4370 int32_t srcLength) const
4371{
4372 if(srcText.isBogus()) {
4373 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4374 } else {
4375 srcText.pinIndices(srcStart, srcLength);
4376 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4377 }
4378}
4379
4380inline int8_t
4381UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4382{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4383
4384inline int8_t
4385UnicodeString::compareCodePointOrder(int32_t start,
4387 const UnicodeString& srcText) const
4388{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4389
4390inline int8_t
4391UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4392 int32_t srcLength) const
4393{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4394
4395inline int8_t
4396UnicodeString::compareCodePointOrder(int32_t start,
4398 const UnicodeString& srcText,
4400 int32_t srcLength) const
4401{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4402
4403inline int8_t
4404UnicodeString::compareCodePointOrder(int32_t start,
4406 const char16_t *srcChars) const
4407{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4408
4409inline int8_t
4410UnicodeString::compareCodePointOrder(int32_t start,
4412 const char16_t *srcChars,
4414 int32_t srcLength) const
4415{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4416
4417inline int8_t
4418UnicodeString::compareCodePointOrderBetween(int32_t start,
4419 int32_t limit,
4420 const UnicodeString& srcText,
4422 int32_t srcLimit) const
4423{ return doCompareCodePointOrder(start, limit - start,
4425
4426inline int8_t
4427UnicodeString::doCaseCompare(int32_t start,
4429 const UnicodeString &srcText,
4432 uint32_t options) const
4433{
4434 if(srcText.isBogus()) {
4435 return static_cast<int8_t>(!isBogus()); // 0 if both are bogus, 1 otherwise
4436 } else {
4437 srcText.pinIndices(srcStart, srcLength);
4438 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4439 }
4440}
4441
4442inline int8_t
4443UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4444 return doCaseCompare(0, length(), text, 0, text.length(), options);
4445}
4446
4447inline int8_t
4448UnicodeString::caseCompare(int32_t start,
4450 const UnicodeString &srcText,
4451 uint32_t options) const {
4452 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4453}
4454
4455inline int8_t
4456UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4458 uint32_t options) const {
4459 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4460}
4461
4462inline int8_t
4463UnicodeString::caseCompare(int32_t start,
4465 const UnicodeString &srcText,
4468 uint32_t options) const {
4469 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4470}
4471
4472inline int8_t
4473UnicodeString::caseCompare(int32_t start,
4475 const char16_t *srcChars,
4476 uint32_t options) const {
4477 return doCaseCompare(start, _length, srcChars, 0, _length, options);
4478}
4479
4480inline int8_t
4481UnicodeString::caseCompare(int32_t start,
4483 const char16_t *srcChars,
4486 uint32_t options) const {
4487 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4488}
4489
4490inline int8_t
4491UnicodeString::caseCompareBetween(int32_t start,
4492 int32_t limit,
4493 const UnicodeString &srcText,
4496 uint32_t options) const {
4497 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4498}
4499
4500inline int32_t
4501UnicodeString::indexOf(const UnicodeString& srcText,
4504 int32_t start,
4505 int32_t _length) const
4506{
4507 if(!srcText.isBogus()) {
4508 srcText.pinIndices(srcStart, srcLength);
4509 if(srcLength > 0) {
4510 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4511 }
4512 }
4513 return -1;
4514}
4515
4516inline int32_t
4517UnicodeString::indexOf(const UnicodeString& text) const
4518{ return indexOf(text, 0, text.length(), 0, length()); }
4519
4520inline int32_t
4521UnicodeString::indexOf(const UnicodeString& text,
4522 int32_t start) const {
4523 pinIndex(start);
4524 return indexOf(text, 0, text.length(), start, length() - start);
4525}
4526
4527inline int32_t
4528UnicodeString::indexOf(const UnicodeString& text,
4529 int32_t start,
4530 int32_t _length) const
4531{ return indexOf(text, 0, text.length(), start, _length); }
4532
4533inline int32_t
4534UnicodeString::indexOf(const char16_t *srcChars,
4536 int32_t start) const {
4537 pinIndex(start);
4538 return indexOf(srcChars, 0, srcLength, start, length() - start);
4539}
4540
4541inline int32_t
4542UnicodeString::indexOf(ConstChar16Ptr srcChars,
4544 int32_t start,
4545 int32_t _length) const
4546{ return indexOf(srcChars, 0, srcLength, start, _length); }
4547
4548inline int32_t
4549UnicodeString::indexOf(char16_t c,
4550 int32_t start,
4551 int32_t _length) const
4552{ return doIndexOf(c, start, _length); }
4553
4554inline int32_t
4555UnicodeString::indexOf(UChar32 c,
4556 int32_t start,
4557 int32_t _length) const
4558{ return doIndexOf(c, start, _length); }
4559
4560inline int32_t
4561UnicodeString::indexOf(char16_t c) const
4562{ return doIndexOf(c, 0, length()); }
4563
4564inline int32_t
4565UnicodeString::indexOf(UChar32 c) const
4566{ return indexOf(c, 0, length()); }
4567
4568inline int32_t
4569UnicodeString::indexOf(char16_t c,
4570 int32_t start) const {
4571 pinIndex(start);
4572 return doIndexOf(c, start, length() - start);
4573}
4574
4575inline int32_t
4576UnicodeString::indexOf(UChar32 c,
4577 int32_t start) const {
4578 pinIndex(start);
4579 return indexOf(c, start, length() - start);
4580}
4581
4582inline int32_t
4583UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4585 int32_t start,
4586 int32_t _length) const
4587{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4588
4589inline int32_t
4590UnicodeString::lastIndexOf(const char16_t *srcChars,
4592 int32_t start) const {
4593 pinIndex(start);
4594 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4595}
4596
4597inline int32_t
4598UnicodeString::lastIndexOf(const UnicodeString& srcText,
4601 int32_t start,
4602 int32_t _length) const
4603{
4604 if(!srcText.isBogus()) {
4605 srcText.pinIndices(srcStart, srcLength);
4606 if(srcLength > 0) {
4607 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4608 }
4609 }
4610 return -1;
4611}
4612
4613inline int32_t
4614UnicodeString::lastIndexOf(const UnicodeString& text,
4615 int32_t start,
4616 int32_t _length) const
4617{ return lastIndexOf(text, 0, text.length(), start, _length); }
4618
4619inline int32_t
4620UnicodeString::lastIndexOf(const UnicodeString& text,
4621 int32_t start) const {
4622 pinIndex(start);
4623 return lastIndexOf(text, 0, text.length(), start, length() - start);
4624}
4625
4626inline int32_t
4627UnicodeString::lastIndexOf(const UnicodeString& text) const
4628{ return lastIndexOf(text, 0, text.length(), 0, length()); }
4629
4630inline int32_t
4631UnicodeString::lastIndexOf(char16_t c,
4632 int32_t start,
4633 int32_t _length) const
4634{ return doLastIndexOf(c, start, _length); }
4635
4636inline int32_t
4637UnicodeString::lastIndexOf(UChar32 c,
4638 int32_t start,
4639 int32_t _length) const {
4640 return doLastIndexOf(c, start, _length);
4641}
4642
4643inline int32_t
4644UnicodeString::lastIndexOf(char16_t c) const
4645{ return doLastIndexOf(c, 0, length()); }
4646
4647inline int32_t
4648UnicodeString::lastIndexOf(UChar32 c) const {
4649 return lastIndexOf(c, 0, length());
4650}
4651
4652inline int32_t
4653UnicodeString::lastIndexOf(char16_t c,
4654 int32_t start) const {
4655 pinIndex(start);
4656 return doLastIndexOf(c, start, length() - start);
4657}
4658
4659inline int32_t
4660UnicodeString::lastIndexOf(UChar32 c,
4661 int32_t start) const {
4662 pinIndex(start);
4663 return lastIndexOf(c, start, length() - start);
4664}
4665
4666inline UBool
4667UnicodeString::startsWith(const UnicodeString& text) const
4668{ return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4669
4670inline UBool
4671UnicodeString::startsWith(const UnicodeString& srcText,
4673 int32_t srcLength) const
4674{ return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4675
4676inline UBool
4677UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4678 if(srcLength < 0) {
4680 }
4681 return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4682}
4683
4684inline UBool
4685UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4686 if(srcLength < 0) {
4688 }
4689 return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4690}
4691
4692inline UBool
4693UnicodeString::endsWith(const UnicodeString& text) const
4694{ return doEqualsSubstring(length() - text.length(), text.length(),
4695 text, 0, text.length()); }
4696
4697inline UBool
4698UnicodeString::endsWith(const UnicodeString& srcText,
4700 int32_t srcLength) const {
4701 srcText.pinIndices(srcStart, srcLength);
4702 return doEqualsSubstring(length() - srcLength, srcLength,
4704}
4705
4706inline UBool
4707UnicodeString::endsWith(ConstChar16Ptr srcChars,
4708 int32_t srcLength) const {
4709 if(srcLength < 0) {
4711 }
4712 return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4713}
4714
4715inline UBool
4716UnicodeString::endsWith(const char16_t *srcChars,
4718 int32_t srcLength) const {
4719 if(srcLength < 0) {
4721 }
4722 return doEqualsSubstring(length() - srcLength, srcLength,
4724}
4725
4726//========================================
4727// replace
4728//========================================
4729inline UnicodeString&
4730UnicodeString::replace(int32_t start,
4732 const UnicodeString& srcText)
4733{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4734
4735inline UnicodeString&
4736UnicodeString::replace(int32_t start,
4738 const UnicodeString& srcText,
4741{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4742
4743inline UnicodeString&
4744UnicodeString::replace(int32_t start,
4748{ return doReplace(start, _length, srcChars, 0, srcLength); }
4749
4750inline UnicodeString&
4751UnicodeString::replace(int32_t start,
4753 const char16_t *srcChars,
4756{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4757
4758inline UnicodeString&
4759UnicodeString::replace(int32_t start,
4761 char16_t srcChar)
4762{ return doReplace(start, _length, &srcChar, 0, 1); }
4763
4764inline UnicodeString&
4765UnicodeString::replaceBetween(int32_t start,
4766 int32_t limit,
4767 const UnicodeString& srcText)
4768{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4769
4770inline UnicodeString&
4771UnicodeString::replaceBetween(int32_t start,
4772 int32_t limit,
4773 const UnicodeString& srcText,
4776{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4777
4778inline UnicodeString&
4779UnicodeString::findAndReplace(const UnicodeString& oldText,
4780 const UnicodeString& newText)
4781{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4782 newText, 0, newText.length()); }
4783
4784inline UnicodeString&
4785UnicodeString::findAndReplace(int32_t start,
4787 const UnicodeString& oldText,
4788 const UnicodeString& newText)
4789{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4790 newText, 0, newText.length()); }
4791
4792// ============================
4793// extract
4794// ============================
4795inline void
4796UnicodeString::doExtract(int32_t start,
4798 UnicodeString& target) const
4799{ target.replace(0, target.length(), *this, start, _length); }
4800
4801inline void
4802UnicodeString::extract(int32_t start,
4804 Char16Ptr target,
4805 int32_t targetStart) const
4806{ doExtract(start, _length, target, targetStart); }
4807
4808inline void
4809UnicodeString::extract(int32_t start,
4811 UnicodeString& target) const
4812{ doExtract(start, _length, target); }
4813
4814#if !UCONFIG_NO_CONVERSION
4815
4816inline int32_t
4817UnicodeString::extract(int32_t start,
4819 char *dst,
4820 const char *codepage) const
4821
4822{
4823 // This dstSize value will be checked explicitly
4824 return extract(start, _length, dst, dst != nullptr ? 0xffffffff : 0, codepage);
4825}
4826
4827#endif
4828
4829inline void
4830UnicodeString::extractBetween(int32_t start,
4831 int32_t limit,
4832 char16_t *dst,
4833 int32_t dstStart) const {
4834 pinIndex(start);
4835 pinIndex(limit);
4836 doExtract(start, limit - start, dst, dstStart);
4837}
4838
4839inline UnicodeString
4840UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4841 return tempSubString(start, limit - start);
4842}
4843
4844inline char16_t
4845UnicodeString::doCharAt(int32_t offset) const
4846{
4847 if (static_cast<uint32_t>(offset) < static_cast<uint32_t>(length())) {
4848 return getArrayStart()[offset];
4849 } else {
4850 return kInvalidUChar;
4851 }
4852}
4853
4854inline char16_t
4855UnicodeString::charAt(int32_t offset) const
4856{ return doCharAt(offset); }
4857
4858inline char16_t
4859UnicodeString::operator[] (int32_t offset) const
4860{ return doCharAt(offset); }
4861
4862inline UBool
4863UnicodeString::isEmpty() const {
4864 // Arithmetic or logical right shift does not matter: only testing for 0.
4865 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4866}
4867
4868//========================================
4869// Write implementation methods
4870//========================================
4871inline void
4872UnicodeString::setZeroLength() {
4873 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4874}
4875
4876inline void
4877UnicodeString::setShortLength(int32_t len) {
4878 // requires 0 <= len <= kMaxShortLength
4879 fUnion.fFields.fLengthAndFlags =
4880 static_cast<int16_t>((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4881}
4882
4883inline void
4884UnicodeString::setLength(int32_t len) {
4885 if(len <= kMaxShortLength) {
4886 setShortLength(len);
4887 } else {
4888 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4889 fUnion.fFields.fLength = len;
4890 }
4891}
4892
4893inline void
4894UnicodeString::setToEmpty() {
4895 fUnion.fFields.fLengthAndFlags = kShortString;
4896}
4897
4898inline void
4899UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4900 setLength(len);
4901 fUnion.fFields.fArray = array;
4902 fUnion.fFields.fCapacity = capacity;
4903}
4904
4905inline UnicodeString&
4906UnicodeString::operator= (char16_t ch)
4907{ return doReplace(0, length(), &ch, 0, 1); }
4908
4909inline UnicodeString&
4910UnicodeString::operator= (UChar32 ch)
4911{ return replace(0, length(), ch); }
4912
4913inline UnicodeString&
4914UnicodeString::setTo(const UnicodeString& srcText,
4917{
4918 unBogus();
4919 return doReplace(0, length(), srcText, srcStart, srcLength);
4920}
4921
4922inline UnicodeString&
4923UnicodeString::setTo(const UnicodeString& srcText,
4925{
4926 unBogus();
4927 srcText.pinIndex(srcStart);
4928 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4929}
4930
4931inline UnicodeString&
4932UnicodeString::setTo(const UnicodeString& srcText)
4933{
4934 return copyFrom(srcText);
4935}
4936
4937inline UnicodeString&
4938UnicodeString::setTo(const char16_t *srcChars,
4940{
4941 unBogus();
4942 return doReplace(0, length(), srcChars, 0, srcLength);
4943}
4944
4945inline UnicodeString&
4946UnicodeString::setTo(char16_t srcChar)
4947{
4948 unBogus();
4949 return doReplace(0, length(), &srcChar, 0, 1);
4950}
4951
4952inline UnicodeString&
4953UnicodeString::setTo(UChar32 srcChar)
4954{
4955 unBogus();
4956 return replace(0, length(), srcChar);
4957}
4958
4959inline UnicodeString&
4960UnicodeString::append(const UnicodeString& srcText,
4963{ return doAppend(srcText, srcStart, srcLength); }
4964
4965inline UnicodeString&
4966UnicodeString::append(const UnicodeString& srcText)
4967{ return doAppend(srcText, 0, srcText.length()); }
4968
4969inline UnicodeString&
4970UnicodeString::append(const char16_t *srcChars,
4973{ return doAppend(srcChars, srcStart, srcLength); }
4974
4975inline UnicodeString&
4976UnicodeString::append(ConstChar16Ptr srcChars,
4978{ return doAppend(srcChars, 0, srcLength); }
4979
4980inline UnicodeString&
4981UnicodeString::append(char16_t srcChar)
4982{ return doAppend(&srcChar, 0, 1); }
4983
4984inline UnicodeString&
4985UnicodeString::operator+= (char16_t ch)
4986{ return doAppend(&ch, 0, 1); }
4987
4988inline UnicodeString&
4989UnicodeString::operator+= (UChar32 ch) {
4990 return append(ch);
4991}
4992
4993inline UnicodeString&
4994UnicodeString::operator+= (const UnicodeString& srcText)
4995{ return doAppend(srcText, 0, srcText.length()); }
4996
4997inline UnicodeString&
4998UnicodeString::insert(int32_t start,
4999 const UnicodeString& srcText,
5002{ return doReplace(start, 0, srcText, srcStart, srcLength); }
5003
5004inline UnicodeString&
5005UnicodeString::insert(int32_t start,
5006 const UnicodeString& srcText)
5007{ return doReplace(start, 0, srcText, 0, srcText.length()); }
5008
5009inline UnicodeString&
5010UnicodeString::insert(int32_t start,
5011 const char16_t *srcChars,
5014{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
5015
5016inline UnicodeString&
5017UnicodeString::insert(int32_t start,
5020{ return doReplace(start, 0, srcChars, 0, srcLength); }
5021
5022inline UnicodeString&
5023UnicodeString::insert(int32_t start,
5024 char16_t srcChar)
5025{ return doReplace(start, 0, &srcChar, 0, 1); }
5026
5027inline UnicodeString&
5028UnicodeString::insert(int32_t start,
5030{ return replace(start, 0, srcChar); }
5031
5032
5033inline UnicodeString&
5034UnicodeString::remove()
5035{
5036 // remove() of a bogus string makes the string empty and non-bogus
5037 if(isBogus()) {
5038 setToEmpty();
5039 } else {
5040 setZeroLength();
5041 }
5042 return *this;
5043}
5044
5045inline UnicodeString&
5046UnicodeString::remove(int32_t start,
5048{
5049 if(start <= 0 && _length == INT32_MAX) {
5050 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
5051 return remove();
5052 }
5053 return doReplace(start, _length, nullptr, 0, 0);
5054}
5055
5056inline UnicodeString&
5057UnicodeString::removeBetween(int32_t start,
5058 int32_t limit)
5059{ return doReplace(start, limit - start, nullptr, 0, 0); }
5060
5061inline UnicodeString &
5062UnicodeString::retainBetween(int32_t start, int32_t limit) {
5063 truncate(limit);
5064 return doReplace(0, start, nullptr, 0, 0);
5065}
5066
5067inline UBool
5068UnicodeString::truncate(int32_t targetLength)
5069{
5070 if(isBogus() && targetLength == 0) {
5071 // truncate(0) of a bogus string makes the string empty and non-bogus
5072 unBogus();
5073 return false;
5074 } else if (static_cast<uint32_t>(targetLength) < static_cast<uint32_t>(length())) {
5075 setLength(targetLength);
5076 return true;
5077 } else {
5078 return false;
5079 }
5080}
5081
5082inline UnicodeString&
5083UnicodeString::reverse()
5084{ return doReverse(0, length()); }
5085
5086inline UnicodeString&
5087UnicodeString::reverse(int32_t start,
5089{ return doReverse(start, _length); }
5090
5092
5093#endif /* U_SHOW_CPLUSPLUS_API */
5094
5095#endif
C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
#define U_ALIASING_BARRIER(ptr)
Barrier for pointer anti-aliasing optimizations even across function boundaries.
Definition char16ptr.h:36
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition brkiter.h:106
A ByteSink can be filled with bytes.
Definition bytestream.h:53
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:43
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:150
Records lengths of string edits but not replacement text.
Definition edits.h:80
"Smart pointer" base class; do not use directly: use LocalPointer etc.
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:195
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition rep.h:77
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition rep.h:251
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition rep.h:246
A string-like object that points to a sized piece of memory.
Definition stringpiece.h:61
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:296
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChar...
UnicodeString(const UnicodeString &that)
Copy constructor.
void swap(UnicodeString &other) noexcept
Swap strings.
virtual char16_t getCharAt(int32_t offset) const override
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline agai...
bool operator==(const S &text) const
Equality operator.
Definition unistr.h:347
virtual int32_t getLength() const override
Implement Replaceable::getLength() (see jitterbug 1027).
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UnicodeString & operator=(const S &src)
Assignment operator.
Definition unistr.h:1960
UnicodeString & operator=(UnicodeString &&src) noexcept
Move assignment operator; might leave src in bogus state.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UnicodeString(const wchar_t *text, int32_t textLength)
wchar_t * constructor.
Definition unistr.h:3247
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text) override
Replace a substring of this object with the given text.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain...
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
static UnicodeString readOnlyAlias(const UnicodeString &text)
Readonly-aliasing factory method.
Definition unistr.h:3623
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a ...
Definition unistr.h:307
bool operator!=(const S &text) const
Inequality operator.
Definition unistr.h:382
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
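Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with the characters in newText in the range [newStart, newStart + newLength) in the range [start, start + length).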
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
UnicodeString & operator+=(const S &src)
Append operator.
Definition unistr.h:2227
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + startLength) into an array of characters in a specified codepage.
virtual void copy(int32_t start, int32_t limit, int32_t dest) override
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const override
Copy the characters in the range [start, limit) into the UnicodeString target.
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the range [srcStart, srcStart + srcLength).
Definition unistr.h:4736
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
virtual UnicodeString * clone() const override
Clone this object, an instance of a subclass of Replaceable.
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UnicodeString & append(const S &src)
Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, to the UnicodeString object.
Definition unistr.h:2300
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
int32_t length() const
Return the length of the UnicodeString object.
Definition unistr.h:4214
virtual UChar32 getChar32At(int32_t offset) const override
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again (see jitterbug 709).
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + startLength) into an array of characters in the platform's default codepage.
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars in the range [srcStart, srcStart + srcLength), using bitwise comparison.
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
Definition unistr.h:3357
void setToBogus()
Make this UnicodeString object invalid.
friend void swap(UnicodeString &s1, UnicodeString &s2) noexcept
Non-member UnicodeString swap function.
Definition unistr.h:1990
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const S &text)
Constructor from text which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.
Definition unistr.h:3274
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count Unicode code points in the length char16_t code units of the string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
static UnicodeString readOnlyAlias(const S &text)
Readonly-aliasing factory method.
Definition unistr.h:3600
UnicodeString & trim()
Trims leading and trailing whitespace from this UnicodeString.
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
Definition unistr.h:1777
UnicodeString(UnicodeString &&src) noexcept
Move constructor; might leave src in bogus state.
UnicodeString & toUpper()
Convert the characters in this to UPPER CASE following the conventions of the default locale.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
Definition unistr.h:3343
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UBool isBogus() const
Determine if this object contains a valid string.
Definition unistr.h:4229
UnicodeString & toLower(const Locale &locale)
Convert the characters in this to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UnicodeString(const uint16_t *text, int32_t textLength)
uint16_t * constructor.
Definition unistr.h:3225
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
UnicodeString & toLower()
Convert the characters in this to lower case following the conventions of the default locale.
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
virtual UBool hasMetaData() const override
Replaceable API.
U_CAPI int32_t u_strlen(const UChar *s)
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
U_COMMON_API UnicodeString unistr_internalConcat(const UnicodeString &s1, std::u16string_view s2)
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
U_COMMON_API UnicodeString operator+(const UnicodeString &s1, const UnicodeString &s2)
Creates a new UnicodeString from the concatenation of two others.
const UChar * toUCharPtr(const char16_t *p)
Converts from const char16_t * to const UChar *.
Definition char16ptr.h:261
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to make sure that the calling convention is compatible.
Definition platform.h:846
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definition...
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
Definition ucnv_err.h:96
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
Definition uconfig.h:358
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:427
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition umachine.h:186
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:378
#define U_SIZEOF_UCHAR
Number of bytes in a UChar (always 2).
Definition umachine.h:330
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
Definition unistr.h:150
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
Definition unistr.h:71
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
Definition unistr.h:170
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
Definition unistr.h:208
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:315