ICU 74.1 74.1
unistr.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 1998-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8*
9* File unistr.h
10*
11* Modification History:
12*
13* Date Name Description
14* 09/25/98 stephen Creation.
15* 11/11/98 stephen Changed per 11/9 code review.
16* 04/20/99 stephen Overhauled per 4/16 code review.
17* 11/18/99 aliu Made to inherit from Replaceable. Added method
18* handleReplaceBetween(); other methods unchanged.
19* 06/25/01 grhoten Remove dependency on iostream.
20******************************************************************************
21*/
22
23#ifndef UNISTR_H
24#define UNISTR_H
25
31#include "unicode/utypes.h"
32
33#if U_SHOW_CPLUSPLUS_API
34
35#include <cstddef>
36#include "unicode/char16ptr.h"
37#include "unicode/rep.h"
38#include "unicode/std_string.h"
39#include "unicode/stringpiece.h"
40#include "unicode/bytestream.h"
41
42struct UConverter; // unicode/ucnv.h
43
44#ifndef USTRING_H
50U_CAPI int32_t U_EXPORT2 u_strlen(const UChar *s);
51#endif
52
53U_NAMESPACE_BEGIN
54
55#if !UCONFIG_NO_BREAK_ITERATION
56class BreakIterator; // unicode/brkiter.h
57#endif
58class Edits;
59
60U_NAMESPACE_END
61
62// Not #ifndef U_HIDE_INTERNAL_API because UnicodeString needs the UStringCaseMapper.
69typedef int32_t U_CALLCONV
70UStringCaseMapper(int32_t caseLocale, uint32_t options,
73#endif
74 char16_t *dest, int32_t destCapacity,
75 const char16_t *src, int32_t srcLength,
76 icu::Edits *edits,
77 UErrorCode &errorCode);
78
79U_NAMESPACE_BEGIN
80
81class Locale; // unicode/locid.h
82class StringCharacterIterator;
83class UnicodeStringAppendable; // unicode/appendable.h
84
85/* The <iostream> include has been moved to unicode/ustream.h */
86
97#define US_INV icu::UnicodeString::kInvariant
98
// UNICODE_STRING(cs, _length): builds an icu::UnicodeString from a string literal.
// The macro pastes a `u` prefix onto `cs` (u ## cs) and passes the result, together
// with `true` and `_length`, to the three-argument UnicodeString constructor
// (UBool isTerminated, ConstChar16Ptr text, int32_t textLength).
// When U_CHAR16_IS_TYPEDEF is set, the pasted literal is additionally cast to
// const char16_t* (presumably because the literal's element type then differs
// from the char16_t typedef -- confirm against unicode/umachine.h).
116#if !U_CHAR16_IS_TYPEDEF
117# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, u ## cs, _length)
118#else
119# define UNICODE_STRING(cs, _length) icu::UnicodeString(true, (const char16_t*)u ## cs, _length)
120#endif
121
135#define UNICODE_STRING_SIMPLE(cs) UNICODE_STRING(cs, -1)
136
// UNISTR_FROM_CHAR_EXPLICIT: declaration prefix that is either `explicit` or empty.
// A definition supplied before this header is included is respected (#ifndef guard).
// Otherwise it expands to `explicit` when any of the ICU library implementation
// macros below is defined, and to nothing for all other code.
144#ifndef UNISTR_FROM_CHAR_EXPLICIT
145# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
146 // Auto-"explicit" in ICU library code.
147# define UNISTR_FROM_CHAR_EXPLICIT explicit
148# else
149 // Empty by default for source code compatibility.
150# define UNISTR_FROM_CHAR_EXPLICIT
151# endif
152#endif
153
// UNISTR_FROM_STRING_EXPLICIT: declaration prefix that is either `explicit` or empty.
// It is applied to the UnicodeString(const std::nullptr_t text) constructor
// declared later in this header.
// A definition supplied before this header is included is respected (#ifndef guard).
// Otherwise it expands to `explicit` when any of the ICU library implementation
// macros below is defined, and to nothing for all other code.
164#ifndef UNISTR_FROM_STRING_EXPLICIT
165# if defined(U_COMBINED_IMPLEMENTATION) || defined(U_COMMON_IMPLEMENTATION) || defined(U_I18N_IMPLEMENTATION) || defined(U_IO_IMPLEMENTATION)
166 // Auto-"explicit" in ICU library code.
167# define UNISTR_FROM_STRING_EXPLICIT explicit
168# else
169 // Empty by default for source code compatibility.
170# define UNISTR_FROM_STRING_EXPLICIT
171# endif
172#endif
173
// UNISTR_OBJECT_SIZE: size budget, in bytes, from which the class derives the
// length of its in-object buffer (see US_STACKBUF_SIZE, which subtracts
// sizeof(void *) and 2 from this value and divides by U_SIZEOF_UCHAR).
// Defaults to 64; a definition supplied before this header is included wins.
207#ifndef UNISTR_OBJECT_SIZE
208# define UNISTR_OBJECT_SIZE 64
209#endif
210
296{
297public:
298
312 kInvariant
313 };
314
315 //========================================
316 // Read-only operations
317 //========================================
318
319 /* Comparison - bitwise only - for international comparison use collation */
320
328 inline bool operator== (const UnicodeString& text) const;
329
337 inline bool operator!= (const UnicodeString& text) const;
338
346 inline UBool operator> (const UnicodeString& text) const;
347
355 inline UBool operator< (const UnicodeString& text) const;
356
364 inline UBool operator>= (const UnicodeString& text) const;
365
373 inline UBool operator<= (const UnicodeString& text) const;
374
386 inline int8_t compare(const UnicodeString& text) const;
387
403 inline int8_t compare(int32_t start,
404 int32_t length,
405 const UnicodeString& text) const;
406
424 inline int8_t compare(int32_t start,
425 int32_t length,
426 const UnicodeString& srcText,
427 int32_t srcStart,
428 int32_t srcLength) const;
429
442 inline int8_t compare(ConstChar16Ptr srcChars,
443 int32_t srcLength) const;
444
459 inline int8_t compare(int32_t start,
460 int32_t length,
461 const char16_t *srcChars) const;
462
480 inline int8_t compare(int32_t start,
481 int32_t length,
482 const char16_t *srcChars,
483 int32_t srcStart,
484 int32_t srcLength) const;
485
503 inline int8_t compareBetween(int32_t start,
504 int32_t limit,
505 const UnicodeString& srcText,
506 int32_t srcStart,
507 int32_t srcLimit) const;
508
526 inline int8_t compareCodePointOrder(const UnicodeString& text) const;
527
547 inline int8_t compareCodePointOrder(int32_t start,
548 int32_t length,
549 const UnicodeString& srcText) const;
550
572 inline int8_t compareCodePointOrder(int32_t start,
573 int32_t length,
574 const UnicodeString& srcText,
575 int32_t srcStart,
576 int32_t srcLength) const;
577
596 inline int8_t compareCodePointOrder(ConstChar16Ptr srcChars,
597 int32_t srcLength) const;
598
618 inline int8_t compareCodePointOrder(int32_t start,
619 int32_t length,
620 const char16_t *srcChars) const;
621
643 inline int8_t compareCodePointOrder(int32_t start,
644 int32_t length,
645 const char16_t *srcChars,
646 int32_t srcStart,
647 int32_t srcLength) const;
648
670 inline int8_t compareCodePointOrderBetween(int32_t start,
671 int32_t limit,
672 const UnicodeString& srcText,
673 int32_t srcStart,
674 int32_t srcLimit) const;
675
694 inline int8_t caseCompare(const UnicodeString& text, uint32_t options) const;
695
716 inline int8_t caseCompare(int32_t start,
717 int32_t length,
718 const UnicodeString& srcText,
719 uint32_t options) const;
720
743 inline int8_t caseCompare(int32_t start,
744 int32_t length,
745 const UnicodeString& srcText,
746 int32_t srcStart,
747 int32_t srcLength,
748 uint32_t options) const;
749
769 inline int8_t caseCompare(ConstChar16Ptr srcChars,
770 int32_t srcLength,
771 uint32_t options) const;
772
793 inline int8_t caseCompare(int32_t start,
794 int32_t length,
795 const char16_t *srcChars,
796 uint32_t options) const;
797
820 inline int8_t caseCompare(int32_t start,
821 int32_t length,
822 const char16_t *srcChars,
823 int32_t srcStart,
824 int32_t srcLength,
825 uint32_t options) const;
826
849 inline int8_t caseCompareBetween(int32_t start,
850 int32_t limit,
851 const UnicodeString& srcText,
852 int32_t srcStart,
853 int32_t srcLimit,
854 uint32_t options) const;
855
863 inline UBool startsWith(const UnicodeString& text) const;
864
875 inline UBool startsWith(const UnicodeString& srcText,
876 int32_t srcStart,
877 int32_t srcLength) const;
878
887 inline UBool startsWith(ConstChar16Ptr srcChars,
888 int32_t srcLength) const;
889
899 inline UBool startsWith(const char16_t *srcChars,
900 int32_t srcStart,
901 int32_t srcLength) const;
902
910 inline UBool endsWith(const UnicodeString& text) const;
911
922 inline UBool endsWith(const UnicodeString& srcText,
923 int32_t srcStart,
924 int32_t srcLength) const;
925
934 inline UBool endsWith(ConstChar16Ptr srcChars,
935 int32_t srcLength) const;
936
947 inline UBool endsWith(const char16_t *srcChars,
948 int32_t srcStart,
949 int32_t srcLength) const;
950
951
952 /* Searching - bitwise only */
953
962 inline int32_t indexOf(const UnicodeString& text) const;
963
973 inline int32_t indexOf(const UnicodeString& text,
974 int32_t start) const;
975
987 inline int32_t indexOf(const UnicodeString& text,
988 int32_t start,
989 int32_t length) const;
990
1007 inline int32_t indexOf(const UnicodeString& srcText,
1008 int32_t srcStart,
1009 int32_t srcLength,
1010 int32_t start,
1011 int32_t length) const;
1012
1024 inline int32_t indexOf(const char16_t *srcChars,
1025 int32_t srcLength,
1026 int32_t start) const;
1027
1040 inline int32_t indexOf(ConstChar16Ptr srcChars,
1041 int32_t srcLength,
1042 int32_t start,
1043 int32_t length) const;
1044
1061 int32_t indexOf(const char16_t *srcChars,
1062 int32_t srcStart,
1063 int32_t srcLength,
1064 int32_t start,
1065 int32_t length) const;
1066
1074 inline int32_t indexOf(char16_t c) const;
1075
1084 inline int32_t indexOf(UChar32 c) const;
1085
1094 inline int32_t indexOf(char16_t c,
1095 int32_t start) const;
1096
1106 inline int32_t indexOf(UChar32 c,
1107 int32_t start) const;
1108
1119 inline int32_t indexOf(char16_t c,
1120 int32_t start,
1121 int32_t length) const;
1122
1134 inline int32_t indexOf(UChar32 c,
1135 int32_t start,
1136 int32_t length) const;
1137
1146 inline int32_t lastIndexOf(const UnicodeString& text) const;
1147
1157 inline int32_t lastIndexOf(const UnicodeString& text,
1158 int32_t start) const;
1159
1171 inline int32_t lastIndexOf(const UnicodeString& text,
1172 int32_t start,
1173 int32_t length) const;
1174
1191 inline int32_t lastIndexOf(const UnicodeString& srcText,
1192 int32_t srcStart,
1193 int32_t srcLength,
1194 int32_t start,
1195 int32_t length) const;
1196
1207 inline int32_t lastIndexOf(const char16_t *srcChars,
1208 int32_t srcLength,
1209 int32_t start) const;
1210
1223 inline int32_t lastIndexOf(ConstChar16Ptr srcChars,
1224 int32_t srcLength,
1225 int32_t start,
1226 int32_t length) const;
1227
1244 int32_t lastIndexOf(const char16_t *srcChars,
1245 int32_t srcStart,
1246 int32_t srcLength,
1247 int32_t start,
1248 int32_t length) const;
1249
1257 inline int32_t lastIndexOf(char16_t c) const;
1258
1267 inline int32_t lastIndexOf(UChar32 c) const;
1268
1277 inline int32_t lastIndexOf(char16_t c,
1278 int32_t start) const;
1279
1289 inline int32_t lastIndexOf(UChar32 c,
1290 int32_t start) const;
1291
1302 inline int32_t lastIndexOf(char16_t c,
1303 int32_t start,
1304 int32_t length) const;
1305
1317 inline int32_t lastIndexOf(UChar32 c,
1318 int32_t start,
1319 int32_t length) const;
1320
1321
1322 /* Character access */
1323
1332 inline char16_t charAt(int32_t offset) const;
1333
1341 inline char16_t operator[] (int32_t offset) const;
1342
1354 UChar32 char32At(int32_t offset) const;
1355
1371 int32_t getChar32Start(int32_t offset) const;
1372
1389 int32_t getChar32Limit(int32_t offset) const;
1390
1441 int32_t moveIndex32(int32_t index, int32_t delta) const;
1442
1443 /* Substring extraction */
1444
1460 inline void extract(int32_t start,
1461 int32_t length,
1462 Char16Ptr dst,
1463 int32_t dstStart = 0) const;
1464
1486 int32_t
1487 extract(Char16Ptr dest, int32_t destCapacity,
1488 UErrorCode &errorCode) const;
1489
1499 inline void extract(int32_t start,
1500 int32_t length,
1501 UnicodeString& target) const;
1502
1514 inline void extractBetween(int32_t start,
1515 int32_t limit,
1516 char16_t *dst,
1517 int32_t dstStart = 0) const;
1518
1527 virtual void extractBetween(int32_t start,
1528 int32_t limit,
1529 UnicodeString& target) const override;
1530
1552 int32_t extract(int32_t start,
1553 int32_t startLength,
1554 char *target,
1555 int32_t targetCapacity,
1556 enum EInvariant inv) const;
1557
1558#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
1559
1579 int32_t extract(int32_t start,
1580 int32_t startLength,
1581 char *target,
1582 uint32_t targetLength) const;
1583
1584#endif
1585
1586#if !UCONFIG_NO_CONVERSION
1587
1613 inline int32_t extract(int32_t start,
1614 int32_t startLength,
1615 char *target,
1616 const char *codepage = 0) const;
1617
1647 int32_t extract(int32_t start,
1648 int32_t startLength,
1649 char *target,
1650 uint32_t targetLength,
1651 const char *codepage) const;
1652
1670 int32_t extract(char *dest, int32_t destCapacity,
1671 UConverter *cnv,
1672 UErrorCode &errorCode) const;
1673
1674#endif
1675
1689 UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const;
1690
1701 inline UnicodeString tempSubStringBetween(int32_t start, int32_t limit=INT32_MAX) const;
1702
1714 void toUTF8(ByteSink &sink) const;
1715
1728 template<typename StringClass>
1729 StringClass &toUTF8String(StringClass &result) const {
1730 StringByteSink<StringClass> sbs(&result, length());
1731 toUTF8(sbs);
1732 return result;
1733 }
1734
1750 int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const;
1751
1752 /* Length operations */
1753
1762 inline int32_t length(void) const;
1763
1777 int32_t
1778 countChar32(int32_t start=0, int32_t length=INT32_MAX) const;
1779
1803 UBool
1804 hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const;
1805
1811 inline UBool isEmpty(void) const;
1812
1822 inline int32_t getCapacity(void) const;
1823
1824 /* Other operations */
1825
1831 inline int32_t hashCode(void) const;
1832
1845 inline UBool isBogus(void) const;
1846
1847
1848 //========================================
1849 // Write operations
1850 //========================================
1851
1852 /* Assignment operations */
1853
1873
1900
1910
1916 void swap(UnicodeString &other) noexcept;
1917
1924 friend inline void U_EXPORT2
1925 swap(UnicodeString &s1, UnicodeString &s2) noexcept {
1926 s1.swap(s2);
1927 }
1928
1936 inline UnicodeString& operator= (char16_t ch);
1937
1945 inline UnicodeString& operator= (UChar32 ch);
1946
1958 inline UnicodeString& setTo(const UnicodeString& srcText,
1959 int32_t srcStart);
1960
1974 inline UnicodeString& setTo(const UnicodeString& srcText,
1975 int32_t srcStart,
1976 int32_t srcLength);
1977
1986 inline UnicodeString& setTo(const UnicodeString& srcText);
1987
1996 inline UnicodeString& setTo(const char16_t *srcChars,
1997 int32_t srcLength);
1998
2007 inline UnicodeString& setTo(char16_t srcChar);
2008
2017 inline UnicodeString& setTo(UChar32 srcChar);
2018
2043 ConstChar16Ptr text,
2044 int32_t textLength);
2045
2065 UnicodeString &setTo(char16_t *buffer,
2066 int32_t buffLength,
2067 int32_t buffCapacity);
2068
2109
2117 UnicodeString& setCharAt(int32_t offset,
2118 char16_t ch);
2119
2120
2121 /* Append operations */
2122
2130 inline UnicodeString& operator+= (char16_t ch);
2131
2139 inline UnicodeString& operator+= (UChar32 ch);
2140
2148 inline UnicodeString& operator+= (const UnicodeString& srcText);
2149
2164 inline UnicodeString& append(const UnicodeString& srcText,
2165 int32_t srcStart,
2166 int32_t srcLength);
2167
2175 inline UnicodeString& append(const UnicodeString& srcText);
2176
2190 inline UnicodeString& append(const char16_t *srcChars,
2191 int32_t srcStart,
2192 int32_t srcLength);
2193
2203 inline UnicodeString& append(ConstChar16Ptr srcChars,
2204 int32_t srcLength);
2205
2212 inline UnicodeString& append(char16_t srcChar);
2213
2221
2222
2223 /* Insert operations */
2224
2238 inline UnicodeString& insert(int32_t start,
2239 const UnicodeString& srcText,
2240 int32_t srcStart,
2241 int32_t srcLength);
2242
2251 inline UnicodeString& insert(int32_t start,
2252 const UnicodeString& srcText);
2253
2267 inline UnicodeString& insert(int32_t start,
2268 const char16_t *srcChars,
2269 int32_t srcStart,
2270 int32_t srcLength);
2271
2281 inline UnicodeString& insert(int32_t start,
2282 ConstChar16Ptr srcChars,
2283 int32_t srcLength);
2284
2293 inline UnicodeString& insert(int32_t start,
2294 char16_t srcChar);
2295
2304 inline UnicodeString& insert(int32_t start,
2305 UChar32 srcChar);
2306
2307
2308 /* Replace operations */
2309
2327 inline UnicodeString& replace(int32_t start,
2328 int32_t length,
2329 const UnicodeString& srcText,
2330 int32_t srcStart,
2331 int32_t srcLength);
2332
2345 inline UnicodeString& replace(int32_t start,
2346 int32_t length,
2347 const UnicodeString& srcText);
2348
2366 inline UnicodeString& replace(int32_t start,
2367 int32_t length,
2368 const char16_t *srcChars,
2369 int32_t srcStart,
2370 int32_t srcLength);
2371
2384 inline UnicodeString& replace(int32_t start,
2385 int32_t length,
2386 ConstChar16Ptr srcChars,
2387 int32_t srcLength);
2388
2400 inline UnicodeString& replace(int32_t start,
2401 int32_t length,
2402 char16_t srcChar);
2403
2415 UnicodeString& replace(int32_t start, int32_t length, UChar32 srcChar);
2416
2426 inline UnicodeString& replaceBetween(int32_t start,
2427 int32_t limit,
2428 const UnicodeString& srcText);
2429
2444 inline UnicodeString& replaceBetween(int32_t start,
2445 int32_t limit,
2446 const UnicodeString& srcText,
2447 int32_t srcStart,
2448 int32_t srcLimit);
2449
2457 virtual void handleReplaceBetween(int32_t start,
2458 int32_t limit,
2459 const UnicodeString& text) override;
2460
2466 virtual UBool hasMetaData() const override;
2467
2481 virtual void copy(int32_t start, int32_t limit, int32_t dest) override;
2482
2483 /* Search and replace operations */
2484
2493 inline UnicodeString& findAndReplace(const UnicodeString& oldText,
2494 const UnicodeString& newText);
2495
2507 inline UnicodeString& findAndReplace(int32_t start,
2508 int32_t length,
2509 const UnicodeString& oldText,
2510 const UnicodeString& newText);
2511
2530 int32_t length,
2531 const UnicodeString& oldText,
2532 int32_t oldStart,
2533 int32_t oldLength,
2534 const UnicodeString& newText,
2535 int32_t newStart,
2536 int32_t newLength);
2537
2538
2539 /* Remove operations */
2540
2549 inline UnicodeString& remove();
2550
2559 inline UnicodeString& remove(int32_t start,
2560 int32_t length = (int32_t)INT32_MAX);
2561
2570 inline UnicodeString& removeBetween(int32_t start,
2571 int32_t limit = (int32_t)INT32_MAX);
2572
2582 inline UnicodeString &retainBetween(int32_t start, int32_t limit = INT32_MAX);
2583
2584 /* Length operations */
2585
2597 UBool padLeading(int32_t targetLength,
2598 char16_t padChar = 0x0020);
2599
2611 UBool padTrailing(int32_t targetLength,
2612 char16_t padChar = 0x0020);
2613
2620 inline UBool truncate(int32_t targetLength);
2621
2628
2629
2630 /* Miscellaneous operations */
2631
2637 inline UnicodeString& reverse(void);
2638
2647 inline UnicodeString& reverse(int32_t start,
2648 int32_t length);
2649
2657
2666
2674
2683
2684#if !UCONFIG_NO_BREAK_ITERATION
2685
2713
2741 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale);
2742
2773 UnicodeString &toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options);
2774
2775#endif
2776
2790 UnicodeString &foldCase(uint32_t options=0 /*U_FOLD_CASE_DEFAULT*/);
2791
2792 //========================================
2793 // Access to the internal buffer
2794 //========================================
2795
2839 char16_t *getBuffer(int32_t minCapacity);
2840
2861 void releaseBuffer(int32_t newLength=-1);
2862
2893 inline const char16_t *getBuffer() const;
2894
2928 const char16_t *getTerminatedBuffer();
2929
2930 //========================================
2931 // Constructors
2932 //========================================
2933
2937 inline UnicodeString();
2938
2950 UnicodeString(int32_t capacity, UChar32 c, int32_t count);
2951
2962
2973
2985
2986#if !U_CHAR16_IS_TYPEDEF
2999#endif
3000
3001#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3015#endif
3016
3027 UNISTR_FROM_STRING_EXPLICIT inline UnicodeString(const std::nullptr_t text);
3028
3036 UnicodeString(const char16_t *text,
3037 int32_t textLength);
3038
3039#if !U_CHAR16_IS_TYPEDEF
3047 UnicodeString(const uint16_t *text, int32_t textLength) :
3048 UnicodeString(ConstChar16Ptr(text), textLength) {}
3049#endif
3050
3051#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3060 UnicodeString(const wchar_t *text, int32_t textLength) :
3061 UnicodeString(ConstChar16Ptr(text), textLength) {}
3062#endif
3063
3071 inline UnicodeString(const std::nullptr_t text, int32_t textLength);
3072
3095 UnicodeString(UBool isTerminated,
3096 ConstChar16Ptr text,
3097 int32_t textLength);
3098
3117 UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity);
3118
3119#if !U_CHAR16_IS_TYPEDEF
3128 UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3129 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3130#endif
3131
3132#if U_SIZEOF_WCHAR_T==2 || defined(U_IN_DOXYGEN)
3142 UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity) :
3143 UnicodeString(Char16Ptr(buffer), buffLength, buffCapacity) {}
3144#endif
3145
3154 inline UnicodeString(std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity);
3155
3156#if U_CHARSET_IS_UTF8 || !UCONFIG_NO_CONVERSION
3157
3178
3187 UnicodeString(const char *codepageData, int32_t dataLength);
3188
3189#endif
3190
3191#if !UCONFIG_NO_CONVERSION
3192
3210 UnicodeString(const char *codepageData, const char *codepage);
3211
3229 UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage);
3230
3253 const char *src, int32_t srcLength,
3254 UConverter *cnv,
3255 UErrorCode &errorCode);
3256
3257#endif
3258
3282 UnicodeString(const char *src, int32_t textLength, enum EInvariant inv);
3283
3284
3302
3310
3317 UnicodeString(const UnicodeString& src, int32_t srcStart);
3318
3326 UnicodeString(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3327
3341 virtual UnicodeString *clone() const override;
3342
3347
3362
3374 static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length);
3375
3376 /* Miscellaneous operations */
3377
3413
3433 UChar32 unescapeAt(int32_t &offset) const;
3434
3440 static UClassID U_EXPORT2 getStaticClassID();
3441
3447 virtual UClassID getDynamicClassID() const override;
3448
3449 //========================================
3450 // Implementation methods
3451 //========================================
3452
3453protected:
3458 virtual int32_t getLength() const override;
3459
3465 virtual char16_t getCharAt(int32_t offset) const override;
3466
3472 virtual UChar32 getChar32At(int32_t offset) const override;
3473
3474private:
3475 // For char* constructors. Could be made public.
3476 UnicodeString &setToUTF8(StringPiece utf8);
3477 // For extract(char*).
3478 // We could make a toUTF8(target, capacity, errorCode) public but not
3479 // this version: New API will be cleaner if we make callers create substrings
3480 // rather than having start+length on every method,
3481 // and it should take a UErrorCode&.
3482 int32_t
3483 toUTF8(int32_t start, int32_t len,
3484 char *target, int32_t capacity) const;
3485
3490 UBool doEquals(const UnicodeString &text, int32_t len) const;
3491
3492 inline UBool
3493 doEqualsSubstring(int32_t start,
3494 int32_t length,
3495 const UnicodeString& srcText,
3496 int32_t srcStart,
3497 int32_t srcLength) const;
3498
3499 UBool doEqualsSubstring(int32_t start,
3500 int32_t length,
3501 const char16_t *srcChars,
3502 int32_t srcStart,
3503 int32_t srcLength) const;
3504
3505 inline int8_t
3506 doCompare(int32_t start,
3507 int32_t length,
3508 const UnicodeString& srcText,
3509 int32_t srcStart,
3510 int32_t srcLength) const;
3511
3512 int8_t doCompare(int32_t start,
3513 int32_t length,
3514 const char16_t *srcChars,
3515 int32_t srcStart,
3516 int32_t srcLength) const;
3517
3518 inline int8_t
3519 doCompareCodePointOrder(int32_t start,
3520 int32_t length,
3521 const UnicodeString& srcText,
3522 int32_t srcStart,
3523 int32_t srcLength) const;
3524
3525 int8_t doCompareCodePointOrder(int32_t start,
3526 int32_t length,
3527 const char16_t *srcChars,
3528 int32_t srcStart,
3529 int32_t srcLength) const;
3530
3531 inline int8_t
3532 doCaseCompare(int32_t start,
3533 int32_t length,
3534 const UnicodeString &srcText,
3535 int32_t srcStart,
3536 int32_t srcLength,
3537 uint32_t options) const;
3538
3539 int8_t
3540 doCaseCompare(int32_t start,
3541 int32_t length,
3542 const char16_t *srcChars,
3543 int32_t srcStart,
3544 int32_t srcLength,
3545 uint32_t options) const;
3546
3547 int32_t doIndexOf(char16_t c,
3548 int32_t start,
3549 int32_t length) const;
3550
3551 int32_t doIndexOf(UChar32 c,
3552 int32_t start,
3553 int32_t length) const;
3554
3555 int32_t doLastIndexOf(char16_t c,
3556 int32_t start,
3557 int32_t length) const;
3558
3559 int32_t doLastIndexOf(UChar32 c,
3560 int32_t start,
3561 int32_t length) const;
3562
3563 void doExtract(int32_t start,
3564 int32_t length,
3565 char16_t *dst,
3566 int32_t dstStart) const;
3567
3568 inline void doExtract(int32_t start,
3569 int32_t length,
3570 UnicodeString& target) const;
3571
3572 inline char16_t doCharAt(int32_t offset) const;
3573
3574 UnicodeString& doReplace(int32_t start,
3575 int32_t length,
3576 const UnicodeString& srcText,
3577 int32_t srcStart,
3578 int32_t srcLength);
3579
3580 UnicodeString& doReplace(int32_t start,
3581 int32_t length,
3582 const char16_t *srcChars,
3583 int32_t srcStart,
3584 int32_t srcLength);
3585
3586 UnicodeString& doAppend(const UnicodeString& src, int32_t srcStart, int32_t srcLength);
3587 UnicodeString& doAppend(const char16_t *srcChars, int32_t srcStart, int32_t srcLength);
3588
3589 UnicodeString& doReverse(int32_t start,
3590 int32_t length);
3591
3592 // calculate hash code
3593 int32_t doHashCode(void) const;
3594
3595 // get pointer to start of array
3596 // these do not check for kOpenGetBuffer, unlike the public getBuffer() function
3597 inline char16_t* getArrayStart(void);
3598 inline const char16_t* getArrayStart(void) const;
3599
3600 inline UBool hasShortLength() const;
3601 inline int32_t getShortLength() const;
3602
3603 // A UnicodeString object (not necessarily its current buffer)
3604 // is writable unless it isBogus() or it has an "open" getBuffer(minCapacity).
3605 inline UBool isWritable() const;
3606
3607 // Is the current buffer writable?
3608 inline UBool isBufferWritable() const;
3609
3610 // None of the following does releaseArray().
3611 inline void setZeroLength();
3612 inline void setShortLength(int32_t len);
3613 inline void setLength(int32_t len);
3614 inline void setToEmpty();
3615 inline void setArray(char16_t *array, int32_t len, int32_t capacity); // sets length but not flags
3616
3617 // allocate the array; result may be the stack buffer
3618 // sets refCount to 1 if appropriate
3619 // sets fArray, fCapacity, and flags
3620 // sets length to 0
3621 // returns boolean for success or failure
3622 UBool allocate(int32_t capacity);
3623
3624 // release the array if owned
3625 void releaseArray(void);
3626
3627 // turn a bogus string into an empty one
3628 void unBogus();
3629
3630 // implements assignment operator, copy constructor, and fastCopyFrom()
3631 UnicodeString &copyFrom(const UnicodeString &src, UBool fastCopy=false);
3632
3633 // Copies just the fields without memory management.
3634 void copyFieldsFrom(UnicodeString &src, UBool setSrcToBogus) noexcept;
3635
3636 // Pin start and limit to acceptable values.
3637 inline void pinIndex(int32_t& start) const;
3638 inline void pinIndices(int32_t& start,
3639 int32_t& length) const;
3640
3641#if !UCONFIG_NO_CONVERSION
3642
3643 /* Internal extract() using UConverter. */
3644 int32_t doExtract(int32_t start, int32_t length,
3645 char *dest, int32_t destCapacity,
3646 UConverter *cnv,
3647 UErrorCode &errorCode) const;
3648
3649 /*
3650 * Real constructor for converting from codepage data.
3651 * It assumes that it is called with !fRefCounted.
3652 *
3653 * If `codepage==0`, then the default converter
3654 * is used for the platform encoding.
3655 * If `codepage` is an empty string (`""`),
3656 * then a simple conversion is performed on the codepage-invariant
3657 * subset ("invariant characters") of the platform encoding. See utypes.h.
3658 */
3659 void doCodepageCreate(const char *codepageData,
3660 int32_t dataLength,
3661 const char *codepage);
3662
3663 /*
3664 * Worker function for creating a UnicodeString from
3665 * a codepage string using a UConverter.
3666 */
3667 void
3668 doCodepageCreate(const char *codepageData,
3669 int32_t dataLength,
3670 UConverter *converter,
3671 UErrorCode &status);
3672
3673#endif
3674
3675 /*
3676 * This function is called when write access to the array
3677 * is necessary.
3678 *
3679 * We need to make a copy of the array if
3680 * the buffer is read-only, or
3681 * the buffer is refCounted (shared), and refCount>1, or
3682 * the buffer is too small.
3683 *
3684 * Return false if memory could not be allocated.
3685 */
3686 UBool cloneArrayIfNeeded(int32_t newCapacity = -1,
3687 int32_t growCapacity = -1,
3688 UBool doCopyArray = true,
3689 int32_t **pBufferToDelete = 0,
3690 UBool forceClone = false);
3691
3698 caseMap(int32_t caseLocale, uint32_t options,
3700 BreakIterator *iter,
3701#endif
3702 UStringCaseMapper *stringCaseMapper);
3703
3704 // ref counting
3705 void addRef(void);
3706 int32_t removeRef(void);
3707 int32_t refCount(void) const;
3708
3709 // constants
// Private implementation constants: the in-object buffer size, sentinel values,
// and the bit layout of the 16-bit fLengthAndFlags field.
3710 enum {
// Number of code units in the in-object buffer: UNISTR_OBJECT_SIZE minus
// sizeof(void *) minus 2, converted to int32_t and divided by U_SIZEOF_UCHAR.
3716 US_STACKBUF_SIZE=(int32_t)(UNISTR_OBJECT_SIZE-sizeof(void *)-2)/U_SIZEOF_UCHAR,
3717 kInvalidUChar=0xffff, // U+FFFF returned by charAt(invalid index)
3718 kInvalidHashCode=0, // invalid hash code
3719 kEmptyHashCode=1, // hash code for empty string
3720
3721 // bit flag values for fLengthAndFlags
3722 kIsBogus=1, // this string is bogus, i.e., not valid or nullptr
3723 kUsingStackBuffer=2,// using fUnion.fStackFields instead of fUnion.fFields
3724 kRefCounted=4, // there is a refCount field before the characters in fArray
3725 kBufferIsReadonly=8,// do not write to this buffer
3726 kOpenGetBuffer=16, // getBuffer(minCapacity) was called (is "open"),
3727 // and releaseBuffer(newLength) must be called
3728 kAllStorageFlags=0x1f, // mask covering the five flag bits above (1|2|4|8|16)
3729
3730 kLengthShift=5, // remaining 11 bits for non-negative short length, or negative if long
3731 kLength1=1<<kLengthShift, // a short length of 1, positioned above the flag bits
3732 kMaxShortLength=0x3ff, // max non-negative short length (leaves top bit 0)
3733 kLengthIsLarge=0xffe0, // short length < 0, real length is in fUnion.fFields.fLength
3734
3735 // combined values for convenience
3736 kShortString=kUsingStackBuffer,
3737 kLongString=kRefCounted,
3738 kReadonlyAlias=kBufferIsReadonly,
3739 kWritableAlias=0
3740 };
3741
3742 friend class UnicodeStringAppendable;
3743
3744 union StackBufferOrFields; // forward declaration necessary before friend declaration
3745 friend union StackBufferOrFields; // make US_STACKBUF_SIZE visible inside fUnion
3746
3747 /*
3748 * The following are all the class fields that are stored
3749 * in each UnicodeString object.
3750 * Note that UnicodeString has virtual functions,
3751 * therefore there is an implicit vtable pointer
3752 * as the first real field.
3753 * The fields should be aligned such that no padding is necessary.
3754 * The object size is governed by UNISTR_OBJECT_SIZE (64 bytes by default):
3755 * US_STACKBUF_SIZE is computed from it, minus sizeof(void *) and the 2 bytes of fLengthAndFlags.
3756 *
3757 * We use a hack to achieve this.
3758 *
3759 * With at least some compilers, each of the following is forced to
3760 * a multiple of sizeof(pointer) [the largest field base unit here is a data pointer],
3761 * rounded up with additional padding if the fields do not already fit that requirement:
3762 * - sizeof(class UnicodeString)
3763 * - offsetof(UnicodeString, fUnion)
3764 * - sizeof(fUnion)
3765 * - sizeof(fStackFields)
3766 *
3767 * We optimize for the longest possible internal buffer for short strings.
3768 * fUnion.fStackFields begins with 2 bytes for storage flags
3769 * and the length of relatively short strings,
3770 * followed by the buffer for short string contents.
3771 * There is no padding inside fStackFields.
3772 *
3773 * Heap-allocated and aliased strings use fUnion.fFields.
3774 * Both fStackFields and fFields must begin with the same fields for flags and short length,
3775 * that is, those must have the same memory offsets inside the object,
3776 * because the flags must be inspected in order to decide which half of fUnion is being used.
3777 * We assume that the compiler does not reorder the fields.
3778 *
3779 * (Padding at the end of fFields is ok:
3780 * As long as it is no larger than fStackFields, it is not wasted space.)
3781 *
3782 * For some of the history of the UnicodeString class fields layout, see
3783 * - ICU ticket #11551 "longer UnicodeString contents in stack buffer"
3784 * - ICU ticket #11336 "UnicodeString: recombine stack buffer arrays"
3785 * - ICU ticket #8322 "why is sizeof(UnicodeString)==48?"
3786 */
3787 // (implicit) *vtable;
// Storage for the string contents: either an in-object buffer (fStackFields)
// or length/capacity/pointer fields describing an external array (fFields).
// Both members start with fLengthAndFlags so the flags can be read through
// either one before deciding which member is in use.
3788 union StackBufferOrFields {
3789 // fStackFields is used iff (fLengthAndFlags&kUsingStackBuffer) else fFields is used.
3790 // Each struct of the union must begin with fLengthAndFlags.
3791 struct {
3792 int16_t fLengthAndFlags; // bit fields: see constants above
3793 char16_t fBuffer[US_STACKBUF_SIZE]; // buffer for short strings
3794 } fStackFields;
3795 struct {
3796 int16_t fLengthAndFlags; // bit fields: see constants above
3797 int32_t fLength; // real length when fLengthAndFlags holds kLengthIsLarge (presumably lengths above kMaxShortLength); else undefined
3798 int32_t fCapacity; // capacity of fArray (in char16_ts)
3799 // array pointer last to minimize padding for machines with P128 data model
3800 // or pointer sizes that are not a power of 2
3801 char16_t *fArray; // the Unicode data
3802 } fFields;
3803 } fUnion;
3804};
3805
3814U_COMMON_API UnicodeString U_EXPORT2
3816
3817//========================================
3818// Inline members
3819//========================================
3820
3821//========================================
3822// Privates
3823//========================================
3824
3825inline void
3826UnicodeString::pinIndex(int32_t& start) const
3827{
3828 // pin index
3829 if(start < 0) {
3830 start = 0;
3831 } else if(start > length()) {
3832 start = length();
3833 }
3834}
3835
3836inline void
3837UnicodeString::pinIndices(int32_t& start,
3838 int32_t& _length) const
3839{
3840 // pin indices
3841 int32_t len = length();
3842 if(start < 0) {
3843 start = 0;
3844 } else if(start > len) {
3845 start = len;
3846 }
3847 if(_length < 0) {
3848 _length = 0;
3849 } else if(_length > (len - start)) {
3850 _length = (len - start);
3851 }
3852}
3853
3854inline char16_t*
3855UnicodeString::getArrayStart() {
3856 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3857 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3858}
3859
3860inline const char16_t*
3861UnicodeString::getArrayStart() const {
3862 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3863 fUnion.fStackFields.fBuffer : fUnion.fFields.fArray;
3864}
3865
3866//========================================
3867// Default constructor
3868//========================================
3869
3870inline
3871UnicodeString::UnicodeString() {
3872 fUnion.fStackFields.fLengthAndFlags=kShortString;
3873}
3874
3875inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/) {
3876 fUnion.fStackFields.fLengthAndFlags=kShortString;
3877}
3878
3879inline UnicodeString::UnicodeString(const std::nullptr_t /*text*/, int32_t /*length*/) {
3880 fUnion.fStackFields.fLengthAndFlags=kShortString;
3881}
3882
3883inline UnicodeString::UnicodeString(std::nullptr_t /*buffer*/, int32_t /*buffLength*/, int32_t /*buffCapacity*/) {
3884 fUnion.fStackFields.fLengthAndFlags=kShortString;
3885}
3886
3887//========================================
3888// Read-only implementation methods
3889//========================================
3890inline UBool
3891UnicodeString::hasShortLength() const {
3892 return fUnion.fFields.fLengthAndFlags>=0;
3893}
3894
3895inline int32_t
3896UnicodeString::getShortLength() const {
3897 // fLengthAndFlags must be non-negative -> short length >= 0
3898 // and arithmetic or logical shift does not matter.
3899 return fUnion.fFields.fLengthAndFlags>>kLengthShift;
3900}
3901
3902inline int32_t
3903UnicodeString::length() const {
3904 return hasShortLength() ? getShortLength() : fUnion.fFields.fLength;
3905}
3906
3907inline int32_t
3908UnicodeString::getCapacity() const {
3909 return (fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) ?
3910 US_STACKBUF_SIZE : fUnion.fFields.fCapacity;
3911}
3912
3913inline int32_t
3914UnicodeString::hashCode() const
3915{ return doHashCode(); }
3916
3917inline UBool
3918UnicodeString::isBogus() const
3919{ return (UBool)(fUnion.fFields.fLengthAndFlags & kIsBogus); }
3920
3921inline UBool
3922UnicodeString::isWritable() const
3923{ return (UBool)!(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus)); }
3924
3925inline UBool
3926UnicodeString::isBufferWritable() const
3927{
3928 return (UBool)(
3929 !(fUnion.fFields.fLengthAndFlags&(kOpenGetBuffer|kIsBogus|kBufferIsReadonly)) &&
3930 (!(fUnion.fFields.fLengthAndFlags&kRefCounted) || refCount()==1));
3931}
3932
3933inline const char16_t *
3934UnicodeString::getBuffer() const {
3935 if(fUnion.fFields.fLengthAndFlags&(kIsBogus|kOpenGetBuffer)) {
3936 return nullptr;
3937 } else if(fUnion.fFields.fLengthAndFlags&kUsingStackBuffer) {
3938 return fUnion.fStackFields.fBuffer;
3939 } else {
3940 return fUnion.fFields.fArray;
3941 }
3942}
3943
3944//========================================
3945// Read-only alias methods
3946//========================================
3947inline int8_t
3948UnicodeString::doCompare(int32_t start,
3949 int32_t thisLength,
3950 const UnicodeString& srcText,
3951 int32_t srcStart,
3952 int32_t srcLength) const
3953{
3954 if(srcText.isBogus()) {
3955 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
3956 } else {
3957 srcText.pinIndices(srcStart, srcLength);
3958 return doCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3959 }
3960}
3961
3962inline UBool
3963UnicodeString::doEqualsSubstring(int32_t start,
3964 int32_t thisLength,
3965 const UnicodeString& srcText,
3966 int32_t srcStart,
3967 int32_t srcLength) const
3968{
3969 if(srcText.isBogus()) {
3970 return isBogus();
3971 } else {
3972 srcText.pinIndices(srcStart, srcLength);
3973 return !isBogus() && doEqualsSubstring(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
3974 }
3975}
3976
3977inline bool
3979{
3980 if(isBogus()) {
3981 return text.isBogus();
3982 } else {
3983 int32_t len = length(), textLength = text.length();
3984 return !text.isBogus() && len == textLength && doEquals(text, len);
3985 }
3986}
3987
3988inline bool
3990{ return (! operator==(text)); }
3991
3992inline UBool
3993UnicodeString::operator> (const UnicodeString& text) const
3994{ return doCompare(0, length(), text, 0, text.length()) == 1; }
3995
3996inline UBool
3997UnicodeString::operator< (const UnicodeString& text) const
3998{ return doCompare(0, length(), text, 0, text.length()) == -1; }
3999
4000inline UBool
4001UnicodeString::operator>= (const UnicodeString& text) const
4002{ return doCompare(0, length(), text, 0, text.length()) != -1; }
4003
4004inline UBool
4005UnicodeString::operator<= (const UnicodeString& text) const
4006{ return doCompare(0, length(), text, 0, text.length()) != 1; }
4007
4008inline int8_t
4009UnicodeString::compare(const UnicodeString& text) const
4010{ return doCompare(0, length(), text, 0, text.length()); }
4011
4012inline int8_t
4013UnicodeString::compare(int32_t start,
4014 int32_t _length,
4015 const UnicodeString& srcText) const
4016{ return doCompare(start, _length, srcText, 0, srcText.length()); }
4017
4018inline int8_t
4019UnicodeString::compare(ConstChar16Ptr srcChars,
4020 int32_t srcLength) const
4021{ return doCompare(0, length(), srcChars, 0, srcLength); }
4022
4023inline int8_t
4024UnicodeString::compare(int32_t start,
4025 int32_t _length,
4026 const UnicodeString& srcText,
4027 int32_t srcStart,
4028 int32_t srcLength) const
4029{ return doCompare(start, _length, srcText, srcStart, srcLength); }
4030
4031inline int8_t
4032UnicodeString::compare(int32_t start,
4033 int32_t _length,
4034 const char16_t *srcChars) const
4035{ return doCompare(start, _length, srcChars, 0, _length); }
4036
4037inline int8_t
4038UnicodeString::compare(int32_t start,
4039 int32_t _length,
4040 const char16_t *srcChars,
4041 int32_t srcStart,
4042 int32_t srcLength) const
4043{ return doCompare(start, _length, srcChars, srcStart, srcLength); }
4044
4045inline int8_t
4046UnicodeString::compareBetween(int32_t start,
4047 int32_t limit,
4048 const UnicodeString& srcText,
4049 int32_t srcStart,
4050 int32_t srcLimit) const
4051{ return doCompare(start, limit - start,
4052 srcText, srcStart, srcLimit - srcStart); }
4053
4054inline int8_t
4055UnicodeString::doCompareCodePointOrder(int32_t start,
4056 int32_t thisLength,
4057 const UnicodeString& srcText,
4058 int32_t srcStart,
4059 int32_t srcLength) const
4060{
4061 if(srcText.isBogus()) {
4062 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4063 } else {
4064 srcText.pinIndices(srcStart, srcLength);
4065 return doCompareCodePointOrder(start, thisLength, srcText.getArrayStart(), srcStart, srcLength);
4066 }
4067}
4068
4069inline int8_t
4070UnicodeString::compareCodePointOrder(const UnicodeString& text) const
4071{ return doCompareCodePointOrder(0, length(), text, 0, text.length()); }
4072
4073inline int8_t
4074UnicodeString::compareCodePointOrder(int32_t start,
4075 int32_t _length,
4076 const UnicodeString& srcText) const
4077{ return doCompareCodePointOrder(start, _length, srcText, 0, srcText.length()); }
4078
4079inline int8_t
4080UnicodeString::compareCodePointOrder(ConstChar16Ptr srcChars,
4081 int32_t srcLength) const
4082{ return doCompareCodePointOrder(0, length(), srcChars, 0, srcLength); }
4083
4084inline int8_t
4085UnicodeString::compareCodePointOrder(int32_t start,
4086 int32_t _length,
4087 const UnicodeString& srcText,
4088 int32_t srcStart,
4089 int32_t srcLength) const
4090{ return doCompareCodePointOrder(start, _length, srcText, srcStart, srcLength); }
4091
4092inline int8_t
4093UnicodeString::compareCodePointOrder(int32_t start,
4094 int32_t _length,
4095 const char16_t *srcChars) const
4096{ return doCompareCodePointOrder(start, _length, srcChars, 0, _length); }
4097
4098inline int8_t
4099UnicodeString::compareCodePointOrder(int32_t start,
4100 int32_t _length,
4101 const char16_t *srcChars,
4102 int32_t srcStart,
4103 int32_t srcLength) const
4104{ return doCompareCodePointOrder(start, _length, srcChars, srcStart, srcLength); }
4105
4106inline int8_t
4107UnicodeString::compareCodePointOrderBetween(int32_t start,
4108 int32_t limit,
4109 const UnicodeString& srcText,
4110 int32_t srcStart,
4111 int32_t srcLimit) const
4112{ return doCompareCodePointOrder(start, limit - start,
4113 srcText, srcStart, srcLimit - srcStart); }
4114
4115inline int8_t
4116UnicodeString::doCaseCompare(int32_t start,
4117 int32_t thisLength,
4118 const UnicodeString &srcText,
4119 int32_t srcStart,
4120 int32_t srcLength,
4121 uint32_t options) const
4122{
4123 if(srcText.isBogus()) {
4124 return (int8_t)!isBogus(); // 0 if both are bogus, 1 otherwise
4125 } else {
4126 srcText.pinIndices(srcStart, srcLength);
4127 return doCaseCompare(start, thisLength, srcText.getArrayStart(), srcStart, srcLength, options);
4128 }
4129}
4130
4131inline int8_t
4132UnicodeString::caseCompare(const UnicodeString &text, uint32_t options) const {
4133 return doCaseCompare(0, length(), text, 0, text.length(), options);
4134}
4135
4136inline int8_t
4137UnicodeString::caseCompare(int32_t start,
4138 int32_t _length,
4139 const UnicodeString &srcText,
4140 uint32_t options) const {
4141 return doCaseCompare(start, _length, srcText, 0, srcText.length(), options);
4142}
4143
4144inline int8_t
4145UnicodeString::caseCompare(ConstChar16Ptr srcChars,
4146 int32_t srcLength,
4147 uint32_t options) const {
4148 return doCaseCompare(0, length(), srcChars, 0, srcLength, options);
4149}
4150
4151inline int8_t
4152UnicodeString::caseCompare(int32_t start,
4153 int32_t _length,
4154 const UnicodeString &srcText,
4155 int32_t srcStart,
4156 int32_t srcLength,
4157 uint32_t options) const {
4158 return doCaseCompare(start, _length, srcText, srcStart, srcLength, options);
4159}
4160
4161inline int8_t
4162UnicodeString::caseCompare(int32_t start,
4163 int32_t _length,
4164 const char16_t *srcChars,
4165 uint32_t options) const {
4166 return doCaseCompare(start, _length, srcChars, 0, _length, options);
4167}
4168
4169inline int8_t
4170UnicodeString::caseCompare(int32_t start,
4171 int32_t _length,
4172 const char16_t *srcChars,
4173 int32_t srcStart,
4174 int32_t srcLength,
4175 uint32_t options) const {
4176 return doCaseCompare(start, _length, srcChars, srcStart, srcLength, options);
4177}
4178
4179inline int8_t
4180UnicodeString::caseCompareBetween(int32_t start,
4181 int32_t limit,
4182 const UnicodeString &srcText,
4183 int32_t srcStart,
4184 int32_t srcLimit,
4185 uint32_t options) const {
4186 return doCaseCompare(start, limit - start, srcText, srcStart, srcLimit - srcStart, options);
4187}
4188
4189inline int32_t
4190UnicodeString::indexOf(const UnicodeString& srcText,
4191 int32_t srcStart,
4192 int32_t srcLength,
4193 int32_t start,
4194 int32_t _length) const
4195{
4196 if(!srcText.isBogus()) {
4197 srcText.pinIndices(srcStart, srcLength);
4198 if(srcLength > 0) {
4199 return indexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4200 }
4201 }
4202 return -1;
4203}
4204
4205inline int32_t
4206UnicodeString::indexOf(const UnicodeString& text) const
4207{ return indexOf(text, 0, text.length(), 0, length()); }
4208
4209inline int32_t
4210UnicodeString::indexOf(const UnicodeString& text,
4211 int32_t start) const {
4212 pinIndex(start);
4213 return indexOf(text, 0, text.length(), start, length() - start);
4214}
4215
4216inline int32_t
4217UnicodeString::indexOf(const UnicodeString& text,
4218 int32_t start,
4219 int32_t _length) const
4220{ return indexOf(text, 0, text.length(), start, _length); }
4221
4222inline int32_t
4223UnicodeString::indexOf(const char16_t *srcChars,
4224 int32_t srcLength,
4225 int32_t start) const {
4226 pinIndex(start);
4227 return indexOf(srcChars, 0, srcLength, start, length() - start);
4228}
4229
4230inline int32_t
4231UnicodeString::indexOf(ConstChar16Ptr srcChars,
4232 int32_t srcLength,
4233 int32_t start,
4234 int32_t _length) const
4235{ return indexOf(srcChars, 0, srcLength, start, _length); }
4236
4237inline int32_t
4238UnicodeString::indexOf(char16_t c,
4239 int32_t start,
4240 int32_t _length) const
4241{ return doIndexOf(c, start, _length); }
4242
4243inline int32_t
4244UnicodeString::indexOf(UChar32 c,
4245 int32_t start,
4246 int32_t _length) const
4247{ return doIndexOf(c, start, _length); }
4248
4249inline int32_t
4250UnicodeString::indexOf(char16_t c) const
4251{ return doIndexOf(c, 0, length()); }
4252
4253inline int32_t
4254UnicodeString::indexOf(UChar32 c) const
4255{ return indexOf(c, 0, length()); }
4256
4257inline int32_t
4258UnicodeString::indexOf(char16_t c,
4259 int32_t start) const {
4260 pinIndex(start);
4261 return doIndexOf(c, start, length() - start);
4262}
4263
4264inline int32_t
4265UnicodeString::indexOf(UChar32 c,
4266 int32_t start) const {
4267 pinIndex(start);
4268 return indexOf(c, start, length() - start);
4269}
4270
4271inline int32_t
4272UnicodeString::lastIndexOf(ConstChar16Ptr srcChars,
4273 int32_t srcLength,
4274 int32_t start,
4275 int32_t _length) const
4276{ return lastIndexOf(srcChars, 0, srcLength, start, _length); }
4277
4278inline int32_t
4279UnicodeString::lastIndexOf(const char16_t *srcChars,
4280 int32_t srcLength,
4281 int32_t start) const {
4282 pinIndex(start);
4283 return lastIndexOf(srcChars, 0, srcLength, start, length() - start);
4284}
4285
4286inline int32_t
4287UnicodeString::lastIndexOf(const UnicodeString& srcText,
4288 int32_t srcStart,
4289 int32_t srcLength,
4290 int32_t start,
4291 int32_t _length) const
4292{
4293 if(!srcText.isBogus()) {
4294 srcText.pinIndices(srcStart, srcLength);
4295 if(srcLength > 0) {
4296 return lastIndexOf(srcText.getArrayStart(), srcStart, srcLength, start, _length);
4297 }
4298 }
4299 return -1;
4300}
4301
4302inline int32_t
4303UnicodeString::lastIndexOf(const UnicodeString& text,
4304 int32_t start,
4305 int32_t _length) const
4306{ return lastIndexOf(text, 0, text.length(), start, _length); }
4307
4308inline int32_t
4309UnicodeString::lastIndexOf(const UnicodeString& text,
4310 int32_t start) const {
4311 pinIndex(start);
4312 return lastIndexOf(text, 0, text.length(), start, length() - start);
4313}
4314
4315inline int32_t
4316UnicodeString::lastIndexOf(const UnicodeString& text) const
4317{ return lastIndexOf(text, 0, text.length(), 0, length()); }
4318
4319inline int32_t
4320UnicodeString::lastIndexOf(char16_t c,
4321 int32_t start,
4322 int32_t _length) const
4323{ return doLastIndexOf(c, start, _length); }
4324
4325inline int32_t
4326UnicodeString::lastIndexOf(UChar32 c,
4327 int32_t start,
4328 int32_t _length) const {
4329 return doLastIndexOf(c, start, _length);
4330}
4331
4332inline int32_t
4333UnicodeString::lastIndexOf(char16_t c) const
4334{ return doLastIndexOf(c, 0, length()); }
4335
4336inline int32_t
4337UnicodeString::lastIndexOf(UChar32 c) const {
4338 return lastIndexOf(c, 0, length());
4339}
4340
4341inline int32_t
4342UnicodeString::lastIndexOf(char16_t c,
4343 int32_t start) const {
4344 pinIndex(start);
4345 return doLastIndexOf(c, start, length() - start);
4346}
4347
4348inline int32_t
4349UnicodeString::lastIndexOf(UChar32 c,
4350 int32_t start) const {
4351 pinIndex(start);
4352 return lastIndexOf(c, start, length() - start);
4353}
4354
4355inline UBool
4356UnicodeString::startsWith(const UnicodeString& text) const
4357{ return doEqualsSubstring(0, text.length(), text, 0, text.length()); }
4358
4359inline UBool
4360UnicodeString::startsWith(const UnicodeString& srcText,
4361 int32_t srcStart,
4362 int32_t srcLength) const
4363{ return doEqualsSubstring(0, srcLength, srcText, srcStart, srcLength); }
4364
4365inline UBool
4366UnicodeString::startsWith(ConstChar16Ptr srcChars, int32_t srcLength) const {
4367 if(srcLength < 0) {
4368 srcLength = u_strlen(toUCharPtr(srcChars));
4369 }
4370 return doEqualsSubstring(0, srcLength, srcChars, 0, srcLength);
4371}
4372
4373inline UBool
4374UnicodeString::startsWith(const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const {
4375 if(srcLength < 0) {
4376 srcLength = u_strlen(toUCharPtr(srcChars));
4377 }
4378 return doEqualsSubstring(0, srcLength, srcChars, srcStart, srcLength);
4379}
4380
4381inline UBool
4382UnicodeString::endsWith(const UnicodeString& text) const
4383{ return doEqualsSubstring(length() - text.length(), text.length(),
4384 text, 0, text.length()); }
4385
4386inline UBool
4387UnicodeString::endsWith(const UnicodeString& srcText,
4388 int32_t srcStart,
4389 int32_t srcLength) const {
4390 srcText.pinIndices(srcStart, srcLength);
4391 return doEqualsSubstring(length() - srcLength, srcLength,
4392 srcText, srcStart, srcLength);
4393}
4394
4395inline UBool
4396UnicodeString::endsWith(ConstChar16Ptr srcChars,
4397 int32_t srcLength) const {
4398 if(srcLength < 0) {
4399 srcLength = u_strlen(toUCharPtr(srcChars));
4400 }
4401 return doEqualsSubstring(length() - srcLength, srcLength, srcChars, 0, srcLength);
4402}
4403
4404inline UBool
4405UnicodeString::endsWith(const char16_t *srcChars,
4406 int32_t srcStart,
4407 int32_t srcLength) const {
4408 if(srcLength < 0) {
4409 srcLength = u_strlen(toUCharPtr(srcChars + srcStart));
4410 }
4411 return doEqualsSubstring(length() - srcLength, srcLength,
4412 srcChars, srcStart, srcLength);
4413}
4414
4415//========================================
4416// replace
4417//========================================
4418inline UnicodeString&
4419UnicodeString::replace(int32_t start,
4420 int32_t _length,
4421 const UnicodeString& srcText)
4422{ return doReplace(start, _length, srcText, 0, srcText.length()); }
4423
4424inline UnicodeString&
4425UnicodeString::replace(int32_t start,
4426 int32_t _length,
4427 const UnicodeString& srcText,
4428 int32_t srcStart,
4429 int32_t srcLength)
4430{ return doReplace(start, _length, srcText, srcStart, srcLength); }
4431
4432inline UnicodeString&
4433UnicodeString::replace(int32_t start,
4434 int32_t _length,
4435 ConstChar16Ptr srcChars,
4436 int32_t srcLength)
4437{ return doReplace(start, _length, srcChars, 0, srcLength); }
4438
4439inline UnicodeString&
4440UnicodeString::replace(int32_t start,
4441 int32_t _length,
4442 const char16_t *srcChars,
4443 int32_t srcStart,
4444 int32_t srcLength)
4445{ return doReplace(start, _length, srcChars, srcStart, srcLength); }
4446
4447inline UnicodeString&
4448UnicodeString::replace(int32_t start,
4449 int32_t _length,
4450 char16_t srcChar)
4451{ return doReplace(start, _length, &srcChar, 0, 1); }
4452
4453inline UnicodeString&
4454UnicodeString::replaceBetween(int32_t start,
4455 int32_t limit,
4456 const UnicodeString& srcText)
4457{ return doReplace(start, limit - start, srcText, 0, srcText.length()); }
4458
4459inline UnicodeString&
4460UnicodeString::replaceBetween(int32_t start,
4461 int32_t limit,
4462 const UnicodeString& srcText,
4463 int32_t srcStart,
4464 int32_t srcLimit)
4465{ return doReplace(start, limit - start, srcText, srcStart, srcLimit - srcStart); }
4466
4467inline UnicodeString&
4468UnicodeString::findAndReplace(const UnicodeString& oldText,
4469 const UnicodeString& newText)
4470{ return findAndReplace(0, length(), oldText, 0, oldText.length(),
4471 newText, 0, newText.length()); }
4472
4473inline UnicodeString&
4474UnicodeString::findAndReplace(int32_t start,
4475 int32_t _length,
4476 const UnicodeString& oldText,
4477 const UnicodeString& newText)
4478{ return findAndReplace(start, _length, oldText, 0, oldText.length(),
4479 newText, 0, newText.length()); }
4480
4481// ============================
4482// extract
4483// ============================
4484inline void
4485UnicodeString::doExtract(int32_t start,
4486 int32_t _length,
4487 UnicodeString& target) const
4488{ target.replace(0, target.length(), *this, start, _length); }
4489
4490inline void
4491UnicodeString::extract(int32_t start,
4492 int32_t _length,
4493 Char16Ptr target,
4494 int32_t targetStart) const
4495{ doExtract(start, _length, target, targetStart); }
4496
4497inline void
4498UnicodeString::extract(int32_t start,
4499 int32_t _length,
4500 UnicodeString& target) const
4501{ doExtract(start, _length, target); }
4502
4503#if !UCONFIG_NO_CONVERSION
4504
4505inline int32_t
4506UnicodeString::extract(int32_t start,
4507 int32_t _length,
4508 char *dst,
4509 const char *codepage) const
4510
4511{
4512 // This dstSize value will be checked explicitly
4513 return extract(start, _length, dst, dst!=0 ? 0xffffffff : 0, codepage);
4514}
4515
4516#endif
4517
4518inline void
4519UnicodeString::extractBetween(int32_t start,
4520 int32_t limit,
4521 char16_t *dst,
4522 int32_t dstStart) const {
4523 pinIndex(start);
4524 pinIndex(limit);
4525 doExtract(start, limit - start, dst, dstStart);
4526}
4527
4528inline UnicodeString
4529UnicodeString::tempSubStringBetween(int32_t start, int32_t limit) const {
4530 return tempSubString(start, limit - start);
4531}
4532
4533inline char16_t
4534UnicodeString::doCharAt(int32_t offset) const
4535{
4536 if((uint32_t)offset < (uint32_t)length()) {
4537 return getArrayStart()[offset];
4538 } else {
4539 return kInvalidUChar;
4540 }
4541}
4542
4543inline char16_t
4544UnicodeString::charAt(int32_t offset) const
4545{ return doCharAt(offset); }
4546
4547inline char16_t
4548UnicodeString::operator[] (int32_t offset) const
4549{ return doCharAt(offset); }
4550
4551inline UBool
4552UnicodeString::isEmpty() const {
4553 // Arithmetic or logical right shift does not matter: only testing for 0.
4554 return (fUnion.fFields.fLengthAndFlags>>kLengthShift) == 0;
4555}
4556
4557//========================================
4558// Write implementation methods
4559//========================================
4560inline void
4561UnicodeString::setZeroLength() {
4562 fUnion.fFields.fLengthAndFlags &= kAllStorageFlags;
4563}
4564
4565inline void
4566UnicodeString::setShortLength(int32_t len) {
4567 // requires 0 <= len <= kMaxShortLength
4568 fUnion.fFields.fLengthAndFlags =
4569 (int16_t)((fUnion.fFields.fLengthAndFlags & kAllStorageFlags) | (len << kLengthShift));
4570}
4571
4572inline void
4573UnicodeString::setLength(int32_t len) {
4574 if(len <= kMaxShortLength) {
4575 setShortLength(len);
4576 } else {
4577 fUnion.fFields.fLengthAndFlags |= kLengthIsLarge;
4578 fUnion.fFields.fLength = len;
4579 }
4580}
4581
4582inline void
4583UnicodeString::setToEmpty() {
4584 fUnion.fFields.fLengthAndFlags = kShortString;
4585}
4586
4587inline void
4588UnicodeString::setArray(char16_t *array, int32_t len, int32_t capacity) {
4589 setLength(len);
4590 fUnion.fFields.fArray = array;
4591 fUnion.fFields.fCapacity = capacity;
4592}
4593
4594inline UnicodeString&
4595UnicodeString::operator= (char16_t ch)
4596{ return doReplace(0, length(), &ch, 0, 1); }
4597
4598inline UnicodeString&
4599UnicodeString::operator= (UChar32 ch)
4600{ return replace(0, length(), ch); }
4601
4602inline UnicodeString&
4603UnicodeString::setTo(const UnicodeString& srcText,
4604 int32_t srcStart,
4605 int32_t srcLength)
4606{
4607 unBogus();
4608 return doReplace(0, length(), srcText, srcStart, srcLength);
4609}
4610
4611inline UnicodeString&
4612UnicodeString::setTo(const UnicodeString& srcText,
4613 int32_t srcStart)
4614{
4615 unBogus();
4616 srcText.pinIndex(srcStart);
4617 return doReplace(0, length(), srcText, srcStart, srcText.length() - srcStart);
4618}
4619
4620inline UnicodeString&
4621UnicodeString::setTo(const UnicodeString& srcText)
4622{
4623 return copyFrom(srcText);
4624}
4625
4626inline UnicodeString&
4627UnicodeString::setTo(const char16_t *srcChars,
4628 int32_t srcLength)
4629{
4630 unBogus();
4631 return doReplace(0, length(), srcChars, 0, srcLength);
4632}
4633
4634inline UnicodeString&
4635UnicodeString::setTo(char16_t srcChar)
4636{
4637 unBogus();
4638 return doReplace(0, length(), &srcChar, 0, 1);
4639}
4640
4641inline UnicodeString&
4642UnicodeString::setTo(UChar32 srcChar)
4643{
4644 unBogus();
4645 return replace(0, length(), srcChar);
4646}
4647
4648inline UnicodeString&
4649UnicodeString::append(const UnicodeString& srcText,
4650 int32_t srcStart,
4651 int32_t srcLength)
4652{ return doAppend(srcText, srcStart, srcLength); }
4653
4654inline UnicodeString&
4655UnicodeString::append(const UnicodeString& srcText)
4656{ return doAppend(srcText, 0, srcText.length()); }
4657
4658inline UnicodeString&
4659UnicodeString::append(const char16_t *srcChars,
4660 int32_t srcStart,
4661 int32_t srcLength)
4662{ return doAppend(srcChars, srcStart, srcLength); }
4663
4664inline UnicodeString&
4665UnicodeString::append(ConstChar16Ptr srcChars,
4666 int32_t srcLength)
4667{ return doAppend(srcChars, 0, srcLength); }
4668
4669inline UnicodeString&
4670UnicodeString::append(char16_t srcChar)
4671{ return doAppend(&srcChar, 0, 1); }
4672
4673inline UnicodeString&
4674UnicodeString::operator+= (char16_t ch)
4675{ return doAppend(&ch, 0, 1); }
4676
4677inline UnicodeString&
4678UnicodeString::operator+= (UChar32 ch) {
4679 return append(ch);
4680}
4681
4682inline UnicodeString&
4683UnicodeString::operator+= (const UnicodeString& srcText)
4684{ return doAppend(srcText, 0, srcText.length()); }
4685
4686inline UnicodeString&
4687UnicodeString::insert(int32_t start,
4688 const UnicodeString& srcText,
4689 int32_t srcStart,
4690 int32_t srcLength)
4691{ return doReplace(start, 0, srcText, srcStart, srcLength); }
4692
4693inline UnicodeString&
4694UnicodeString::insert(int32_t start,
4695 const UnicodeString& srcText)
4696{ return doReplace(start, 0, srcText, 0, srcText.length()); }
4697
4698inline UnicodeString&
4699UnicodeString::insert(int32_t start,
4700 const char16_t *srcChars,
4701 int32_t srcStart,
4702 int32_t srcLength)
4703{ return doReplace(start, 0, srcChars, srcStart, srcLength); }
4704
4705inline UnicodeString&
4706UnicodeString::insert(int32_t start,
4707 ConstChar16Ptr srcChars,
4708 int32_t srcLength)
4709{ return doReplace(start, 0, srcChars, 0, srcLength); }
4710
4711inline UnicodeString&
4712UnicodeString::insert(int32_t start,
4713 char16_t srcChar)
4714{ return doReplace(start, 0, &srcChar, 0, 1); }
4715
4716inline UnicodeString&
4717UnicodeString::insert(int32_t start,
4718 UChar32 srcChar)
4719{ return replace(start, 0, srcChar); }
4720
4721
4722inline UnicodeString&
4723UnicodeString::remove()
4724{
4725 // remove() of a bogus string makes the string empty and non-bogus
4726 if(isBogus()) {
4727 setToEmpty();
4728 } else {
4729 setZeroLength();
4730 }
4731 return *this;
4732}
4733
4734inline UnicodeString&
4735UnicodeString::remove(int32_t start,
4736 int32_t _length)
4737{
4738 if(start <= 0 && _length == INT32_MAX) {
4739 // remove(guaranteed everything) of a bogus string makes the string empty and non-bogus
4740 return remove();
4741 }
4742 return doReplace(start, _length, nullptr, 0, 0);
4743}
4744
4745inline UnicodeString&
4746UnicodeString::removeBetween(int32_t start,
4747 int32_t limit)
4748{ return doReplace(start, limit - start, nullptr, 0, 0); }
4749
4750inline UnicodeString &
4751UnicodeString::retainBetween(int32_t start, int32_t limit) {
4752 truncate(limit);
4753 return doReplace(0, start, nullptr, 0, 0);
4754}
4755
4756inline UBool
4757UnicodeString::truncate(int32_t targetLength)
4758{
4759 if(isBogus() && targetLength == 0) {
4760 // truncate(0) of a bogus string makes the string empty and non-bogus
4761 unBogus();
4762 return false;
4763 } else if((uint32_t)targetLength < (uint32_t)length()) {
4764 setLength(targetLength);
4765 return true;
4766 } else {
4767 return false;
4768 }
4769}
4770
4771inline UnicodeString&
4772UnicodeString::reverse()
4773{ return doReverse(0, length()); }
4774
4775inline UnicodeString&
4776UnicodeString::reverse(int32_t start,
4777 int32_t _length)
4778{ return doReverse(start, _length); }
4779
4780U_NAMESPACE_END
4781
4782#endif /* U_SHOW_CPLUSPLUS_API */
4783
4784#endif
C++ API: Interface for writing bytes, and implementation classes.
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:106
A ByteSink can be filled with bytes.
Definition: bytestream.h:53
char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:42
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:149
Records lengths of string edits but not replacement text.
Definition: edits.h:80
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
Replaceable is an abstract base class representing a string of characters that supports the replacement of a range of itself with a new string of characters.
Definition: rep.h:77
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const =0
Copies characters in the range [start, limit) into the UnicodeString target.
char16_t charAt(int32_t offset) const
Returns the 16-bit code unit at the given offset into the text.
Definition: rep.h:251
int32_t length() const
Returns the number of 16-bit code units in the text.
Definition: rep.h:246
Implementation of ByteSink that writes to a "string".
Definition: bytestream.h:267
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:60
An Appendable implementation which writes to a UnicodeString.
Definition: appendable.h:156
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:296
int32_t indexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the first occurrence in the range [start, start + length) of the characters in srcChar...
UnicodeString(const UnicodeString &that)
Copy constructor.
void swap(UnicodeString &other) noexcept
Swap strings.
virtual char16_t getCharAt(int32_t offset) const override
The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline again.
virtual int32_t getLength() const override
Implement Replaceable::getLength() (see jitterbug 1027).
UnicodeString & setTo(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.
UnicodeString & operator=(UnicodeString &&src) noexcept
Move assignment operator; might leave src in bogus state.
UChar32 unescapeAt(int32_t &offset) const
Unescape a single escape sequence and return the represented character.
UnicodeString(const wchar_t *text, int32_t textLength)
wchar_t * constructor.
Definition: unistr.h:3060
virtual void handleReplaceBetween(int32_t start, int32_t limit, const UnicodeString &text) override
Replace a substring of this object with the given text.
UBool hasMoreChar32Than(int32_t start, int32_t length, int32_t number) const
Check if the length char16_t code units of the string contain more Unicode code points than a certain number.
UnicodeString(const UnicodeString &src, int32_t srcStart, int32_t srcLength)
'Substring' constructor from subrange of source string.
virtual ~UnicodeString()
Destructor.
UnicodeString(const char *codepageData, int32_t dataLength, const char *codepage)
char* constructor.
UnicodeString(const char *codepageData, const char *codepage)
char* constructor.
UnicodeString(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Readonly-aliasing char16_t* constructor.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale, uint32_t options)
Titlecase this string, with options.
UnicodeString & append(UChar32 srcChar)
Append the code point srcChar to the UnicodeString object.
EInvariant
Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a Unicode string from an invariant-character char * string.
Definition: unistr.h:307
UnicodeString unescape() const
Unescape a string of characters and return a string containing the result.
UnicodeString(const UnicodeString &src, int32_t srcStart)
'Substring' constructor from tail of source string.
int32_t getChar32Limit(int32_t offset) const
Adjust a random-access offset so that it points behind a Unicode character.
UnicodeString(char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing char16_t* constructor.
UnicodeString & findAndReplace(int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
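Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with the characters in newText in the range [newStart, newStart + newLength) in the range [start, start + length).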
UnicodeString(int32_t capacity, UChar32 c, int32_t count)
Construct a UnicodeString with capacity to hold capacity char16_ts.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const uint16_t *text)
uint16_t * constructor.
Definition: unistr.h:2997
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
Copy the characters in the range [start, start + startLength) into an array of characters in a specified codepage.
virtual void copy(int32_t start, int32_t limit, int32_t dest) override
Copy a substring of this object, retaining attribute (out-of-band) information.
virtual void extractBetween(int32_t start, int32_t limit, UnicodeString &target) const override
Copy the characters in the range [start, limit) into the UnicodeString target.
UnicodeString & replace(int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
Replace the characters in the range [start, start + length) with the characters in srcText in the range [srcStart, srcStart + srcLength).
Definition: unistr.h:4425
UnicodeString & toTitle(BreakIterator *titleIter)
Titlecase this string, convenience function using the default locale.
UnicodeString & fastCopyFrom(const UnicodeString &src)
Almost the same as the assignment operator.
virtual UnicodeString * clone() const override
Clone this object, an instance of a subclass of Replaceable.
UBool padLeading(int32_t targetLength, char16_t padChar=0x0020)
Pad the start of this UnicodeString with the character padChar.
int32_t getChar32Start(int32_t offset) const
Adjust a random-access offset so that it points to the beginning of a Unicode character.
UChar32 char32At(int32_t offset) const
Return the code point that contains the code unit at offset offset.
UnicodeString(const char *src, int32_t textLength, enum EInvariant inv)
Constructs a Unicode string from an invariant-character char * string.
UnicodeString & trim(void)
Trims leading and trailing whitespace from this UnicodeString.
int32_t length(void) const
Return the length of the UnicodeString object.
Definition: unistr.h:3903
UnicodeString tempSubString(int32_t start=0, int32_t length=INT32_MAX) const
Create a temporary substring for the specified range.
int32_t extract(Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
Copy the contents of the string into dest.
virtual UChar32 getChar32At(int32_t offset) const override
The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again.
int32_t extract(int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
Copy the characters in the range [start, start + startLength) into an array of characters in the platform's default codepage.
static UnicodeString fromUTF8(StringPiece utf8)
Create a UnicodeString from a UTF-8 string.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(char16_t ch)
Single char16_t (code unit) constructor.
int32_t lastIndexOf(const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars in the range [srcStart, srcStart + srcLength), using bitwise comparison.
UnicodeString(wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing wchar_t * constructor.
Definition: unistr.h:3142
void setToBogus()
Make this UnicodeString object invalid.
friend void swap(UnicodeString &s1, UnicodeString &s2) noexcept
Non-member UnicodeString swap function.
Definition: unistr.h:1925
int32_t moveIndex32(int32_t index, int32_t delta) const
Move the code unit index along the string by delta code points.
static UnicodeString fromUTF32(const UChar32 *utf32, int32_t length)
Create a UnicodeString from a UTF-32 string.
UnicodeString & replace(int32_t start, int32_t length, UChar32 srcChar)
Replace the characters in the range [start, start + length) with the code point srcChar.
int32_t countChar32(int32_t start=0, int32_t length=INT32_MAX) const
Count Unicode code points in the length char16_t code units of the string.
UnicodeString & toUpper(void)
Convert the characters in this to UPPER CASE following the conventions of the default locale.
UNISTR_FROM_CHAR_EXPLICIT UnicodeString(UChar32 ch)
Single UChar32 (code point) constructor.
UnicodeString & operator=(const UnicodeString &srcText)
Assignment operator.
UnicodeString & setTo(UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.
int32_t extract(char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
Convert the UnicodeString into a codepage string using an existing UConverter.
StringClass & toUTF8String(StringClass &result) const
Convert the UnicodeString to UTF-8 and append the result to a standard string.
Definition: unistr.h:1729
UnicodeString(UnicodeString &&src) noexcept
Move constructor; might leave src in bogus state.
UnicodeString(const char16_t *text, int32_t textLength)
char16_t* constructor.
UnicodeString(const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
char * / UConverter constructor.
UnicodeString(const char *codepageData, int32_t dataLength)
char* constructor.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
UnicodeString(uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
Writable-aliasing uint16_t * constructor.
Definition: unistr.h:3128
int32_t toUTF32(UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
Convert the UnicodeString to UTF-32.
UnicodeString & toLower(const Locale &locale)
Convert the characters in this to lower case following the conventions of a specific locale.
UnicodeString & toTitle(BreakIterator *titleIter, const Locale &locale)
Titlecase this string.
UnicodeString & foldCase(uint32_t options=0)
Case-folds the characters in this string.
const char16_t * getTerminatedBuffer()
Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.
UnicodeString(const uint16_t *text, int32_t textLength)
uint16_t * constructor.
Definition: unistr.h:3047
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char16_t *text)
char16_t* constructor.
UnicodeString & toUpper(const Locale &locale)
Convert the characters in this to UPPER CASE following the conventions of a specific locale.
UnicodeString & setCharAt(int32_t offset, char16_t ch)
Set the character at the specified offset to the specified character.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
int32_t extract(int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
Copy the characters in the range [start, start + startLength) into an array of characters.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const wchar_t *text)
wchar_t * constructor.
Definition: unistr.h:3013
UBool isBogus(void) const
Determine if this object contains a valid string.
Definition: unistr.h:3918
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
UBool padTrailing(int32_t targetLength, char16_t padChar=0x0020)
Pad the end of this UnicodeString with the character padChar.
UNISTR_FROM_STRING_EXPLICIT UnicodeString(const char *codepageData)
char* constructor.
void releaseBuffer(int32_t newLength=-1)
Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).
void toUTF8(ByteSink &sink) const
Convert the UnicodeString to UTF-8 and write the result to a ByteSink.
virtual UBool hasMetaData() const override
Replaceable API.
UnicodeString & toLower(void)
Convert the characters in this to lower case following the conventions of the default locale.
U_CAPI int32_t u_strlen(const UChar *s)
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:335
U_COMMON_API UnicodeString operator+(const UnicodeString &s1, const UnicodeString &s2)
Create a new UnicodeString with the concatenation of two others.
const UChar * toUCharPtr(const char16_t *p)
Converts from const char16_t * to const UChar *.
Definition: char16ptr.h:260
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to make sure that the calling convention is compatible.
Definition: platform.h:866
C++ API: Replaceable String.
C++ API: Central ICU header for including the C++ standard <string> header and for related definitions.
C++ API: StringPiece: Read-only byte string wrapper class.
struct UConverter UConverter
Definition: ucnv_err.h:96
#define UCONFIG_NO_BREAK_ITERATION
This switch turns off break iteration.
Definition: uconfig.h:358
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:435
#define INT32_MAX
The largest value a 32 bit signed integer can hold.
Definition: umachine.h:186
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:386
#define U_SIZEOF_UCHAR
Number of bytes in a UChar (always 2).
Definition: umachine.h:330
#define UNISTR_FROM_CHAR_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:150
int32_t UStringCaseMapper(int32_t caseLocale, uint32_t options, icu::BreakIterator *iter, char16_t *dest, int32_t destCapacity, const char16_t *src, int32_t srcLength, icu::Edits *edits, UErrorCode &errorCode)
Internal string case mapping function type.
Definition: unistr.h:70
#define UNISTR_FROM_STRING_EXPLICIT
This can be defined to be empty or "explicit".
Definition: unistr.h:170
#define UNISTR_OBJECT_SIZE
Desired sizeof(UnicodeString) in bytes.
Definition: unistr.h:208
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:300