UnicodeString is a string class that stores Unicode characters directly and provides functionality similar to that of the Java String and StringBuffer/StringBuilder classes. More...

#include <unistr.h>

Inheritance diagram for icu::UnicodeString:

Public Types
enum	EInvariant { kInvariant }
	Constant to be used in the UnicodeString(const char *, int32_t, EInvariant) constructor which constructs a Unicode string from an invariant-character char * string. More...

using	value_type = char16_t
	C++ boilerplate. More...

Public Member Functions
bool	operator== (const UnicodeString &text) const
	Equality operator. More...

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
bool	operator== (const S &text) const
	Equality operator. More...

bool	operator!= (const UnicodeString &text) const
	Inequality operator. More...

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
bool	operator!= (const S &text) const
	Inequality operator. More...

UBool	operator> (const UnicodeString &text) const
	Greater than operator. More...

UBool	operator< (const UnicodeString &text) const
	Less than operator. More...

UBool	operator>= (const UnicodeString &text) const
	Greater than or equal operator. More...

UBool	operator<= (const UnicodeString &text) const
	Less than or equal operator. More...

int8_t	compare (const UnicodeString &text) const
	Compare the characters bitwise in this UnicodeString to the characters in `text`. More...

int8_t	compare (int32_t start, int32_t length, const UnicodeString &text) const
	Compare the characters bitwise in the range [`start`, `start + length`) with the characters in the entire string `text`. More...

int8_t	compare (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const
	Compare the characters bitwise in the range [`start`, `start + length`) with the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`). More...

int8_t	compare (ConstChar16Ptr srcChars, int32_t srcLength) const
	Compare the characters bitwise in this UnicodeString with the first `srcLength` characters in `srcChars`. More...

int8_t	compare (int32_t start, int32_t length, const char16_t *srcChars) const
	Compare the characters bitwise in the range [`start`, `start + length`) with the first `length` characters in `srcChars`. More...

int8_t	compare (int32_t start, int32_t length, const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const
	Compare the characters bitwise in the range [`start`, `start + length`) with the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`). More...

int8_t	compareBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const
	Compare the characters bitwise in the range [`start`, `limit`) with the characters in `srcText` in the range [`srcStart`, `srcLimit`). More...

int8_t	compareCodePointOrder (const UnicodeString &text) const
	Compare two Unicode strings in code point order. More...

int8_t	compareCodePointOrder (int32_t start, int32_t length, const UnicodeString &srcText) const
	Compare two Unicode strings in code point order. More...

int8_t	compareCodePointOrder (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const
	Compare two Unicode strings in code point order. More...

int8_t	compareCodePointOrder (ConstChar16Ptr srcChars, int32_t srcLength) const
	Compare two Unicode strings in code point order. More...

int8_t	compareCodePointOrder (int32_t start, int32_t length, const char16_t *srcChars) const
	Compare two Unicode strings in code point order. More...

int8_t	compareCodePointOrder (int32_t start, int32_t length, const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const
	Compare two Unicode strings in code point order. More...

int8_t	compareCodePointOrderBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit) const
	Compare two Unicode strings in code point order. More...

int8_t	caseCompare (const UnicodeString &text, uint32_t options) const
	Compare two strings case-insensitively using full case folding. More...

int8_t	caseCompare (int32_t start, int32_t length, const UnicodeString &srcText, uint32_t options) const
	Compare two strings case-insensitively using full case folding. More...

int8_t	caseCompare (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength, uint32_t options) const
	Compare two strings case-insensitively using full case folding. More...

int8_t	caseCompare (ConstChar16Ptr srcChars, int32_t srcLength, uint32_t options) const
	Compare two strings case-insensitively using full case folding. More...

int8_t	caseCompare (int32_t start, int32_t length, const char16_t *srcChars, uint32_t options) const
	Compare two strings case-insensitively using full case folding. More...

int8_t	caseCompare (int32_t start, int32_t length, const char16_t *srcChars, int32_t srcStart, int32_t srcLength, uint32_t options) const
	Compare two strings case-insensitively using full case folding. More...

int8_t	caseCompareBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit, uint32_t options) const
	Compare two strings case-insensitively using full case folding. More...

UBool	startsWith (const UnicodeString &text) const
	Determine if this starts with the characters in `text`. More...

UBool	startsWith (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const
	Determine if this starts with the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`). More...

UBool	startsWith (ConstChar16Ptr srcChars, int32_t srcLength) const
	Determine if this starts with the characters in `srcChars`. More...

UBool	startsWith (const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const
	Determine if this starts with the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`). More...

UBool	endsWith (const UnicodeString &text) const
	Determine if this ends with the characters in `text`. More...

UBool	endsWith (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength) const
	Determine if this ends with the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`). More...

UBool	endsWith (ConstChar16Ptr srcChars, int32_t srcLength) const
	Determine if this ends with the characters in `srcChars`. More...

UBool	endsWith (const char16_t *srcChars, int32_t srcStart, int32_t srcLength) const
	Determine if this ends with the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`). More...

int32_t	indexOf (const UnicodeString &text) const
	Locate in this the first occurrence of the characters in `text`, using bitwise comparison. More...

int32_t	indexOf (const UnicodeString &text, int32_t start) const
	Locate in this the first occurrence of the characters in `text` starting at offset `start`, using bitwise comparison. More...

int32_t	indexOf (const UnicodeString &text, int32_t start, int32_t length) const
	Locate in this the first occurrence in the range [`start`, `start + length`) of the characters in `text`, using bitwise comparison. More...

int32_t	indexOf (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
	Locate in this the first occurrence in the range [`start`, `start + length`) of the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`), using bitwise comparison. More...

int32_t	indexOf (const char16_t *srcChars, int32_t srcLength, int32_t start) const
	Locate in this the first occurrence of the characters in `srcChars` starting at offset `start`, using bitwise comparison. More...

int32_t	indexOf (ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const
	Locate in this the first occurrence in the range [`start`, `start + length`) of the characters in `srcChars`, using bitwise comparison. More...

int32_t	indexOf (const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
	Locate in this the first occurrence in the range [`start`, `start + length`) of the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`), using bitwise comparison. More...

int32_t	indexOf (char16_t c) const
	Locate in this the first occurrence of the BMP code point `c`, using bitwise comparison. More...

int32_t	indexOf (UChar32 c) const
	Locate in this the first occurrence of the code point `c`, using bitwise comparison. More...

int32_t	indexOf (char16_t c, int32_t start) const
	Locate in this the first occurrence of the BMP code point `c`, starting at offset `start`, using bitwise comparison. More...

int32_t	indexOf (UChar32 c, int32_t start) const
	Locate in this the first occurrence of the code point `c` starting at offset `start`, using bitwise comparison. More...

int32_t	indexOf (char16_t c, int32_t start, int32_t length) const
	Locate in this the first occurrence of the BMP code point `c` in the range [`start`, `start + length`), using bitwise comparison. More...

int32_t	indexOf (UChar32 c, int32_t start, int32_t length) const
	Locate in this the first occurrence of the code point `c` in the range [`start`, `start + length`), using bitwise comparison. More...

int32_t	lastIndexOf (const UnicodeString &text) const
	Locate in this the last occurrence of the characters in `text`, using bitwise comparison. More...

int32_t	lastIndexOf (const UnicodeString &text, int32_t start) const
	Locate in this the last occurrence of the characters in `text` starting at offset `start`, using bitwise comparison. More...

int32_t	lastIndexOf (const UnicodeString &text, int32_t start, int32_t length) const
	Locate in this the last occurrence in the range [`start`, `start + length`) of the characters in `text`, using bitwise comparison. More...

int32_t	lastIndexOf (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
	Locate in this the last occurrence in the range [`start`, `start + length`) of the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`), using bitwise comparison. More...

int32_t	lastIndexOf (const char16_t *srcChars, int32_t srcLength, int32_t start) const
	Locate in this the last occurrence of the characters in `srcChars` starting at offset `start`, using bitwise comparison. More...

int32_t	lastIndexOf (ConstChar16Ptr srcChars, int32_t srcLength, int32_t start, int32_t length) const
	Locate in this the last occurrence in the range [`start`, `start + length`) of the characters in `srcChars`, using bitwise comparison. More...

int32_t	lastIndexOf (const char16_t *srcChars, int32_t srcStart, int32_t srcLength, int32_t start, int32_t length) const
	Locate in this the last occurrence in the range [`start`, `start + length`) of the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`), using bitwise comparison. More...

int32_t	lastIndexOf (char16_t c) const
	Locate in this the last occurrence of the BMP code point `c`, using bitwise comparison. More...

int32_t	lastIndexOf (UChar32 c) const
	Locate in this the last occurrence of the code point `c`, using bitwise comparison. More...

int32_t	lastIndexOf (char16_t c, int32_t start) const
	Locate in this the last occurrence of the BMP code point `c` starting at offset `start`, using bitwise comparison. More...

int32_t	lastIndexOf (UChar32 c, int32_t start) const
	Locate in this the last occurrence of the code point `c` starting at offset `start`, using bitwise comparison. More...

int32_t	lastIndexOf (char16_t c, int32_t start, int32_t length) const
	Locate in this the last occurrence of the BMP code point `c` in the range [`start`, `start + length`), using bitwise comparison. More...

int32_t	lastIndexOf (UChar32 c, int32_t start, int32_t length) const
	Locate in this the last occurrence of the code point `c` in the range [`start`, `start + length`), using bitwise comparison. More...

char16_t	charAt (int32_t offset) const
	Return the code unit at offset `offset`. More...

char16_t	operator[] (int32_t offset) const
	Return the code unit at offset `offset`. More...

UChar32	char32At (int32_t offset) const
	Return the code point that contains the code unit at offset `offset`. More...

int32_t	getChar32Start (int32_t offset) const
	Adjust a random-access offset so that it points to the beginning of a Unicode character. More...

int32_t	getChar32Limit (int32_t offset) const
	Adjust a random-access offset so that it points behind a Unicode character. More...

int32_t	moveIndex32 (int32_t index, int32_t delta) const
	Move the code unit index along the string by delta code points. More...

void	extract (int32_t start, int32_t length, Char16Ptr dst, int32_t dstStart=0) const
	Copy the characters in the range [`start`, `start + length`) into the array `dst`, beginning at `dstStart`. More...

int32_t	extract (Char16Ptr dest, int32_t destCapacity, UErrorCode &errorCode) const
	Copy the contents of the string into dest. More...

void	extract (int32_t start, int32_t length, UnicodeString &target) const
	Copy the characters in the range [`start`, `start + length`) into the UnicodeString `target`. More...

void	extractBetween (int32_t start, int32_t limit, char16_t *dst, int32_t dstStart=0) const
	Copy the characters in the range [`start`, `limit`) into the array `dst`, beginning at `dstStart`. More...

virtual void	extractBetween (int32_t start, int32_t limit, UnicodeString &target) const override
	Copy the characters in the range [`start`, `limit`) into the UnicodeString `target`. More...

int32_t	extract (int32_t start, int32_t startLength, char *target, int32_t targetCapacity, enum EInvariant inv) const
	Copy the characters in the range [`start`, `start + startLength`) into an array of characters. More...

int32_t	extract (int32_t start, int32_t startLength, char *target, uint32_t targetLength) const
	Copy the characters in the range [`start`, `start + startLength`) into an array of characters in the platform's default codepage. More...

int32_t	extract (int32_t start, int32_t startLength, char *target, const char *codepage=nullptr) const
	Copy the characters in the range [`start`, `start + startLength`) into an array of characters in a specified codepage. More...

int32_t	extract (int32_t start, int32_t startLength, char *target, uint32_t targetLength, const char *codepage) const
	Copy the characters in the range [`start`, `start + startLength`) into an array of characters in a specified codepage. More...

int32_t	extract (char *dest, int32_t destCapacity, UConverter *cnv, UErrorCode &errorCode) const
	Convert the UnicodeString into a codepage string using an existing UConverter. More...

UnicodeString	tempSubString (int32_t start=0, int32_t length=INT32_MAX) const
	Create a temporary substring for the specified range. More...

UnicodeString	tempSubStringBetween (int32_t start, int32_t limit=INT32_MAX) const
	Create a temporary substring for the specified range. More...

void	toUTF8 (ByteSink &sink) const
	Convert the UnicodeString to UTF-8 and write the result to a ByteSink. More...

template<typename StringClass >
StringClass &	toUTF8String (StringClass &result) const
	Convert the UnicodeString to UTF-8 and append the result to a standard string. More...

template<typename StringClass >
StringClass	toUTF8String () const
	Convert the UnicodeString to a UTF-8 string. More...

int32_t	toUTF32 (UChar32 *utf32, int32_t capacity, UErrorCode &errorCode) const
	Convert the UnicodeString to UTF-32. More...

int32_t	length () const
	Return the length of the UnicodeString object. More...

int32_t	countChar32 (int32_t start=0, int32_t length=INT32_MAX) const
	Count Unicode code points in the length char16_t code units of the string. More...

UBool	hasMoreChar32Than (int32_t start, int32_t length, int32_t number) const
	Check if the length char16_t code units of the string contain more Unicode code points than a certain number. More...

UBool	isEmpty () const
	Determine if this string is empty. More...

int32_t	getCapacity () const
	Return the capacity of the internal buffer of the UnicodeString object. More...

int32_t	hashCode () const
	Generate a hash code for this object. More...

UBool	isBogus () const
	Determine if this object contains a valid string. More...

unspecified_iterator	begin () const

unspecified_iterator	end () const

unspecified_reverse_iterator	rbegin () const

unspecified_reverse_iterator	rend () const

UnicodeString &	operator= (const UnicodeString &srcText)
	Assignment operator. More...

UnicodeString &	fastCopyFrom (const UnicodeString &src)
	Almost the same as the assignment operator. More...

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
UnicodeString &	operator= (const S &src)
	Assignment operator. More...

UnicodeString &	operator= (UnicodeString &&src) noexcept
	Move assignment operator; might leave src in bogus state. More...

void	swap (UnicodeString &other) noexcept
	Swap strings. More...

UnicodeString &	operator= (char16_t ch)
	Assignment operator. More...

UnicodeString &	operator= (UChar32 ch)
	Assignment operator. More...

UnicodeString &	setTo (const UnicodeString &srcText, int32_t srcStart)
	Set the text in the UnicodeString object to the characters in `srcText` in the range [`srcStart`, `srcText.length()`). More...

UnicodeString &	setTo (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
	Set the text in the UnicodeString object to the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`). More...

UnicodeString &	setTo (const UnicodeString &srcText)
	Set the text in the UnicodeString object to the characters in `srcText`. More...

UnicodeString &	setTo (const char16_t *srcChars, int32_t srcLength)
	Set the characters in the UnicodeString object to the characters in `srcChars`. More...

UnicodeString &	setTo (char16_t srcChar)
	Set the characters in the UnicodeString object to the code unit `srcChar`. More...

UnicodeString &	setTo (UChar32 srcChar)
	Set the characters in the UnicodeString object to the code point `srcChar`. More...

UnicodeString &	setTo (UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
	Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor. More...

UnicodeString &	setTo (char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
	Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor. More...

void	setToBogus ()
	Make this UnicodeString object invalid. More...

UnicodeString &	setCharAt (int32_t offset, char16_t ch)
	Set the character at the specified offset to the specified character. More...

UnicodeString &	operator+= (char16_t ch)
	Append operator. More...

UnicodeString &	operator+= (UChar32 ch)
	Append operator. More...

UnicodeString &	operator+= (const UnicodeString &srcText)
	Append operator. More...

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
UnicodeString &	operator+= (const S &src)
	Append operator. More...

UnicodeString &	append (const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
	Append the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`) to the end of the UnicodeString object. More...

UnicodeString &	append (const UnicodeString &srcText)
	Append the characters in `srcText` to the UnicodeString object. More...

UnicodeString &	append (const char16_t *srcChars, int32_t srcStart, int32_t srcLength)
	Append the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`) to the end of the UnicodeString object. More...

UnicodeString &	append (ConstChar16Ptr srcChars, int32_t srcLength)
	Append the characters in `srcChars` to the UnicodeString object. More...

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
UnicodeString &	append (const S &src)
	Appends the characters in `src` which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, to the UnicodeString object. More...

UnicodeString &	append (char16_t srcChar)
	Append the code unit `srcChar` to the UnicodeString object. More...

UnicodeString &	append (UChar32 srcChar)
	Append the code point `srcChar` to the UnicodeString object. More...

void	push_back (char16_t c)
	Appends the code unit `c` to the UnicodeString object. More...

UnicodeString &	insert (int32_t start, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
	Insert the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`) into the UnicodeString object at offset `start`. More...

UnicodeString &	insert (int32_t start, const UnicodeString &srcText)
	Insert the characters in `srcText` into the UnicodeString object at offset `start`. More...

UnicodeString &	insert (int32_t start, const char16_t *srcChars, int32_t srcStart, int32_t srcLength)
	Insert the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`) into the UnicodeString object at offset `start`. More...

UnicodeString &	insert (int32_t start, ConstChar16Ptr srcChars, int32_t srcLength)
	Insert the characters in `srcChars` into the UnicodeString object at offset `start`. More...

UnicodeString &	insert (int32_t start, char16_t srcChar)
	Insert the code unit `srcChar` into the UnicodeString object at offset `start`. More...

UnicodeString &	insert (int32_t start, UChar32 srcChar)
	Insert the code point `srcChar` into the UnicodeString object at offset `start`. More...

UnicodeString &	replace (int32_t start, int32_t length, const UnicodeString &srcText, int32_t srcStart, int32_t srcLength)
	Replace the characters in the range [`start`, `start + length`) with the characters in `srcText` in the range [`srcStart`, `srcStart + srcLength`). More...

UnicodeString &	replace (int32_t start, int32_t length, const UnicodeString &srcText)
	Replace the characters in the range [`start`, `start + length`) with the characters in `srcText`. More...

UnicodeString &	replace (int32_t start, int32_t length, const char16_t *srcChars, int32_t srcStart, int32_t srcLength)
	Replace the characters in the range [`start`, `start + length`) with the characters in `srcChars` in the range [`srcStart`, `srcStart + srcLength`). More...

UnicodeString &	replace (int32_t start, int32_t length, ConstChar16Ptr srcChars, int32_t srcLength)
	Replace the characters in the range [`start`, `start + length`) with the characters in `srcChars`. More...

UnicodeString &	replace (int32_t start, int32_t length, char16_t srcChar)
	Replace the characters in the range [`start`, `start + length`) with the code unit `srcChar`. More...

UnicodeString &	replace (int32_t start, int32_t length, UChar32 srcChar)
	Replace the characters in the range [`start`, `start + length`) with the code point `srcChar`. More...

UnicodeString &	replaceBetween (int32_t start, int32_t limit, const UnicodeString &srcText)
	Replace the characters in the range [`start`, `limit`) with the characters in `srcText`. More...

UnicodeString &	replaceBetween (int32_t start, int32_t limit, const UnicodeString &srcText, int32_t srcStart, int32_t srcLimit)
	Replace the characters in the range [`start`, `limit`) with the characters in `srcText` in the range [`srcStart`, `srcLimit`). More...

virtual void	handleReplaceBetween (int32_t start, int32_t limit, const UnicodeString &text) override
	Replace a substring of this object with the given text. More...

virtual UBool	hasMetaData () const override
	Replaceable API. More...

virtual void	copy (int32_t start, int32_t limit, int32_t dest) override
	Copy a substring of this object, retaining attribute (out-of-band) information. More...

UnicodeString &	findAndReplace (const UnicodeString &oldText, const UnicodeString &newText)
	Replace all occurrences of characters in oldText with the characters in newText. More...

UnicodeString &	findAndReplace (int32_t start, int32_t length, const UnicodeString &oldText, const UnicodeString &newText)
	Replace all occurrences of characters in oldText with characters in newText in the range [`start`, `start + length`). More...

UnicodeString &	findAndReplace (int32_t start, int32_t length, const UnicodeString &oldText, int32_t oldStart, int32_t oldLength, const UnicodeString &newText, int32_t newStart, int32_t newLength)
	Replace all occurrences of characters in oldText in the range [`oldStart`, `oldStart + oldLength`) with the characters in newText in the range [`newStart`, `newStart + newLength`) in the range [`start`, `start + length`). More...

UnicodeString &	remove ()
	Removes all characters from the UnicodeString object and clears the bogus flag. More...

UnicodeString &	remove (int32_t start, int32_t length=static_cast< int32_t >(INT32_MAX))
	Remove the characters in the range [`start`, `start + length`) from the UnicodeString object. More...

UnicodeString &	removeBetween (int32_t start, int32_t limit=static_cast< int32_t >(INT32_MAX))
	Remove the characters in the range [`start`, `limit`) from the UnicodeString object. More...

UnicodeString &	retainBetween (int32_t start, int32_t limit=INT32_MAX)
	Retain only the characters in the range [`start`, `limit`) from the UnicodeString object. More...

UBool	padLeading (int32_t targetLength, char16_t padChar=0x0020)
	Pad the start of this UnicodeString with the character `padChar`. More...

UBool	padTrailing (int32_t targetLength, char16_t padChar=0x0020)
	Pad the end of this UnicodeString with the character `padChar`. More...

UBool	truncate (int32_t targetLength)
	Truncate this UnicodeString to the `targetLength`. More...

UnicodeString &	trim ()
	Trims leading and trailing whitespace from this UnicodeString. More...

UnicodeString &	reverse ()
	Reverse this UnicodeString in place. More...

UnicodeString &	reverse (int32_t start, int32_t length)
	Reverse the range [`start`, `start + length`) in this UnicodeString. More...

UnicodeString &	toUpper ()
	Convert the characters in this to UPPER CASE following the conventions of the default locale. More...

UnicodeString &	toUpper (const Locale &locale)
	Convert the characters in this to UPPER CASE following the conventions of a specific locale. More...

UnicodeString &	toLower ()
	Convert the characters in this to lower case following the conventions of the default locale. More...

UnicodeString &	toLower (const Locale &locale)
	Convert the characters in this to lower case following the conventions of a specific locale. More...

UnicodeString &	toTitle (BreakIterator *titleIter)
	Titlecase this string, convenience function using the default locale. More...

UnicodeString &	toTitle (BreakIterator *titleIter, const Locale &locale)
	Titlecase this string. More...

UnicodeString &	toTitle (BreakIterator *titleIter, const Locale &locale, uint32_t options)
	Titlecase this string, with options. More...

UnicodeString &	foldCase (uint32_t options=0)
	Case-folds the characters in this string. More...

char16_t *	getBuffer (int32_t minCapacity)
	Get a read/write pointer to the internal buffer. More...

void	releaseBuffer (int32_t newLength=-1)
	Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity). More...

const char16_t *	getBuffer () const
	Get a read-only pointer to the internal buffer. More...

const char16_t *	getTerminatedBuffer ()
	Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated. More...

	operator std::u16string_view () const
	Converts to a std::u16string_view. More...

	operator std::wstring_view () const
	Converts to a std::wstring_view. More...

	UnicodeString ()
	Construct an empty UnicodeString. More...

	UnicodeString (int32_t capacity, UChar32 c, int32_t count)
	Construct a UnicodeString with capacity to hold `capacity` char16_ts. More...

UNISTR_FROM_CHAR_EXPLICIT	UnicodeString (char16_t ch)
	Single char16_t (code unit) constructor. More...

UNISTR_FROM_CHAR_EXPLICIT	UnicodeString (UChar32 ch)
	Single UChar32 (code point) constructor. More...

UNISTR_FROM_STRING_EXPLICIT	UnicodeString (const std::nullptr_t text)
	nullptr_t constructor. More...

	UnicodeString (const char16_t *text, int32_t textLength)
	char16_t* constructor. More...

	UnicodeString (const uint16_t *text, int32_t textLength)
	uint16_t * constructor. More...

	UnicodeString (const wchar_t *text, int32_t textLength)
	wchar_t * constructor. More...

	UnicodeString (const std::nullptr_t text, int32_t textLength)
	nullptr_t constructor. More...

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
UNISTR_FROM_STRING_EXPLICIT	UnicodeString (const S &text)
	Constructor from `text` which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. More...

	UnicodeString (UBool isTerminated, ConstChar16Ptr text, int32_t textLength)
	Readonly-aliasing char16_t* constructor. More...

	UnicodeString (char16_t *buffer, int32_t buffLength, int32_t buffCapacity)
	Writable-aliasing char16_t* constructor. More...

	UnicodeString (uint16_t *buffer, int32_t buffLength, int32_t buffCapacity)
	Writable-aliasing uint16_t * constructor. More...

	UnicodeString (wchar_t *buffer, int32_t buffLength, int32_t buffCapacity)
	Writable-aliasing wchar_t * constructor. More...

	UnicodeString (std::nullptr_t buffer, int32_t buffLength, int32_t buffCapacity)
	Writable-aliasing nullptr_t constructor. More...

UNISTR_FROM_STRING_EXPLICIT	UnicodeString (const char *codepageData)
	char* constructor. More...

	UnicodeString (const char *codepageData, int32_t dataLength)
	char* constructor. More...

	UnicodeString (const char *codepageData, const char *codepage)
	char* constructor. More...

	UnicodeString (const char *codepageData, int32_t dataLength, const char *codepage)
	char* constructor. More...

	UnicodeString (const char *src, int32_t srcLength, UConverter *cnv, UErrorCode &errorCode)
	char * / UConverter constructor. More...

	UnicodeString (const char *src, int32_t textLength, enum EInvariant inv)
	Constructs a Unicode string from an invariant-character char * string. More...

	UnicodeString (const UnicodeString &that)
	Copy constructor. More...

	UnicodeString (UnicodeString &&src) noexcept
	Move constructor; might leave src in bogus state. More...

	UnicodeString (const UnicodeString &src, int32_t srcStart)
	'Substring' constructor from tail of source string. More...

	UnicodeString (const UnicodeString &src, int32_t srcStart, int32_t srcLength)
	'Substring' constructor from subrange of source string. More...

virtual UnicodeString *	clone () const override
	Clone this object, an instance of a subclass of Replaceable. More...

virtual	~UnicodeString ()
	Destructor. More...

UnicodeString	unescape () const
	Unescape a string of characters and return a string containing the result. More...

UChar32	unescapeAt (int32_t &offset) const
	Unescape a single escape sequence and return the represented character. More...

virtual UClassID	getDynamicClassID () const override
	ICU "poor man's RTTI", returns a UClassID for the actual class. More...

Public Member Functions inherited from icu::Replaceable
virtual	~Replaceable ()
	Destructor. More...

int32_t	length () const
	Returns the number of 16-bit code units in the text. More...

char16_t	charAt (int32_t offset) const
	Returns the 16-bit code unit at the given offset into the text. More...

UChar32	char32At (int32_t offset) const
	Returns the 32-bit code point at the given 16-bit offset into the text. More...

Public Member Functions inherited from icu::UObject
virtual	~UObject ()
	Destructor. More...

Static Public Member Functions
template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>
static UnicodeString	readOnlyAlias (const S &text)
	Readonly-aliasing factory method. More...

static UnicodeString	readOnlyAlias (const UnicodeString &text)
	Readonly-aliasing factory method. More...

static UnicodeString	fromUTF8 (StringPiece utf8)
	Create a UnicodeString from a UTF-8 string. More...

static UnicodeString	fromUTF32 (const UChar32 *utf32, int32_t length)
	Create a UnicodeString from a UTF-32 string. More...

static UClassID	getStaticClassID ()
	ICU "poor man's RTTI", returns a UClassID for this class. More...

Protected Member Functions
virtual int32_t	getLength () const override
	Implement Replaceable::getLength() (see jitterbug 1027). More...

virtual char16_t	getCharAt (int32_t offset) const override
	The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline again (see jitterbug 709). More...

virtual UChar32	getChar32At (int32_t offset) const override
	The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again (see jitterbug 709). More...

Protected Member Functions inherited from icu::Replaceable
	Replaceable ()
	Default constructor. More...

Friends
class	UnicodeStringAppendable

union	StackBufferOrFields

void	swap (UnicodeString &s1, UnicodeString &s2) noexcept
	Non-member UnicodeString swap function. More...

Detailed Description

UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.

It is a concrete implementation of the abstract class Replaceable (for transliteration).

The UnicodeString equivalent of std::string’s clear() is remove().

Starting with ICU 78, a UnicodeString is a C++ "range" of char16_t code units. utfStringCodePoints() and unsafeUTFStringCodePoints() can be used to iterate over the code points.

A UnicodeString may "alias" an external array of characters (that is, point to it, rather than own the array) whose lifetime must then at least match the lifetime of the aliasing object. This aliasing may be preserved when returning a UnicodeString by value, depending on the compiler and the function implementation, via Return Value Optimization (RVO) or the move assignment operator. (However, the copy assignment operator does not preserve aliasing.) For details see the description of storage models at the end of the class API docs and in the User Guide chapter linked from there.

The UnicodeString class is not suitable for subclassing.

For an overview of Unicode strings in C and C++ see the User Guide Strings chapter.

In ICU, a Unicode string consists of 16-bit Unicode code units. A Unicode character may be stored with either one code unit (the most common case) or with a matched pair of special code units ("surrogates"). The data type for code units is char16_t. For single-character handling, a Unicode character code point is a value in the range 0..0x10ffff. ICU uses the UChar32 type for code points.

Indexes and offsets into and lengths of strings always count code units, not code points. This is the same as with multi-byte char* strings in traditional string handling. Operations on partial strings typically do not test for code point boundaries. If necessary, the user needs to take care of such boundaries by testing for the code unit values or by using functions like UnicodeString::getChar32Start() and UnicodeString::getChar32Limit() (or, in C, the equivalent macros U16_SET_CP_START() and U16_SET_CP_LIMIT(), see utf.h).

UnicodeString methods are more lenient with regard to input parameter values than other ICU APIs. In particular:

If indexes are out of bounds for a UnicodeString object (< 0 or > length()) then they are "pinned" to the nearest boundary.
If the buffer passed to an insert/append/replace operation is owned by the target object, e.g., calling str.append(str), an extra copy may take place to ensure safety.
If primitive string pointer values (e.g., const char16_t * or char *) for input strings are nullptr, then those input string parameters are treated as if they pointed to an empty string. However, this is not the case for char * parameters for charset names or other IDs.
Most UnicodeString methods do not take a UErrorCode parameter because there are usually very few opportunities for failure other than a shortage of memory, error codes in low-level C++ string methods would be inconvenient, and the error code as the last parameter (ICU convention) would prevent the use of default parameter values. Instead, such methods set the UnicodeString into a "bogus" state (see isBogus()) if an error occurs.

In string comparisons, two UnicodeString objects that are both "bogus" compare equal (to be transitive and prevent endless loops in sorting), and a "bogus" string compares less than any non-"bogus" one.

Const UnicodeString methods are thread-safe. Multiple threads can use const methods on the same UnicodeString object simultaneously, but non-const methods must not be called concurrently (in multiple threads) with any other (const or non-const) methods.

Similarly, const UnicodeString & parameters are thread-safe. One object may be passed in as such a parameter concurrently in multiple threads. This includes the const UnicodeString & parameters for copy construction, assignment, and cloning.

UnicodeString uses several storage methods. String contents can be stored inside the UnicodeString object itself, in an allocated and shared buffer, or in an outside buffer that is "aliased". Most of this is done transparently, but careful aliasing in particular provides significant performance improvements. Also, the internal buffer is accessible via special functions. For details see the User Guide Strings chapter.

See also: utf.h; utfiterator.h; utfStringCodePoints; unsafeUTFStringCodePoints; CharacterIterator

Stable:: ICU 2.0

Definition at line 302 of file unistr.h.

Member Typedef Documentation

◆ value_type

using icu::UnicodeString::value_type = char16_t

C++ boilerplate.

Internal:: Do not use.

This API is for internal use only.

Definition at line 306 of file unistr.h.

Member Enumeration Documentation

◆ EInvariant

enum icu::UnicodeString::EInvariant

Constant to be used in the UnicodeString(char *, int32_t, EInvariant) constructor which constructs a Unicode string from an invariant-character char * string.

Use the macro US_INV instead of the full qualification for this value.

See also: US_INV

Stable:: ICU 3.2

Enumerator

kInvariant

See also: EInvariant

Stable:: ICU 3.2

Definition at line 316 of file unistr.h.

Constructor & Destructor Documentation

◆ UnicodeString() [1/25]

icu::UnicodeString::UnicodeString ( )

inline

Construct an empty UnicodeString.

Stable:: ICU 2.0

Definition at line 4241 of file unistr.h.

◆ UnicodeString() [2/25]

icu::UnicodeString::UnicodeString	(	int32_t	capacity,
		UChar32	c,
		int32_t	count
	)

Construct a UnicodeString with capacity to hold capacity char16_ts.

Parameters

capacity	the number of char16_ts this UnicodeString should hold before a resize is necessary; if count is greater than 0 and count code points c take up more space than capacity, then capacity is adjusted accordingly.
c	is used to initially fill the string
count	specifies how many code points c are to be written in the string

Stable:: ICU 2.0

◆ UnicodeString() [3/25]

UNISTR_FROM_CHAR_EXPLICIT icu::UnicodeString::UnicodeString ( char16_t ch )

Single char16_t (code unit) constructor.

It is recommended to mark this constructor "explicit" by -DUNISTR_FROM_CHAR_EXPLICIT=explicit on the compiler command line or similar.

Parameters

ch	the character to place in the UnicodeString

Stable:: ICU 2.0

◆ UnicodeString() [4/25]

UNISTR_FROM_CHAR_EXPLICIT icu::UnicodeString::UnicodeString ( UChar32 ch )

Single UChar32 (code point) constructor.

It is recommended to mark this constructor "explicit" by -DUNISTR_FROM_CHAR_EXPLICIT=explicit on the compiler command line or similar.

Parameters

ch	the character to place in the UnicodeString

Stable:: ICU 2.0

◆ UnicodeString() [5/25]

icu::UnicodeString::UnicodeString ( const std::nullptr_t text )

inline

nullptr_t constructor.

Effectively the same as the default constructor, makes an empty string object.

It is recommended to mark this constructor "explicit" by -DUNISTR_FROM_STRING_EXPLICIT=explicit on the compiler command line or similar.

Parameters

text nullptr

Stable:: ICU 59

Definition at line 4245 of file unistr.h.

◆ UnicodeString() [6/25]

icu::UnicodeString::UnicodeString	(	const char16_t *	text,
		int32_t	textLength
	)

char16_t* constructor.

Note, for string literals: Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time length determination:

UnicodeString str(u"literal");

if (str == u"other literal") { ... }

icu::UnicodeString::UnicodeString

UnicodeString()

Construct an empty UnicodeString.

Definition: unistr.h:4241

Parameters

text	The characters to place in the UnicodeString.
textLength	The number of Unicode characters in `text` to copy.

Stable:: ICU 2.0

◆ UnicodeString() [7/25]

icu::UnicodeString::UnicodeString	(	const uint16_t *	text,
		int32_t	textLength
	)

inline

uint16_t * constructor.

Delegates to UnicodeString(const char16_t *, int32_t).

Note, for string literals: Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time length determination:

UnicodeString str(u"literal");

if (str == u"other literal") { ... }

Parameters

text	UTF-16 string
textLength	string length

Stable:: ICU 59

Definition at line 3290 of file unistr.h.

◆ UnicodeString() [8/25]

icu::UnicodeString::UnicodeString	(	const wchar_t *	text,
		int32_t	textLength
	)

inline

wchar_t * constructor.

(Only defined if U_SIZEOF_WCHAR_T==2.) Delegates to UnicodeString(const char16_t *, int32_t).

Note, for string literals: Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time length determination:

UnicodeString str(u"literal");

if (str == u"other literal") { ... }

Parameters

text	UTF-16 string
textLength	string length

Stable:: ICU 59

Definition at line 3312 of file unistr.h.

◆ UnicodeString() [9/25]

icu::UnicodeString::UnicodeString	(	const std::nullptr_t	text,
		int32_t	textLength
	)

inline

nullptr_t constructor.

Effectively the same as the default constructor, makes an empty string object.

Parameters

text	nullptr
textLength	ignored

Stable:: ICU 59

Definition at line 4249 of file unistr.h.

◆ UnicodeString() [10/25]

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>

UNISTR_FROM_STRING_EXPLICIT icu::UnicodeString::UnicodeString ( const S & text )

inline

Constructor from text which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.

The string is bogus if the string view is too long.

If you need a UnicodeString but need not copy the string view contents, then you can call the UnicodeString::readOnlyAlias() function instead of this constructor.

Parameters

text	UTF-16 string

Stable:: ICU 76

Definition at line 3338 of file unistr.h.

◆ UnicodeString() [11/25]

icu::UnicodeString::UnicodeString	(	UBool	isTerminated,
		ConstChar16Ptr	text,
		int32_t	textLength
	)

Readonly-aliasing char16_t* constructor.

The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified.

In an assignment to another UnicodeString, when using the copy constructor or the assignment operator, the text will be copied. When using fastCopyFrom(), the text will be aliased again, so that both strings then alias the same readonly-text.

Note, for string literals: Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time length determination:

UnicodeString alias = UnicodeString::readOnlyAlias(u"literal");

if (str == u"other literal") { ... }

icu::UnicodeString::readOnlyAlias

static UnicodeString readOnlyAlias(const S &text)

Readonly-aliasing factory method.

Definition: unistr.h:3662

Parameters

isTerminated	specifies if `text` is `NUL`-terminated. This must be true if `textLength==-1`.
text	The characters to alias for the UnicodeString.
textLength	The number of Unicode characters in `text` to alias. If -1, then this constructor will determine the length by calling `u_strlen()`.

Stable:: ICU 2.0

◆ UnicodeString() [12/25]

icu::UnicodeString::UnicodeString	(	char16_t *	buffer,
		int32_t	buffLength,
		int32_t	buffCapacity
	)

Writable-aliasing char16_t* constructor.

The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has write-through semantics: For as long as the capacity of the buffer is sufficient, write operations will directly affect the buffer. When more capacity is necessary, then a new buffer will be allocated and the contents copied as with regularly constructed strings. In an assignment to another UnicodeString, the buffer will be copied. The extract(Char16Ptr dst) function detects whether the dst pointer is the same as the string buffer itself and will in this case not copy the contents.

Parameters

buffer	The characters to alias for the UnicodeString.
buffLength	The number of Unicode characters in `buffer` to alias.
buffCapacity	The size of `buffer` in char16_ts.

Stable:: ICU 2.0

◆ UnicodeString() [13/25]

icu::UnicodeString::UnicodeString	(	uint16_t *	buffer,
		int32_t	buffLength,
		int32_t	buffCapacity
	)

inline

Writable-aliasing uint16_t * constructor.

Delegates to UnicodeString(char16_t *, int32_t, int32_t).

Parameters

buffer	writable buffer of/for UTF-16 text
buffLength	length of the current buffer contents
buffCapacity	buffer capacity

Stable:: ICU 59

Definition at line 3406 of file unistr.h.

◆ UnicodeString() [14/25]

icu::UnicodeString::UnicodeString	(	wchar_t *	buffer,
		int32_t	buffLength,
		int32_t	buffCapacity
	)

inline

Writable-aliasing wchar_t * constructor.

(Only defined if U_SIZEOF_WCHAR_T==2.) Delegates to UnicodeString(char16_t *, int32_t, int32_t).

Parameters

buffer	writable buffer of/for UTF-16 text
buffLength	length of the current buffer contents
buffCapacity	buffer capacity

Stable:: ICU 59

Definition at line 3420 of file unistr.h.

◆ UnicodeString() [15/25]

icu::UnicodeString::UnicodeString	(	std::nullptr_t	buffer,
		int32_t	buffLength,
		int32_t	buffCapacity
	)

inline

Writable-aliasing nullptr_t constructor.

Effectively the same as the default constructor, makes an empty string object.

Parameters

buffer	nullptr
buffLength	ignored
buffCapacity	ignored

Stable:: ICU 59

Definition at line 4253 of file unistr.h.

◆ UnicodeString() [16/25]

UNISTR_FROM_STRING_EXPLICIT icu::UnicodeString::UnicodeString ( const char * codepageData )

char* constructor.

Uses the default converter (and thus depends on the ICU conversion code) unless U_CHARSET_IS_UTF8 is set to 1.

For ASCII (really "invariant character") strings it is more efficient to use the constructor that takes a US_INV (for its enum EInvariant).

Note, for string literals: Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time length determination:

UnicodeString str(u"literal");

if (str == u"other literal") { ... }

It is recommended to mark this constructor "explicit" by -DUNISTR_FROM_STRING_EXPLICIT=explicit on the compiler command line or similar.

Parameters

codepageData an array of bytes, null-terminated, in the platform's default codepage.

Stable:: ICU 2.0

◆ UnicodeString() [17/25]

icu::UnicodeString::UnicodeString	(	const char *	codepageData,
		int32_t	dataLength
	)

char* constructor.

Uses the default converter (and thus depends on the ICU conversion code) unless U_CHARSET_IS_UTF8 is set to 1.

Parameters

codepageData	an array of bytes in the platform's default codepage.
dataLength	The number of bytes in `codepageData`.

Stable:: ICU 2.0

◆ UnicodeString() [18/25]

icu::UnicodeString::UnicodeString	(	const char *	codepageData,
		const char *	codepage
	)

char* constructor.

Parameters

codepageData	an array of bytes, null-terminated
codepage	the encoding of `codepageData`. The special value 0 for `codepage` indicates that the text is in the platform's default codepage.

If codepage is an empty string (""), then a simple conversion is performed on the codepage-invariant subset ("invariant characters") of the platform encoding. See utypes.h. Recommendation: For invariant-character strings use the constructor UnicodeString(const char *src, int32_t length, enum EInvariant inv) because it avoids object code dependencies of UnicodeString on the conversion code.

Stable:: ICU 2.0

◆ UnicodeString() [19/25]

icu::UnicodeString::UnicodeString	(	const char *	codepageData,
		int32_t	dataLength,
		const char *	codepage
	)

char* constructor.

Parameters

codepageData	an array of bytes.
dataLength	The number of bytes in `codepageData`.
codepage	the encoding of `codepageData`. The special value 0 for `codepage` indicates that the text is in the platform's default codepage. If `codepage` is an empty string (`""`), then a simple conversion is performed on the codepage-invariant subset ("invariant characters") of the platform encoding. See utypes.h. Recommendation: For invariant-character strings use the constructor UnicodeString(const char *src, int32_t length, enum EInvariant inv) because it avoids object code dependencies of UnicodeString on the conversion code.

Stable:: ICU 2.0

◆ UnicodeString() [20/25]

icu::UnicodeString::UnicodeString	(	const char *	src,
		int32_t	srcLength,
		UConverter *	cnv,
		UErrorCode &	errorCode
	)

char * / UConverter constructor.

This constructor uses an existing UConverter object to convert the codepage string to Unicode and construct a UnicodeString from that.

The converter is reset at first. If the error code indicates a failure before this constructor is called, or if an error occurs during conversion or construction, then the string will be bogus.

This function avoids the overhead of opening and closing a converter if multiple strings are constructed.

Parameters

src	input codepage string
srcLength	length of the input string, can be -1 for NUL-terminated strings
cnv	converter object (ucnv_resetToUnicode() will be called), can be nullptr for the default converter
errorCode	normal ICU error code

Stable:: ICU 2.0

◆ UnicodeString() [21/25]

icu::UnicodeString::UnicodeString	(	const char *	src,
		int32_t	textLength,
		enum EInvariant	inv
	)

Constructs a Unicode string from an invariant-character char * string.

About invariant characters see utypes.h. This constructor has no runtime dependency on conversion code and is therefore recommended over ones taking a charset name string (where the empty string "" indicates invariant-character conversion).

Use the macro US_INV as the third, signature-distinguishing parameter.

For example:

void fn(const char *s) {
  UnicodeString ustr(s, -1, US_INV);
  // use ustr ...
}

Note, for string literals: Since C++17 and ICU 76, you can use UTF-16 string literals with compile-time length determination:

UnicodeString str(u"literal");

if (str == u"other literal") { ... }

Parameters

src	String using only invariant characters.
textLength	Length of src, or -1 if NUL-terminated.
inv	Signature-distinguishing parameter, use US_INV.

See also: US_INV

Stable:: ICU 3.2

◆ UnicodeString() [22/25]

icu::UnicodeString::UnicodeString ( const UnicodeString & that )

Copy constructor.

Starting with ICU 2.4, the assignment operator and the copy constructor allocate a new buffer and copy the buffer contents even for readonly aliases. By contrast, the fastCopyFrom() function implements the old, more efficient but less safe behavior of making this string also a readonly alias to the same buffer.

If the source object has an "open" buffer from getBuffer(minCapacity), then the copy is an empty string.

Parameters

that	The UnicodeString object to copy.

Stable:: ICU 2.0

See also: fastCopyFrom

◆ UnicodeString() [23/25]

icu::UnicodeString::UnicodeString ( UnicodeString && src )

noexcept

Move constructor; might leave src in bogus state.

This string will have the same contents and state that the source string had.

Parameters

src	source string

Stable:: ICU 56

◆ UnicodeString() [24/25]

icu::UnicodeString::UnicodeString	(	const UnicodeString &	src,
		int32_t	srcStart
	)

'Substring' constructor from tail of source string.

Parameters

src	The UnicodeString object to copy.
srcStart	The offset into `src` at which to start copying.

Stable:: ICU 2.2

◆ UnicodeString() [25/25]

icu::UnicodeString::UnicodeString	(	const UnicodeString &	src,
		int32_t	srcStart,
		int32_t	srcLength
	)

'Substring' constructor from subrange of source string.

Parameters

src	The UnicodeString object to copy.
srcStart	The offset into `src` at which to start copying.
srcLength	The number of characters from `src` to copy.

Stable:: ICU 2.2

◆ ~UnicodeString()

virtual icu::UnicodeString::~UnicodeString ( )

virtual

Destructor.

Stable:: ICU 2.0

Member Function Documentation

◆ append() [1/7]

UnicodeString & icu::UnicodeString::append ( char16_t srcChar )

inline

Append the code unit srcChar to the UnicodeString object.

Parameters

srcChar the code unit to append

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5040 of file unistr.h.

◆ append() [2/7]

UnicodeString & icu::UnicodeString::append	(	const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength
	)

inline

Append the characters in srcChars in the range [srcStart, srcStart + srcLength) to the UnicodeString object.

srcChars is not modified.

Parameters

srcChars	the source for the new characters
srcStart	the offset into `srcChars` where new characters will be obtained
srcLength	the number of characters in `srcChars` in the append string; can be -1 if `srcChars` is NUL-terminated

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5029 of file unistr.h.

◆ append() [3/7]

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>

UnicodeString& icu::UnicodeString::append ( const S & src )

inline

Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, to the UnicodeString object.

Parameters

src	the source for the new characters

Returns: a reference to this

Stable:: ICU 76

Definition at line 2358 of file unistr.h.

◆ append() [4/7]

UnicodeString & icu::UnicodeString::append ( const UnicodeString & srcText )

inline

Append the characters in srcText to the UnicodeString object.

srcText is not modified.

Parameters

srcText the source for the new characters

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5025 of file unistr.h.

References length().

◆ append() [5/7]

UnicodeString & icu::UnicodeString::append	(	const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)

inline

Append the characters in srcText in the range [srcStart, srcStart + srcLength) to the UnicodeString object.

srcText is not modified.

Parameters

srcText	the source for the new characters
srcStart	the offset into `srcText` where new characters will be obtained
srcLength	the number of characters in `srcText` in the append string

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5019 of file unistr.h.

Referenced by icu::Transliterator::setID().

◆ append() [6/7]

UnicodeString & icu::UnicodeString::append	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength
	)

inline

Append the characters in srcChars to the UnicodeString object.

srcChars is not modified.

Parameters

srcChars	the source for the new characters
srcLength	the number of Unicode characters in `srcChars`; can be -1 if `srcChars` is NUL-terminated

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5035 of file unistr.h.

◆ append() [7/7]

UnicodeString& icu::UnicodeString::append ( UChar32 srcChar )

Append the code point srcChar to the UnicodeString object.

Parameters

srcChar the code point to append

Returns: a reference to this

Stable:: ICU 2.0

◆ begin()

unspecified_iterator icu::UnicodeString::begin ( ) const

inline

Returns: an iterator to the first code unit in this string. The iterator may be a pointer or a contiguous-iterator object.

Draft:: This API may be changed in the future versions and was introduced in ICU 78

Definition at line 1937 of file unistr.h.

◆ caseCompare() [1/6]

int8_t icu::UnicodeString::caseCompare	(	const UnicodeString &	text,
		uint32_t	options
	)		const

inline

Compare two strings case-insensitively using full case folding.

This is equivalent to this->foldCase(options).compare(text.foldCase(options)).

Parameters

text	Another string to compare this one to.
options	A bit set of options. U_FOLD_CASE_DEFAULT (or 0) selects the default options: comparison in code unit order with default case folding. Flags that may be set: U_COMPARE_CODE_POINT_ORDER, to choose code point order instead of code unit order (see u_strCompare for details); and U_FOLD_CASE_EXCLUDE_SPECIAL_I.

Returns: A negative, zero, or positive integer indicating the comparison result.

Stable:: ICU 2.0

Definition at line 4502 of file unistr.h.

References length().

◆ caseCompare() [2/6]

int8_t icu::UnicodeString::caseCompare	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength,
		uint32_t	options
	)		const

inline

Compare two strings case-insensitively using full case folding.

This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

Parameters

srcChars	A pointer to another string to compare this one to.
srcLength	The number of code units from that string to compare.
options	A bit set of options. U_FOLD_CASE_DEFAULT (or 0) selects the default options: comparison in code unit order with default case folding. Flags that may be set: U_COMPARE_CODE_POINT_ORDER, to choose code point order instead of code unit order (see u_strCompare for details); and U_FOLD_CASE_EXCLUDE_SPECIAL_I.

Returns: A negative, zero, or positive integer indicating the comparison result.

Stable:: ICU 2.0

Definition at line 4515 of file unistr.h.

◆ caseCompare() [3/6]

int8_t icu::UnicodeString::caseCompare	(	int32_t	start,
		int32_t	length,
		const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength,
		uint32_t	options
	)		const

inline

Compare two strings case-insensitively using full case folding.

This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcChars	A pointer to another string to compare this one to.
srcStart	The start offset in that string at which the compare operation begins.
srcLength	The number of code units from that string to compare.
options	A bit set of options. U_FOLD_CASE_DEFAULT (or 0) selects the default options: comparison in code unit order with default case folding. Flags that may be set: U_COMPARE_CODE_POINT_ORDER, to choose code point order instead of code unit order (see u_strCompare for details); and U_FOLD_CASE_EXCLUDE_SPECIAL_I.

Returns: A negative, zero, or positive integer indicating the comparison result.

Stable:: ICU 2.0

Definition at line 4540 of file unistr.h.

◆ caseCompare() [4/6]

int8_t icu::UnicodeString::caseCompare	(	int32_t	start,
		int32_t	length,
		const char16_t *	srcChars,
		uint32_t	options
	)		const

inline

Compare two strings case-insensitively using full case folding.

This is equivalent to this->foldCase(options).compare(srcChars.foldCase(options)).

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcChars	A pointer to another string to compare this one to.
options	A bit set of options. U_FOLD_CASE_DEFAULT (or 0) selects the default options: comparison in code unit order with default case folding. Flags that may be set: U_COMPARE_CODE_POINT_ORDER, to choose code point order instead of code unit order (see u_strCompare for details); and U_FOLD_CASE_EXCLUDE_SPECIAL_I.

Returns: A negative, zero, or positive integer indicating the comparison result.

Stable:: ICU 2.0

Definition at line 4532 of file unistr.h.

◆ caseCompare() [5/6]

int8_t icu::UnicodeString::caseCompare	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength,
		uint32_t	options
	)		const

inline

Compare two strings case-insensitively using full case folding.

This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcText	Another string to compare this one to.
srcStart	The start offset in that string at which the compare operation begins.
srcLength	The number of code units from that string to compare.
options	A bit set of options. U_FOLD_CASE_DEFAULT (or 0) selects the default options: comparison in code unit order with default case folding. Flags that may be set: U_COMPARE_CODE_POINT_ORDER, to choose code point order instead of code unit order (see u_strCompare for details); and U_FOLD_CASE_EXCLUDE_SPECIAL_I.

Returns: A negative, zero, or positive integer indicating the comparison result.

Stable:: ICU 2.0

Definition at line 4522 of file unistr.h.

◆ caseCompare() [6/6]

int8_t icu::UnicodeString::caseCompare	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	srcText,
		uint32_t	options
	)		const

inline

Compare two strings case-insensitively using full case folding.

This is equivalent to this->foldCase(options).compare(srcText.foldCase(options)).

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcText	Another string to compare this one to.
options	A bit set of options. U_FOLD_CASE_DEFAULT (or 0) selects the default options: comparison in code unit order with default case folding. Flags that may be set: U_COMPARE_CODE_POINT_ORDER, to choose code point order instead of code unit order (see u_strCompare for details); and U_FOLD_CASE_EXCLUDE_SPECIAL_I.

Returns: A negative, zero, or positive integer indicating the comparison result.

Stable:: ICU 2.0

Definition at line 4507 of file unistr.h.

References length().

◆ caseCompareBetween()

int8_t icu::UnicodeString::caseCompareBetween	(	int32_t	start,
		int32_t	limit,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLimit,
		uint32_t	options
	)		const

inline

Compare two strings case-insensitively using full case folding.

This is equivalent to this->foldCase(options).compareBetween(srcText.foldCase(options)).

Parameters

start	The start offset in this string at which the compare operation begins.
limit	The offset after the last code unit from this string to compare.
srcText	Another string to compare this one to.
srcStart	The start offset in that string at which the compare operation begins.
srcLimit	The offset after the last code unit from that string to compare.
options	A bit set of options. U_FOLD_CASE_DEFAULT (or 0) selects the default options: comparison in code unit order with default case folding. Flags that may be set: U_COMPARE_CODE_POINT_ORDER, to choose code point order instead of code unit order (see u_strCompare for details); and U_FOLD_CASE_EXCLUDE_SPECIAL_I.

Returns: A negative, zero, or positive integer indicating the comparison result.

Stable:: ICU 2.0

Definition at line 4550 of file unistr.h.

◆ char32At()

UChar32 icu::UnicodeString::char32At ( int32_t offset ) const

Return the code point that contains the code unit at offset offset.

If the offset is not valid (that is, outside the range 0..length()-1) then U+ffff is returned.

Parameters

offset a valid offset into the text that indicates the text offset of any of the code units that will be assembled into a code point (21-bit value) and returned

Returns: the code point of text at offset or 0xffff if the offset is not valid for this string

Stable:: ICU 2.0

Referenced by icu::DecimalFormatSymbols::setSymbol().

◆ charAt()

char16_t icu::UnicodeString::charAt ( int32_t offset ) const

inline

Return the code unit at offset `offset`.

If the offset is not valid (that is, not in the range 0..length()-1) then U+ffff is returned.

Parameters

offset a valid offset into the text

Returns: the code unit at offset `offset`, or 0xffff if the offset is not valid for this string

Stable:: ICU 2.0

Definition at line 4914 of file unistr.h.

◆ clone()

virtual UnicodeString* icu::UnicodeString::clone ( ) const

override virtual

Clone this object, an instance of a subclass of Replaceable.

Clones can be used concurrently in multiple threads. If a subclass does not implement clone(), or if an error occurs, then nullptr is returned. The caller must delete the clone.

Returns: a clone of this object

See also: Replaceable::clone; getDynamicClassID

Stable:: ICU 2.6

Reimplemented from icu::Replaceable.

◆ compare() [1/6]

int8_t icu::UnicodeString::compare ( const UnicodeString & text ) const

inline

Compare the characters bitwise in this UnicodeString to the characters in text.

Parameters

text	The UnicodeString to compare to this one.

Returns: The result of bitwise character comparison: 0 if this contains the same characters as text, -1 if the characters in this are bitwise less than the characters in text, +1 if the characters in this are bitwise greater than the characters in text.

Stable:: ICU 2.0

Definition at line 4379 of file unistr.h.

References length().

◆ compare() [2/6]

int8_t icu::UnicodeString::compare	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength
	)		const

inline

Compare the characters bitwise in this UnicodeString with the first srcLength characters in srcChars.

Parameters

srcChars	The characters to compare to this UnicodeString.
srcLength	the number of characters in `srcChars` to compare

Returns: The result of bitwise character comparison: 0 if this contains the same characters as srcChars, -1 if the characters in this are bitwise less than the characters in srcChars, +1 if the characters in this are bitwise greater than the characters in srcChars.

Stable:: ICU 2.0

Definition at line 4389 of file unistr.h.

◆ compare() [3/6]

int8_t icu::UnicodeString::compare	(	int32_t	start,
		int32_t	length,
		const char16_t *	srcChars
	)		const

inline

Compare the characters bitwise in the range [start, start + length) with the first length characters in srcChars.

Parameters

start	the offset at which the compare operation begins
length	the number of characters to compare.
srcChars	the characters to be compared

Returns: The result of bitwise character comparison: 0 if this contains the same characters as srcChars, -1 if the characters in this are bitwise less than the characters in srcChars, +1 if the characters in this are bitwise greater than the characters in srcChars.

Stable:: ICU 2.0

Definition at line 4402 of file unistr.h.

◆ compare() [4/6]

int8_t icu::UnicodeString::compare	(	int32_t	start,
		int32_t	length,
		const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Compare the characters bitwise in the range [start, start + length) with the characters in srcChars in the range [srcStart, srcStart + srcLength).

Parameters

start	the offset at which the compare operation begins
length	the number of characters in this to compare
srcChars	the characters to be compared
srcStart	the offset into `srcChars` to start comparison
srcLength	the number of characters in `srcChars` to compare

Returns: The result of bitwise character comparison: 0 if this contains the same characters as srcChars, -1 if the characters in this are bitwise less than the characters in srcChars, +1 if the characters in this are bitwise greater than the characters in srcChars.

Stable:: ICU 2.0

Definition at line 4408 of file unistr.h.

◆ compare() [5/6]

int8_t icu::UnicodeString::compare	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Compare the characters bitwise in the range [start, start + length) with the characters in srcText in the range [srcStart, srcStart + srcLength).

Parameters

start	the offset at which the compare operation begins
length	the number of characters in this to compare.
srcText	the text to be compared
srcStart	the offset into `srcText` to start comparison
srcLength	the number of characters in `srcText` to compare

Returns: The result of bitwise character comparison: 0 if this contains the same characters as srcText, -1 if the characters in this are bitwise less than the characters in srcText, +1 if the characters in this are bitwise greater than the characters in srcText.

Stable:: ICU 2.0

Definition at line 4394 of file unistr.h.

◆ compare() [6/6]

int8_t icu::UnicodeString::compare	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	text
	)		const

inline

Compare the characters bitwise in the range [start, start + length) with the characters in the entire string text.

(The parameters "start" and "length" are not applied to the other text "text".)

Parameters

start	the offset at which the compare operation begins
length	the number of characters in this string to compare.
text	the other text to be compared against this string.

Returns: The result of bitwise character comparison: 0 if this contains the same characters as text, -1 if the characters in this are bitwise less than the characters in text, +1 if the characters in this are bitwise greater than the characters in text.

Stable:: ICU 2.0

Definition at line 4383 of file unistr.h.

References length().

◆ compareBetween()

int8_t icu::UnicodeString::compareBetween	(	int32_t	start,
		int32_t	limit,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLimit
	)		const

inline

Compare the characters bitwise in the range [start, limit) with the characters in srcText in the range [srcStart, srcLimit).

Parameters

start	the offset at which the compare operation begins
limit	the offset immediately following the compare operation
srcText	the text to be compared
srcStart	the offset into `srcText` to start comparison
srcLimit	the offset into `srcText` to limit comparison

Returns: The result of bitwise character comparison: 0 if this contains the same characters as srcText, -1 if the characters in this are bitwise less than the characters in srcText, +1 if the characters in this are bitwise greater than the characters in srcText.

Stable:: ICU 2.0

Definition at line 4416 of file unistr.h.

◆ compareCodePointOrder() [1/6]

int8_t icu::UnicodeString::compareCodePointOrder ( const UnicodeString & text ) const

inline

Compare two Unicode strings in code point order.

The result may be different from the results of compare(), operator<, etc. if supplementary characters are present:

In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

Parameters

text	Another string to compare this one to.

Returns: a negative/zero/positive integer corresponding to whether this string is less than/equal to/greater than the second one in code point order

Stable:: ICU 2.0

Definition at line 4440 of file unistr.h.

References length().

◆ compareCodePointOrder() [2/6]

int8_t icu::UnicodeString::compareCodePointOrder	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength
	)		const

inline

Compare two Unicode strings in code point order.

The result may be different from the results of compare(), operator<, etc. if supplementary characters are present:

In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

Parameters

srcChars	A pointer to another string to compare this one to.
srcLength	The number of code units from that string to compare.

Returns: a negative/zero/positive integer corresponding to whether this string is less than/equal to/greater than the second one in code point order

Stable:: ICU 2.0

Definition at line 4450 of file unistr.h.

◆ compareCodePointOrder() [3/6]

int8_t icu::UnicodeString::compareCodePointOrder	(	int32_t	start,
		int32_t	length,
		const char16_t *	srcChars
	)		const

inline

Compare two Unicode strings in code point order.

The result may be different from the results of compare(), operator<, etc. if supplementary characters are present:

In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcChars	A pointer to another string to compare this one to.

Returns: a negative/zero/positive integer corresponding to whether this string is less than/equal to/greater than the second one in code point order

Stable:: ICU 2.0

Definition at line 4463 of file unistr.h.

◆ compareCodePointOrder() [4/6]

int8_t icu::UnicodeString::compareCodePointOrder	(	int32_t	start,
		int32_t	length,
		const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Compare two Unicode strings in code point order.

The result may be different from the results of compare(), operator<, etc. if supplementary characters are present:

In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcChars	A pointer to another string to compare this one to.
srcStart	The start offset in that string at which the compare operation begins.
srcLength	The number of code units from that string to compare.

Returns: a negative/zero/positive integer corresponding to whether this string is less than/equal to/greater than the second one in code point order

Stable:: ICU 2.0

Definition at line 4469 of file unistr.h.

◆ compareCodePointOrder() [5/6]

int8_t icu::UnicodeString::compareCodePointOrder	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	srcText
	)		const

inline

Compare two Unicode strings in code point order.

The result may be different from the results of compare(), operator<, etc. if supplementary characters are present:

In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcText	Another string to compare this one to.

Returns: a negative/zero/positive integer corresponding to whether this string is less than/equal to/greater than the second one in code point order

Stable:: ICU 2.0

Definition at line 4444 of file unistr.h.

References length().

◆ compareCodePointOrder() [6/6]

int8_t icu::UnicodeString::compareCodePointOrder	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Compare two Unicode strings in code point order.

The result may be different from the results of compare(), operator<, etc. if supplementary characters are present:

In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

Parameters

start	The start offset in this string at which the compare operation begins.
length	The number of code units from this string to compare.
srcText	Another string to compare this one to.
srcStart	The start offset in that string at which the compare operation begins.
srcLength	The number of code units from that string to compare.

Returns: a negative/zero/positive integer corresponding to whether this string is less than/equal to/greater than the second one in code point order

Stable:: ICU 2.0

Definition at line 4455 of file unistr.h.

◆ compareCodePointOrderBetween()

int8_t icu::UnicodeString::compareCodePointOrderBetween	(	int32_t	start,
		int32_t	limit,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLimit
	)		const

inline

Compare two Unicode strings in code point order.

The result may be different from the results of compare(), operator<, etc. if supplementary characters are present:

In UTF-16, supplementary characters (with code points U+10000 and above) are stored with pairs of surrogate code units. These have values from 0xd800 to 0xdfff, which means that they compare as less than some other BMP characters like U+feff. This function compares Unicode strings in code point order. If either of the UTF-16 strings is malformed (i.e., it contains unpaired surrogates), then the result is not defined.

Parameters

start	The start offset in this string at which the compare operation begins.
limit	The offset after the last code unit from this string to compare.
srcText	Another string to compare this one to.
srcStart	The start offset in that string at which the compare operation begins.
srcLimit	The offset after the last code unit from that string to compare.

Returns: a negative/zero/positive integer corresponding to whether this string is less than/equal to/greater than the second one in code point order

Stable:: ICU 2.0

Definition at line 4477 of file unistr.h.

◆ copy()

virtual void icu::UnicodeString::copy	(	int32_t	start,
		int32_t	limit,
		int32_t	dest
	)

override virtual

Copy a substring of this object, retaining attribute (out-of-band) information.

This method is used to duplicate or reorder substrings. The destination index must not overlap the source range.

Parameters

start	the beginning index, inclusive; `0 <= start <= limit`.
limit	the ending index, exclusive; `start <= limit <= length()`.
dest	the destination index. The characters from `start..limit-1` will be copied to `dest`. Implementations of this method may assume that `dest <= start || dest >= limit`.

Stable:: ICU 2.0

Implements icu::Replaceable.

◆ countChar32()

int32_t icu::UnicodeString::countChar32	(	int32_t	start = `0`,
		int32_t	length = `INT32_MAX`
	)		const

Count Unicode code points in the `length` char16_t code units of the string, starting at `start`.

A code point may occupy either one or two char16_t code units. Counting code points involves reading all code units.

This function is basically the inverse of moveIndex32().

Parameters

start	the index of the first code unit to check
length	the number of char16_t code units to check

Returns: the number of code points in the specified code units

See also: length

Stable:: ICU 2.0

Referenced by icu::DecimalFormatSymbols::setSymbol().

◆ end()

unspecified_iterator icu::UnicodeString::end ( ) const

inline

Returns: an iterator to just past the last code unit in this string. The iterator may be a pointer or a contiguous-iterator object.

Draft:: This API may be changed in the future versions and was introduced in ICU 78

Definition at line 1943 of file unistr.h.

◆ endsWith() [1/4]

UBool icu::UnicodeString::endsWith	(	const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Determine if this ends with the characters in srcChars in the range [srcStart, srcStart + srcLength).

Parameters

srcChars	The characters to match.
srcStart	the offset into `srcChars` to start matching
srcLength	the number of characters in `srcChars` to match

Returns: true if this ends with the characters in srcChars, false otherwise

Stable:: ICU 2.0

Definition at line 4775 of file unistr.h.

References u_strlen().

◆ endsWith() [2/4]

UBool icu::UnicodeString::endsWith	(	const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Determine if this ends with the characters in srcText in the range [srcStart, srcStart + srcLength).

Parameters

srcText	The text to match.
srcStart	the offset into `srcText` to start matching
srcLength	the number of characters in `srcText` to match

Returns: true if this ends with the characters in srcText, false otherwise

Stable:: ICU 2.0

Definition at line 4757 of file unistr.h.

◆ endsWith() [3/4]

UBool icu::UnicodeString::endsWith ( const UnicodeString & text ) const

inline

Determine if this ends with the characters in text.

Parameters

text	The text to match.

Returns: true if this ends with the characters in text, false otherwise

Stable:: ICU 2.0

Definition at line 4752 of file unistr.h.

References length().

◆ endsWith() [4/4]

UBool icu::UnicodeString::endsWith	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength
	)		const

inline

Determine if this ends with the characters in srcChars.

Parameters

srcChars	The characters to match.
srcLength	the number of characters in `srcChars`

Returns: true if this ends with the characters in srcChars, false otherwise

Stable:: ICU 2.0

Definition at line 4766 of file unistr.h.

References u_strlen().

◆ extract() [1/8]

int32_t icu::UnicodeString::extract	(	char *	dest,
		int32_t	destCapacity,
		UConverter *	cnv,
		UErrorCode &	errorCode
	)		const

Convert the UnicodeString into a codepage string using an existing UConverter.

The output string is NUL-terminated if possible.

This function avoids the overhead of opening and closing a converter if multiple strings are extracted.

Parameters

dest	destination string buffer, can be nullptr if destCapacity==0
destCapacity	the number of chars available at dest
cnv	the converter object to be used (ucnv_resetFromUnicode() will be called), or nullptr for the default converter
errorCode	normal ICU error code

Returns: the length of the output string, not counting the terminating NUL; if the length is greater than destCapacity, then the string will not fit and a buffer of the indicated length would need to be passed in

Stable:: ICU 2.0

◆ extract() [2/8]

int32_t icu::UnicodeString::extract	(	Char16Ptr	dest,
		int32_t	destCapacity,
		UErrorCode &	errorCode
	)		const

Copy the contents of the string into dest.

This is a convenience function that checks if there is enough space in dest, extracts the entire string if possible, and NUL-terminates dest if possible.

If the string fits into dest but cannot be NUL-terminated (length()==destCapacity) then the error code is set to U_STRING_NOT_TERMINATED_WARNING. If the string itself does not fit into dest (length()>destCapacity) then the error code is set to U_BUFFER_OVERFLOW_ERROR.

If the string aliases to dest itself as an external buffer, then extract() will not copy the contents.

Parameters

dest	Destination string buffer.
destCapacity	Number of char16_ts available at dest.
errorCode	ICU error code.

Returns: length()

Stable:: ICU 2.0

◆ extract() [3/8]

void icu::UnicodeString::extract	(	int32_t	start,
		int32_t	length,
		Char16Ptr	dst,
		int32_t	dstStart = `0`
	)		const

inline

Copy the characters in the range [start, start + length) into the array dst, beginning at dstStart.

If the string aliases to dst itself as an external buffer, then extract() will not copy the contents.

Parameters

start	offset of first character which will be copied into the array
length	the number of characters to extract
dst	array in which to copy characters. The length of `dst` must be at least (`dstStart + length`).
dstStart	the offset in `dst` where the first character will be extracted

Stable:: ICU 2.0

Definition at line 4861 of file unistr.h.

◆ extract() [4/8]

void icu::UnicodeString::extract	(	int32_t	start,
		int32_t	length,
		UnicodeString &	target
	)		const

inline

Copy the characters in the range [start, start + length) into the UnicodeString target.

Parameters

start	offset of first character which will be copied
length	the number of characters to extract
target	UnicodeString into which to copy characters.

Stable:: ICU 2.0

Definition at line 4868 of file unistr.h.

◆ extract() [5/8]

int32_t icu::UnicodeString::extract	(	int32_t	start,
		int32_t	startLength,
		char *	target,
		const char *	codepage = `nullptr`
	)		const

inline

Copy the characters in the range [start, start + startLength) into an array of characters in a specified codepage.

The output string is NUL-terminated.

Recommendation: For invariant-character strings use extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const because it avoids object code dependencies of UnicodeString on the conversion code.

Parameters

start	offset of first character which will be copied
startLength	the number of characters to extract
target	the target buffer for extraction
codepage	the desired codepage for the characters. 0 has the special meaning of the default codepage. If `codepage` is an empty string (`""`), then a simple conversion is performed on the codepage-invariant subset ("invariant characters") of the platform encoding. See utypes.h. If `target` is nullptr, then the number of bytes required for `target` is returned. It is assumed that the target is big enough to fit all of the characters.

Returns: the output string length, not including the terminating NUL

Stable:: ICU 2.0

Definition at line 4876 of file unistr.h.

◆ extract() [6/8]

int32_t icu::UnicodeString::extract	(	int32_t	start,
		int32_t	startLength,
		char *	target,
		int32_t	targetCapacity,
		enum EInvariant	inv
	)		const

Copy the characters in the range [start, start + startLength) into an array of characters.

All characters must be invariant (see utypes.h). Use US_INV as the last, signature-distinguishing parameter.

This function does not write any more than targetCapacity characters but returns the length of the entire output string so that one can allocate a larger buffer and call the function again if necessary. The output string is NUL-terminated if possible.

Parameters

start	offset of first character which will be copied
startLength	the number of characters to extract
target	the target buffer for extraction, can be nullptr if targetCapacity is 0
targetCapacity	the length of the target buffer
inv	Signature-distinguishing parameter, use US_INV.

Returns: the output string length, not including the terminating NUL

Stable:: ICU 3.2

◆ extract() [7/8]

int32_t icu::UnicodeString::extract	(	int32_t	start,
		int32_t	startLength,
		char *	target,
		uint32_t	targetLength
	)		const

Copy the characters in the range [start, start + startLength) into an array of characters in the platform's default codepage.

This function does not write any more than targetLength characters but returns the length of the entire output string so that one can allocate a larger buffer and call the function again if necessary. The output string is NUL-terminated if possible.

Parameters

start	offset of first character which will be copied
startLength	the number of characters to extract
target	the target buffer for extraction
targetLength	the length of the target buffer. If `target` is nullptr, then the number of bytes required for `target` is returned.

Returns: the output string length, not including the terminating NUL

Stable:: ICU 2.0

◆ extract() [8/8]

int32_t icu::UnicodeString::extract	(	int32_t	start,
		int32_t	startLength,
		char *	target,
		uint32_t	targetLength,
		const char *	codepage
	)		const

Copy the characters in the range [start, start + startLength) into an array of characters in a specified codepage.

This function does not write any more than targetLength characters but returns the length of the entire output string so that one can allocate a larger buffer and call the function again if necessary. The output string is NUL-terminated if possible.

Recommendation: For invariant-character strings use extract(int32_t start, int32_t length, char *target, int32_t targetCapacity, enum EInvariant inv) const because it avoids object code dependencies of UnicodeString on the conversion code.

Parameters

start	offset of first character which will be copied
startLength	the number of characters to extract
target	the target buffer for extraction
targetLength	the length of the target buffer
codepage	the desired codepage for the characters. 0 has the special meaning of the default codepage. If `codepage` is an empty string (`""`), then a simple conversion is performed on the codepage-invariant subset ("invariant characters") of the platform encoding. See utypes.h. If `target` is nullptr, then the number of bytes required for `target` is returned.

Returns: the output string length, not including the terminating NUL

Stable:: ICU 2.0

◆ extractBetween() [1/2]

void icu::UnicodeString::extractBetween	(	int32_t	start,
		int32_t	limit,
		char16_t *	dst,
		int32_t	dstStart = `0`
	)		const

inline

Copy the characters in the range [start, limit) into the array dst, beginning at dstStart.

Parameters

start	offset of first character which will be copied into the array
limit	offset immediately following the last character to be copied
dst	array in which to copy characters. The length of `dst` must be at least (`dstStart + (limit - start)`).
dstStart	the offset in `dst` where the first character will be extracted

Stable:: ICU 2.0

Definition at line 4889 of file unistr.h.

◆ extractBetween() [2/2]

virtual void icu::UnicodeString::extractBetween	(	int32_t	start,
		int32_t	limit,
		UnicodeString &	target
	)		const

override virtual

Copy the characters in the range [start, limit) into the UnicodeString target.

Replaceable API.

Parameters

start	offset of first character which will be copied
limit	offset immediately following the last character to be copied
target	UnicodeString into which to copy characters.

Stable:: ICU 2.0

Implements icu::Replaceable.

◆ fastCopyFrom()

UnicodeString& icu::UnicodeString::fastCopyFrom ( const UnicodeString & src )

Almost the same as the assignment operator.

Replace the characters in this UnicodeString with the characters from src.

This function works the same as the assignment operator for all strings except for ones that are readonly aliases.

Starting with ICU 2.4, the assignment operator and the copy constructor allocate a new buffer and copy the buffer contents even for readonly aliases. This function implements the old, more efficient but less safe behavior of making this string also a readonly alias to the same buffer.

The fastCopyFrom function must be used only if it is known that the lifetime of this UnicodeString does not exceed the lifetime of the aliased buffer including its contents, for example for strings from resource bundles or aliases to string constants.

If the source object has an "open" buffer from getBuffer(minCapacity), then the copy is an empty string.

Parameters

src	The text containing the characters to replace.

Returns: a reference to this

Stable:: ICU 2.4

◆ findAndReplace() [1/3]

UnicodeString & icu::UnicodeString::findAndReplace	(	const UnicodeString &	oldText,
		const UnicodeString &	newText
	)

inline

Replace all occurrences of characters in oldText with the characters in newText.

Parameters

oldText	the text containing the search text
newText	the text containing the replacement text

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4838 of file unistr.h.

References length().

◆ findAndReplace() [2/3]

UnicodeString & icu::UnicodeString::findAndReplace	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	oldText,
		const UnicodeString &	newText
	)

inline

Replace all occurrences of characters in oldText with characters in newText in the range [start, start + length).

Parameters

start	the start of the range in which replace will be performed
length	the length of the range in which replace will be performed
oldText	the text containing the search text
newText	the text containing the replacement text

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4844 of file unistr.h.

References length().

◆ findAndReplace() [3/3]

UnicodeString& icu::UnicodeString::findAndReplace	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	oldText,
		int32_t	oldStart,
		int32_t	oldLength,
		const UnicodeString &	newText,
		int32_t	newStart,
		int32_t	newLength
	)

Replace all occurrences of characters in oldText in the range [oldStart, oldStart + oldLength) with the characters in newText in the range [newStart, newStart + newLength) in the range [start, start + length).

Parameters

start	the start of the range in which replace will be performed
length	the length of the range in which replace will be performed
oldText	the text containing the search text
oldStart	the start of the search range in `oldText`
oldLength	the length of the search range in `oldText`
newText	the text containing the replacement text
newStart	the start of the replacement range in `newText`
newLength	the length of the replacement range in `newText`

Returns: a reference to this

Stable:: ICU 2.0

◆ foldCase()

UnicodeString& icu::UnicodeString::foldCase ( uint32_t options = 0 )

Case-folds the characters in this string.

Case-folding is locale-independent and not context-sensitive, but there is an option for whether to include or exclude mappings for dotted I and dotless i that are marked with 'T' in CaseFolding.txt.

The result may be longer or shorter than the original.

Parameters

options Either U_FOLD_CASE_DEFAULT or U_FOLD_CASE_EXCLUDE_SPECIAL_I

Returns: A reference to this.

Stable:: ICU 2.0

◆ fromUTF32()

static UnicodeString icu::UnicodeString::fromUTF32	(	const UChar32 *	utf32,
		int32_t	length
	)

static

Create a UnicodeString from a UTF-32 string.

Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. Calls u_strFromUTF32WithSub().

Parameters

utf32	UTF-32 input string. Must not be nullptr.
length	Length of the input string, or -1 if NUL-terminated.

Returns: A UnicodeString with equivalent UTF-16 contents.

See also: toUTF32

Stable:: ICU 4.2

◆ fromUTF8()

static UnicodeString icu::UnicodeString::fromUTF8 ( StringPiece utf8 )

static

Create a UnicodeString from a UTF-8 string.

Illegal input is replaced with U+FFFD. Otherwise, errors result in a bogus string. Calls u_strFromUTF8WithSub().

Parameters

utf8	UTF-8 input string. Note that a StringPiece can be implicitly constructed from a std::string or a NUL-terminated const char * string.

Returns: A UnicodeString with equivalent UTF-16 contents.

See also: toUTF8; toUTF8String

Stable:: ICU 4.2

◆ getBuffer() [1/2]

const char16_t * icu::UnicodeString::getBuffer ( ) const

inline

Get a read-only pointer to the internal buffer.

This can be called at any time on a valid UnicodeString.

It returns 0 if the string is bogus, or during an "open" getBuffer(minCapacity).

It can be called as many times as desired. The pointer that it returns will remain valid until the UnicodeString object is modified, at which time the pointer is semantically invalidated and must not be used any more.

The capacity of the buffer can be determined with getCapacity(). The part after length() may or may not be initialized and valid, depending on the history of the UnicodeString object.

The buffer contents is (probably) not NUL-terminated. You can check if it is with (s.length() < s.getCapacity() && buffer[s.length()]==0). (See getTerminatedBuffer().)

The buffer may reside in read-only memory. Its contents must not be modified.

Returns: a read-only pointer to the internal string buffer, or nullptr if the string is empty or bogus

See also: getBuffer(int32_t minCapacity); getTerminatedBuffer()

Stable:: ICU 2.0

Definition at line 4304 of file unistr.h.

◆ getBuffer() [2/2]

char16_t* icu::UnicodeString::getBuffer ( int32_t minCapacity )

Get a read/write pointer to the internal buffer.

The buffer is guaranteed to be large enough for at least minCapacity char16_ts, writable, and is still owned by the UnicodeString object. Calls to getBuffer(minCapacity) must not be nested, and must be matched with calls to releaseBuffer(newLength). If the string buffer was read-only or shared, then it will be reallocated and copied.

An attempted nested call will return 0, and will not further modify the state of the UnicodeString object. It also returns 0 if the string is bogus.

The actual capacity of the string buffer may be larger than minCapacity. getCapacity() returns the actual capacity. For many operations, the full capacity should be used to avoid reallocations.

While the buffer is "open" between getBuffer(minCapacity) and releaseBuffer(newLength), the following applies:

The string length is set to 0.
Any read API call on the UnicodeString object will behave like on a 0-length string.
Any write API call on the UnicodeString object is disallowed and will have no effect.
You can read from and write to the returned buffer.
The previous string contents will still be in the buffer; if you want to use it, then you need to call length() before getBuffer(minCapacity). If the length() was greater than minCapacity, then any contents after minCapacity may be lost. The buffer contents is not NUL-terminated by getBuffer(). If length() < getCapacity() then you can terminate it by writing a NUL at index length().
You must call releaseBuffer(newLength) in order to return to normal UnicodeString operation.

Parameters

minCapacity the minimum number of char16_ts that are to be available in the buffer, starting at the returned pointer; default to the current string capacity if minCapacity==-1

Returns: a writable pointer to the internal string buffer, or nullptr if an error occurs (nested calls, out of memory)

See also: releaseBuffer; getTerminatedBuffer()

Stable:: ICU 2.0

Referenced by icu::Normalizer::compare(), icu::UnicodeSet::span(), and icu::UnicodeSet::spanBack().

◆ getCapacity()

int32_t icu::UnicodeString::getCapacity ( ) const

inline

Return the capacity of the internal buffer of the UnicodeString object.

This is useful together with the getBuffer functions. See there for details.

Returns: the number of char16_ts available in the internal buffer

See also: getBuffer

Stable:: ICU 2.0

Definition at line 4278 of file unistr.h.

◆ getChar32At()

virtual UChar32 icu::UnicodeString::getChar32At ( int32_t offset ) const

override protected virtual

The change in Replaceable to use virtual getChar32At() allows UnicodeString::char32At() to be inline again (see jitterbug 709).

Stable:: ICU 2.4

Implements icu::Replaceable.

◆ getChar32Limit()

int32_t icu::UnicodeString::getChar32Limit ( int32_t offset ) const

Adjust a random-access offset so that it points behind a Unicode character.

The offset that is passed in points behind any code unit of a code point, while the returned offset will point behind the last code unit of the same code point. In UTF-16, if the input offset points behind the first surrogate (i.e., to the second surrogate) of a surrogate pair, then the returned offset will point behind the second surrogate (i.e., to the first code unit after the surrogate pair).

Parameters

offset a valid offset after any code unit of a code point of the text

Returns: offset of the first code unit after the same code point

See also: U16_SET_CP_LIMIT

Stable:: ICU 2.0

◆ getChar32Start()

int32_t icu::UnicodeString::getChar32Start ( int32_t offset ) const

Adjust a random-access offset so that it points to the beginning of a Unicode character.

The offset that is passed in points to any code unit of a code point, while the returned offset will point to the first code unit of the same code point. In UTF-16, if the input offset points to a second surrogate of a surrogate pair, then the returned offset will point to the first surrogate.

Parameters

offset a valid offset into one code point of the text

Returns: offset of the first code unit of the same code point

See also: U16_SET_CP_START

Stable:: ICU 2.0

◆ getCharAt()

virtual char16_t icu::UnicodeString::getCharAt ( int32_t offset ) const

override protected virtual

The change in Replaceable to use virtual getCharAt() allows UnicodeString::charAt() to be inline again (see jitterbug 709).

Stable:: ICU 2.4

Implements icu::Replaceable.

◆ getDynamicClassID()

virtual UClassID icu::UnicodeString::getDynamicClassID ( ) const

override virtual

ICU "poor man's RTTI", returns a UClassID for the actual class.

Stable:: ICU 2.2

Reimplemented from icu::UObject.

◆ getLength()

virtual int32_t icu::UnicodeString::getLength ( ) const

override protected virtual

Implement Replaceable::getLength() (see jitterbug 1027).

Stable:: ICU 2.4

Implements icu::Replaceable.

◆ getStaticClassID()

static UClassID icu::UnicodeString::getStaticClassID ( )

static

ICU "poor man's RTTI", returns a UClassID for this class.

Stable:: ICU 2.2

◆ getTerminatedBuffer()

const char16_t* icu::UnicodeString::getTerminatedBuffer ( )

Get a read-only pointer to the internal buffer, making sure that it is NUL-terminated.

This can be called at any time on a valid UnicodeString.

It returns 0 if the string is bogus, or during an "open" getBuffer(minCapacity), or if the buffer cannot be NUL-terminated (because memory allocation failed).

It can be called as many times as desired. The pointer that it returns will remain valid until the UnicodeString object is modified, at which time the pointer is semantically invalidated and must not be used any more.

The capacity of the buffer can be determined with getCapacity(). The part after length()+1 may or may not be initialized and valid, depending on the history of the UnicodeString object.

The buffer contents is guaranteed to be NUL-terminated. getTerminatedBuffer() may reallocate the buffer if a terminating NUL is written. For this reason, this function is not const, unlike getBuffer(). Note that a UnicodeString may also contain NUL characters as part of its contents.

The buffer may reside in read-only memory. Its contents must not be modified.

Returns: a read-only pointer to the internal string buffer, or 0 if the string is empty or bogus

See also: getBuffer(int32_t minCapacity); getBuffer()

Stable:: ICU 2.2

◆ handleReplaceBetween()

virtual void icu::UnicodeString::handleReplaceBetween	(	int32_t	start,
		int32_t	limit,
		const UnicodeString &	text
	)

override virtual

Replace a substring of this object with the given text.

Parameters

start	the beginning index, inclusive; `0 <= start <= limit`.
limit	the ending index, exclusive; `start <= limit <= length()`.
text	the text to replace characters `start` to `limit - 1`

Stable:: ICU 2.0

Implements icu::Replaceable.

◆ hashCode()

int32_t icu::UnicodeString::hashCode ( ) const

inline

Generate a hash code for this object.

Returns: The hash code of this UnicodeString.

Stable:: ICU 2.0

Definition at line 4284 of file unistr.h.

◆ hasMetaData()

virtual UBool icu::UnicodeString::hasMetaData ( ) const

override virtual

Replaceable API.

Returns: true if it has MetaData

Stable:: ICU 2.4

Reimplemented from icu::Replaceable.

◆ hasMoreChar32Than()

UBool icu::UnicodeString::hasMoreChar32Than	(	int32_t	start,
		int32_t	length,
		int32_t	number
	)		const

Check if the length char16_t code units of the string contain more Unicode code points than a certain number.

This is more efficient than counting all code points in this part of the string and comparing that number with a threshold. This function may not need to scan the string at all if the length falls within a certain range, and never needs to count more than 'number+1' code points. Logically equivalent to (countChar32(start, length)>number). A Unicode code point may occupy either one or two char16_t code units.

Parameters

start	the index of the first code unit to check (0 for the entire string)
length	the number of char16_t code units to check (use INT32_MAX for the entire string; remember that start/length values are pinned)
number	The number of code points in the (sub)string is compared against the 'number' parameter.

Returns: Boolean value for whether the string contains more Unicode code points than 'number'. Same as (u_countChar32(s, length)>number).

See also: countChar32; u_strHasMoreChar32Than

Stable:: ICU 2.4

◆ indexOf() [1/13]

int32_t icu::UnicodeString::indexOf ( char16_t c ) const

inline

Locate in this the first occurrence of the BMP code point c, using bitwise comparison.

Parameters

c	The code unit to search for.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4620 of file unistr.h.

◆ indexOf() [2/13]

int32_t icu::UnicodeString::indexOf	(	char16_t	c,
		int32_t	start
	)		const

inline

Locate in this the first occurrence of the BMP code point c, starting at offset start, using bitwise comparison.

Parameters

c	The code unit to search for.
start	The offset at which searching will start.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4628 of file unistr.h.

◆ indexOf() [3/13]

int32_t icu::UnicodeString::indexOf	(	char16_t	c,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the first occurrence of the BMP code point c in the range [start, start + length), using bitwise comparison.

Parameters

c	The code unit to search for.
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4608 of file unistr.h.

◆ indexOf() [4/13]

int32_t icu::UnicodeString::indexOf	(	const char16_t *	srcChars,
		int32_t	srcLength,
		int32_t	start
	)		const

inline

Locate in this the first occurrence of the characters in srcChars starting at offset start, using bitwise comparison.

Parameters

srcChars	The text to search for.
srcLength	the number of characters in `srcChars` to match
start	the offset into this at which to start matching

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4593 of file unistr.h.

◆ indexOf() [5/13]

int32_t icu::UnicodeString::indexOf	(	const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength,
		int32_t	start,
		int32_t	length
	)		const

Locate in this the first occurrence in the range [start, start + length) of the characters in srcChars in the range [srcStart, srcStart + srcLength), using bitwise comparison.

Parameters

srcChars	The text to search for.
srcStart	the offset into `srcChars` at which to start matching
srcLength	the number of characters in `srcChars` to match
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

◆ indexOf() [6/13]

int32_t icu::UnicodeString::indexOf	(	const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the first occurrence in the range [start, start + length) of the characters in srcText in the range [srcStart, srcStart + srcLength), using bitwise comparison.

Parameters

srcText	The text to search for.
srcStart	the offset into `srcText` at which to start matching
srcLength	the number of characters in `srcText` to match
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4560 of file unistr.h.

References isBogus().

◆ indexOf() [7/13]

int32_t icu::UnicodeString::indexOf ( const UnicodeString & text ) const

inline

Locate in this the first occurrence of the characters in text, using bitwise comparison.

Parameters

text	The text to search for.

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4576 of file unistr.h.

References length().

◆ indexOf() [8/13]

int32_t icu::UnicodeString::indexOf	(	const UnicodeString &	text,
		int32_t	start
	)		const

inline

Locate in this the first occurrence of the characters in text starting at offset start, using bitwise comparison.

Parameters

text	The text to search for.
start	The offset at which searching will start.

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4580 of file unistr.h.

References length().

◆ indexOf() [9/13]

int32_t icu::UnicodeString::indexOf	(	const UnicodeString &	text,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the first occurrence in the range [start, start + length) of the characters in text, using bitwise comparison.

Parameters

text	The text to search for.
start	The offset at which searching will start.
length	The number of characters to search

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4587 of file unistr.h.

References length().

◆ indexOf() [10/13]

int32_t icu::UnicodeString::indexOf	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the first occurrence in the range [start, start + length) of the characters in srcChars, using bitwise comparison.

Parameters

srcChars	The text to search for.
srcLength	the number of characters in `srcChars`
start	The offset at which searching will start.
length	The number of characters to search

Returns: The offset into this of the start of srcChars, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4601 of file unistr.h.

◆ indexOf() [11/13]

int32_t icu::UnicodeString::indexOf ( UChar32 c ) const

inline

Locate in this the first occurrence of the code point c, using bitwise comparison.

Parameters

c	The code point to search for.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4624 of file unistr.h.

◆ indexOf() [12/13]

int32_t icu::UnicodeString::indexOf	(	UChar32	c,
		int32_t	start
	)		const

inline

Locate in this the first occurrence of the code point c starting at offset start, using bitwise comparison.

Parameters

c	The code point to search for.
start	The offset at which searching will start.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4635 of file unistr.h.

◆ indexOf() [13/13]

int32_t icu::UnicodeString::indexOf	(	UChar32	c,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the first occurrence of the code point c in the range [start, start + length), using bitwise comparison.

Parameters

c	The code point to search for.
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4614 of file unistr.h.

◆ insert() [1/6]

UnicodeString & icu::UnicodeString::insert	(	int32_t	start,
		char16_t	srcChar
	)

inline

Insert the code unit srcChar into the UnicodeString object at offset start.

Parameters

start	the offset at which the insertion occurs
srcChar	the code unit to insert

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5082 of file unistr.h.

◆ insert() [2/6]

UnicodeString & icu::UnicodeString::insert	(	int32_t	start,
		const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength
	)

inline

Insert the characters in srcChars in the range [srcStart, srcStart + srcLength) into the UnicodeString object at offset start.

srcChars is not modified.

Parameters

start	the offset at which the insertion begins
srcChars	the source for the new characters
srcStart	the offset into `srcChars` where new characters will be obtained
srcLength	the number of characters in `srcChars` in the insert string

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5069 of file unistr.h.

◆ insert() [3/6]

UnicodeString & icu::UnicodeString::insert	(	int32_t	start,
		const UnicodeString &	srcText
	)

inline

Insert the characters in srcText into the UnicodeString object at offset start.

srcText is not modified.

Parameters

start	the offset where the insertion begins
srcText	the source for the new characters

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5064 of file unistr.h.

References length().

◆ insert() [4/6]

UnicodeString & icu::UnicodeString::insert	(	int32_t	start,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)

inline

Insert the characters in srcText in the range [srcStart, srcStart + srcLength) into the UnicodeString object at offset start.

srcText is not modified.

Parameters

start	the offset where the insertion begins
srcText	the source for the new characters
srcStart	the offset into `srcText` where new characters will be obtained
srcLength	the number of characters in `srcText` in the insert string

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5057 of file unistr.h.

◆ insert() [5/6]

UnicodeString & icu::UnicodeString::insert	(	int32_t	start,
		ConstChar16Ptr	srcChars,
		int32_t	srcLength
	)

inline

Insert the characters in srcChars into the UnicodeString object at offset start.

srcChars is not modified.

Parameters

start	the offset where the insertion begins
srcChars	the source for the new characters
srcLength	the number of Unicode characters in srcChars.

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5076 of file unistr.h.

◆ insert() [6/6]

UnicodeString & icu::UnicodeString::insert	(	int32_t	start,
		UChar32	srcChar
	)

inline

Insert the code point srcChar into the UnicodeString object at offset start.

Parameters

start	the offset at which the insertion occurs
srcChar	the code point to insert

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5087 of file unistr.h.

◆ isBogus()

UBool icu::UnicodeString::isBogus ( ) const

inline

Determine if this object contains a valid string.

A bogus string has no value. It is different from an empty string, although in both cases isEmpty() returns true and length() returns 0. setToBogus() and isBogus() can be used to indicate that no string value is available. For a bogus string, getBuffer() and getTerminatedBuffer() return nullptr, and length() returns 0.

Returns: true if the string is bogus/invalid, false otherwise

See also: setToBogus()

Stable:: ICU 2.0

Definition at line 4288 of file unistr.h.

Referenced by indexOf(), lastIndexOf(), and operator==().

◆ isEmpty()

UBool icu::UnicodeString::isEmpty ( ) const

inline

Determine if this string is empty.

Returns: true if this string contains 0 characters, false otherwise.

Stable:: ICU 2.0

Definition at line 4922 of file unistr.h.

◆ lastIndexOf() [1/13]

int32_t icu::UnicodeString::lastIndexOf ( char16_t c ) const

inline

Locate in this the last occurrence of the BMP code point c, using bitwise comparison.

Parameters

c	The code unit to search for.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4703 of file unistr.h.

◆ lastIndexOf() [2/13]

int32_t icu::UnicodeString::lastIndexOf	(	char16_t	c,
		int32_t	start
	)		const

inline

Locate in this the last occurrence of the BMP code point c starting at offset start, using bitwise comparison.

Parameters

c	The code unit to search for.
start	The offset at which searching will start.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4712 of file unistr.h.

◆ lastIndexOf() [3/13]

int32_t icu::UnicodeString::lastIndexOf	(	char16_t	c,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the last occurrence of the BMP code point c in the range [start, start + length), using bitwise comparison.

Parameters

c	The code unit to search for.
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4690 of file unistr.h.

◆ lastIndexOf() [4/13]

int32_t icu::UnicodeString::lastIndexOf	(	const char16_t *	srcChars,
		int32_t	srcLength,
		int32_t	start
	)		const

inline

Locate in this the last occurrence of the characters in srcChars starting at offset start, using bitwise comparison.

Parameters

srcChars	The text to search for.
srcLength	the number of characters in `srcChars` to match
start	the offset into this at which to start matching

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4649 of file unistr.h.

◆ lastIndexOf() [5/13]

int32_t icu::UnicodeString::lastIndexOf	(	const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength,
		int32_t	start,
		int32_t	length
	)		const

Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars in the range [srcStart, srcStart + srcLength), using bitwise comparison.

Parameters

srcChars	The text to search for.
srcStart	the offset into `srcChars` at which to start matching
srcLength	the number of characters in `srcChars` to match
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

◆ lastIndexOf() [6/13]

int32_t icu::UnicodeString::lastIndexOf	(	const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the last occurrence in the range [start, start + length) of the characters in srcText in the range [srcStart, srcStart + srcLength), using bitwise comparison.

Parameters

srcText	The text to search for.
srcStart	the offset into `srcText` at which to start matching
srcLength	the number of characters in `srcText` to match
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4657 of file unistr.h.

References isBogus().

◆ lastIndexOf() [7/13]

int32_t icu::UnicodeString::lastIndexOf ( const UnicodeString & text ) const

inline

Locate in this the last occurrence of the characters in text, using bitwise comparison.

Parameters

text	The text to search for.

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4686 of file unistr.h.

References length().

◆ lastIndexOf() [8/13]

int32_t icu::UnicodeString::lastIndexOf	(	const UnicodeString &	text,
		int32_t	start
	)		const

inline

Locate in this the last occurrence of the characters in text starting at offset start, using bitwise comparison.

Parameters

text	The text to search for.
start	The offset at which searching will start.

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4679 of file unistr.h.

References length().

◆ lastIndexOf() [9/13]

int32_t icu::UnicodeString::lastIndexOf	(	const UnicodeString &	text,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the last occurrence in the range [start, start + length) of the characters in text, using bitwise comparison.

Parameters

text	The text to search for.
start	The offset at which searching will start.
length	The number of characters to search

Returns: The offset into this of the start of text, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4673 of file unistr.h.

References length().

◆ lastIndexOf() [10/13]

int32_t icu::UnicodeString::lastIndexOf	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the last occurrence in the range [start, start + length) of the characters in srcChars, using bitwise comparison.

Parameters

srcChars	The text to search for.
srcLength	the number of characters in `srcChars`
start	The offset at which searching will start.
length	The number of characters to search

Returns: The offset into this of the start of srcChars, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4642 of file unistr.h.

◆ lastIndexOf() [11/13]

int32_t icu::UnicodeString::lastIndexOf ( UChar32 c ) const

inline

Locate in this the last occurrence of the code point c, using bitwise comparison.

Parameters

c	The code point to search for.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4707 of file unistr.h.

◆ lastIndexOf() [12/13]

int32_t icu::UnicodeString::lastIndexOf	(	UChar32	c,
		int32_t	start
	)		const

inline

Locate in this the last occurrence of the code point c starting at offset start, using bitwise comparison.

Parameters

c	The code point to search for.
start	The offset at which searching will start.

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4719 of file unistr.h.

◆ lastIndexOf() [13/13]

int32_t icu::UnicodeString::lastIndexOf	(	UChar32	c,
		int32_t	start,
		int32_t	length
	)		const

inline

Locate in this the last occurrence of the code point c in the range [start, start + length), using bitwise comparison.

Parameters

c	The code point to search for.
start	the offset into this at which to start matching
length	the number of characters in this to search

Returns: The offset into this of c, or -1 if not found.

Stable:: ICU 2.0

Definition at line 4696 of file unistr.h.

◆ length()

int32_t icu::UnicodeString::length ( ) const

inline

Return the length of the UnicodeString object.

The length is the number of char16_t code units in the UnicodeString. If you want the number of code points, please use countChar32().

Returns: the length of the UnicodeString object

See also: countChar32

Stable:: ICU 2.0

Definition at line 4273 of file unistr.h.

Referenced by append(), caseCompare(), icu::Normalizer::compare(), compare(), compareCodePointOrder(), endsWith(), findAndReplace(), indexOf(), insert(), lastIndexOf(), operator+=(), operator<(), operator<=(), operator==(), operator>(), operator>=(), replace(), replaceBetween(), setTo(), icu::UnicodeSet::span(), icu::UnicodeSet::spanBack(), and startsWith().

◆ moveIndex32()

int32_t icu::UnicodeString::moveIndex32	(	int32_t	index,
		int32_t	delta
	)		const

Move the code unit index along the string by delta code points.

Interpret the input index as a code unit-based offset into the string, move the index forward or backward by delta code points, and return the resulting index. The input index should point to the first code unit of a code point, if there is more than one.

Both input and output indexes are code unit-based as for all string indexes/offsets in ICU (and other libraries, like MBCS char*). If delta<0 then the index is moved backward (toward the start of the string). If delta>0 then the index is moved forward (toward the end of the string).

This behaves like CharacterIterator::move32(delta, kCurrent).

Behavior for out-of-bounds indexes: moveIndex32 pins the input index to 0..length(), i.e., if the input index<0 then it is pinned to 0; if it is index>length() then it is pinned to length(). Afterwards, the index is moved by delta code points forward or backward, but no further backward than to 0 and no further forward than to length(). The resulting index return value will be in between 0 and length(), inclusively.

Examples:

// s has code points 'a' U+10000 'b' U+10ffff U+2029
UnicodeString s(u"a\U00010000b\U0010ffff\u2029");
 
// initial index: position of U+10000
int32_t index=1;
 
// the following examples will all result in index==4, position of U+10ffff
 
// skip 2 code points from some position in the string
index=s.moveIndex32(index, 2); // skips U+10000 and 'b'
 
// go to the 3rd code point from the start of s (0-based)
index=s.moveIndex32(0, 3); // skips 'a', U+10000, and 'b'
 
// go to the next-to-last code point of s
index=s.moveIndex32(s.length(), -2); // backward-skips U+2029 and U+10ffff

Parameters

index	input code unit index
delta	(signed) code point count to move the index forward or backward in the string

Returns: the resulting code unit index

Stable:: ICU 2.0

◆ operator std::u16string_view()

icu::UnicodeString::operator std::u16string_view ( ) const

inline

Converts to a std::u16string_view.

Returns: a string view of the contents of this string

Stable:: ICU 76

Definition at line 3101 of file unistr.h.

References icu::Replaceable::length().

◆ operator std::wstring_view()

icu::UnicodeString::operator std::wstring_view ( ) const

inline

Converts to a std::wstring_view.

Note: This should remain draft until C++ standard plans about char16_t vs. wchar_t become clearer.

Returns: a string view of the contents of this string

Stable:: ICU 76

Definition at line 3115 of file unistr.h.

References icu::Replaceable::length(), and U_ALIASING_BARRIER.

◆ operator!=() [1/2]

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>

bool icu::UnicodeString::operator!= ( const S & text ) const

inline

Inequality operator.

Performs only bitwise comparison with text which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.

For performance, you can use std::u16string_view literals with compile-time length determination:

#include <string_view>
using namespace std::string_view_literals;
UnicodeString str = ...;
if (str != u"literal"sv) { ... }

Parameters

text	The string view to compare to this string.

Returns: false if text contains the same characters as this one, true otherwise.

Stable:: ICU 76

Definition at line 388 of file unistr.h.

References icu::operator==().

◆ operator!=() [2/2]

bool icu::UnicodeString::operator!= ( const UnicodeString & text ) const

inline

Inequality operator.

Performs only bitwise comparison.

Parameters

text	The UnicodeString to compare to this one.

Returns: false if text contains the same characters as this one, true otherwise.

Stable:: ICU 2.0

Definition at line 4359 of file unistr.h.

◆ operator+=() [1/4]

UnicodeString & icu::UnicodeString::operator+= ( char16_t ch )

inline

Append operator.

Append the code unit ch to the UnicodeString object.

Parameters

ch	the code unit to be appended

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5044 of file unistr.h.

◆ operator+=() [2/4]

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>

UnicodeString& icu::UnicodeString::operator+= ( const S & src )

inline

Append operator.

Appends the characters in src which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view, to the UnicodeString object.

Parameters

src	the source for the new characters

Returns: a reference to this

Stable:: ICU 76

Definition at line 2287 of file unistr.h.

◆ operator+=() [3/4]

UnicodeString & icu::UnicodeString::operator+= ( const UnicodeString & srcText )

inline

Append operator.

Append the characters in srcText to the UnicodeString object. srcText is not modified.

Parameters

srcText the source for the new characters

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5053 of file unistr.h.

References length().

◆ operator+=() [4/4]

UnicodeString & icu::UnicodeString::operator+= ( UChar32 ch )

inline

Append operator.

Append the code point ch to the UnicodeString object.

Parameters

ch	the code point to be appended

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5048 of file unistr.h.

◆ operator<()

UBool icu::UnicodeString::operator< ( const UnicodeString & text ) const

inline

Less than operator.

Performs only bitwise comparison.

Parameters

text	The UnicodeString to compare to this one.

Returns: true if the characters in this are bitwise less than the characters in text, false otherwise

Stable:: ICU 2.0

Definition at line 4366 of file unistr.h.

References length().

◆ operator<=()

UBool icu::UnicodeString::operator<= ( const UnicodeString & text ) const

inline

Less than or equal operator.

Performs only bitwise comparison.

Parameters

text	The UnicodeString to compare to this one.

Returns: true if the characters in this are bitwise less than or equal to the characters in text, false otherwise

Stable:: ICU 2.0

Definition at line 4374 of file unistr.h.

References length().

◆ operator=() [1/5]

UnicodeString & icu::UnicodeString::operator= ( char16_t ch )

inline

Assignment operator.

Replace the characters in this UnicodeString with the code unit ch.

Parameters

ch	the code unit to replace

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4965 of file unistr.h.

◆ operator=() [2/5]

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>

UnicodeString& icu::UnicodeString::operator= ( const S & src )

inline

Assignment operator.

Replaces the characters in this UnicodeString with a copy of the characters from the src which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.

Parameters

src	The string view containing the characters to copy.

Returns: a reference to this

Stable:: ICU 76

Definition at line 2022 of file unistr.h.

References icu::Replaceable::length().

◆ operator=() [3/5]

UnicodeString& icu::UnicodeString::operator= ( const UnicodeString & srcText )

Assignment operator.

Replace the characters in this UnicodeString with the characters from srcText.

Starting with ICU 2.4, the assignment operator and the copy constructor allocate a new buffer and copy the buffer contents even for readonly aliases. By contrast, the fastCopyFrom() function implements the old, more efficient but less safe behavior of making this string also a readonly alias to the same buffer.

If the source object has an "open" buffer from getBuffer(minCapacity), then the copy is an empty string.

Parameters

srcText The text containing the characters to replace

Returns: a reference to this

Stable:: ICU 2.0

See also: fastCopyFrom

◆ operator=() [4/5]

UnicodeString & icu::UnicodeString::operator= ( UChar32 ch )

inline

Assignment operator.

Replace the characters in this UnicodeString with the code point ch.

Parameters

ch	the code point to replace

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4969 of file unistr.h.

◆ operator=() [5/5]

UnicodeString& icu::UnicodeString::operator= ( UnicodeString && src )

noexcept

Move assignment operator; might leave src in bogus state.

This string will have the same contents and state that the source string had. The behavior is undefined if *this and src are the same object.

Parameters

src	source string

Returns: *this

Stable:: ICU 56

◆ operator==() [1/2]

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>

bool icu::UnicodeString::operator== ( const S & text ) const

inline

Equality operator.

Performs only bitwise comparison with text which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view.

For performance, you can use UTF-16 string literals with compile-time length determination:

UnicodeString str = ...;

if (str == u"literal") { ... }

Parameters

text	The string view to compare to this string.

Returns: true if text contains the same characters as this one, false otherwise.

Stable:: ICU 76

Definition at line 355 of file unistr.h.

References icu::Replaceable::length().

◆ operator==() [2/2]

bool icu::UnicodeString::operator== ( const UnicodeString & text ) const

inline

Equality operator.

Performs only bitwise comparison.

Parameters

text	The UnicodeString to compare to this one.

Returns: true if text contains the same characters as this one, false otherwise.

Stable:: ICU 2.0

Definition at line 4348 of file unistr.h.

References isBogus(), and length().

◆ operator>()

UBool icu::UnicodeString::operator> ( const UnicodeString & text ) const

inline

Greater than operator.

Performs only bitwise comparison.

Parameters

text	The UnicodeString to compare to this one.

Returns: true if the characters in this are bitwise greater than the characters in text, false otherwise

Stable:: ICU 2.0

Definition at line 4363 of file unistr.h.

References length().

◆ operator>=()

UBool icu::UnicodeString::operator>= ( const UnicodeString & text ) const

inline

Greater than or equal operator.

Performs only bitwise comparison.

Parameters

text	The UnicodeString to compare to this one.

Returns: true if the characters in this are bitwise greater than or equal to the characters in text, false otherwise

Stable:: ICU 2.0

Definition at line 4371 of file unistr.h.

References length().

◆ operator[]()

char16_t icu::UnicodeString::operator[] ( int32_t offset ) const

inline

Return the code unit at offset offset.

If the offset is not valid (0..length()-1) then U+ffff is returned.

Parameters

offset a valid offset into the text

Returns: the code unit at offset offset

Stable:: ICU 2.0

Definition at line 4918 of file unistr.h.

◆ padLeading()

UBool icu::UnicodeString::padLeading	(	int32_t	targetLength,
		char16_t	padChar = `0x0020`
	)

Pad the start of this UnicodeString with the character padChar.

If the length of this UnicodeString is less than targetLength, targetLength - length() copies of padChar will be added to the beginning of this UnicodeString.

Parameters

targetLength	the desired length of the string
padChar	the character to use for padding. Defaults to space (U+0020)

Returns: true if the text was padded, false otherwise.

Stable:: ICU 2.0

◆ padTrailing()

UBool icu::UnicodeString::padTrailing	(	int32_t	targetLength,
		char16_t	padChar = `0x0020`
	)

Pad the end of this UnicodeString with the character padChar.

If the length of this UnicodeString is less than targetLength, targetLength - length() copies of padChar will be added to the end of this UnicodeString.

Parameters

targetLength	the desired length of the string
padChar	the character to use for padding. Defaults to space (U+0020)

Returns: true if the text was padded, false otherwise.

Stable:: ICU 2.0

◆ push_back()

void icu::UnicodeString::push_back ( char16_t c )

inline

Appends the code unit c to the UnicodeString object.

Same as append(c) except does not return *this.

Parameters

c	the code unit to append

Draft:: This API may be changed in the future versions and was introduced in ICU 78

Definition at line 2386 of file unistr.h.

◆ rbegin()

unspecified_reverse_iterator icu::UnicodeString::rbegin ( ) const

inline

Returns: a reverse iterator to the last code unit in this string. The iterator may be a pointer or a contiguous-iterator object.

Draft:: This API may be changed in the future versions and was introduced in ICU 78

Definition at line 1949 of file unistr.h.

◆ readOnlyAlias() [1/2]

template<typename S , typename = std::enable_if_t<ConvertibleToU16StringView<S>>>

static UnicodeString icu::UnicodeString::readOnlyAlias ( const S & text )

inline static

Readonly-aliasing factory method.

Aliases the same buffer as the input text which is, or which is implicitly convertible to, a std::u16string_view or (if U_SIZEOF_WCHAR_T==2) std::wstring_view. The string is bogus if the string view is too long.

The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified.

In an assignment to another UnicodeString, when using the copy constructor or the assignment operator, the text will be copied. When using fastCopyFrom(), the text will be aliased again, so that both strings then alias the same readonly-text.

Parameters

text	The string view to alias for the UnicodeString.

Stable:: ICU 76

Definition at line 3662 of file unistr.h.

◆ readOnlyAlias() [2/2]

static UnicodeString icu::UnicodeString::readOnlyAlias ( const UnicodeString & text )

inline static

Readonly-aliasing factory method.

Aliases the same buffer as the input text.

The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified.

In an assignment to another UnicodeString, when using the copy constructor or the assignment operator, the text will be copied. When using fastCopyFrom(), the text will be aliased again, so that both strings then alias the same readonly-text.

Parameters

text	The UnicodeString to alias.

Stable:: ICU 76

Definition at line 3685 of file unistr.h.

◆ releaseBuffer()

void icu::UnicodeString::releaseBuffer ( int32_t newLength = -1 )

Release a read/write buffer on a UnicodeString object with an "open" getBuffer(minCapacity).

This function must be called in a matched pair with getBuffer(minCapacity). releaseBuffer(newLength) must be called if and only if a getBuffer(minCapacity) is "open".

It will set the string length to newLength, at most to the current capacity. If newLength==-1 then it will set the length according to the first NUL in the buffer, or to the capacity if there is no NUL.

After calling releaseBuffer(newLength) the UnicodeString is back to normal operation.

Parameters

newLength the new length of the UnicodeString object; defaults to the current capacity if newLength is greater than that; if newLength==-1, it defaults to u_strlen(buffer) but not more than the current capacity of the string

See also: getBuffer(int32_t minCapacity)

Stable:: ICU 2.0

◆ remove() [1/2]

UnicodeString & icu::UnicodeString::remove ( )

inline

Removes all characters from the UnicodeString object and clears the bogus flag.

This is the UnicodeString equivalent of std::string’s clear().

Returns: a reference to this

See also: setToBogus

Stable:: ICU 2.0

Definition at line 5093 of file unistr.h.

◆ remove() [2/2]

UnicodeString & icu::UnicodeString::remove	(	int32_t	start,
		int32_t	length = `static_cast<int32_t>(INT32_MAX)`
	)

inline

Remove the characters in the range [start, start + length) from the UnicodeString object.

Parameters

start	the offset of the first character to remove
length	the number of characters to remove

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5105 of file unistr.h.

References INT32_MAX.

◆ removeBetween()

UnicodeString & icu::UnicodeString::removeBetween	(	int32_t	start,
		int32_t	limit = `static_cast<int32_t>(INT32_MAX)`
	)

inline

Remove the characters in the range [start, limit) from the UnicodeString object.

Parameters

start	the offset of the first character to remove
limit	the offset immediately following the range to remove

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5116 of file unistr.h.

◆ rend()

unspecified_reverse_iterator icu::UnicodeString::rend ( ) const

inline

Returns: a reverse iterator to just before the first code unit in this string. The iterator may be a pointer or a contiguous-iterator object.

Draft:: This API may be changed in the future versions and was introduced in ICU 78

Definition at line 1955 of file unistr.h.

◆ replace() [1/6]

UnicodeString & icu::UnicodeString::replace	(	int32_t	start,
		int32_t	length,
		char16_t	srcChar
	)

inline

Replace the characters in the range [start, start + length) with the code unit srcChar.

Parameters

start	the offset at which the replace operation begins
length	the number of characters to replace. The character at `start + length` is not modified.
srcChar	the new code unit

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4818 of file unistr.h.

◆ replace() [2/6]

UnicodeString & icu::UnicodeString::replace	(	int32_t	start,
		int32_t	length,
		const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength
	)

inline

Replace the characters in the range [start, start + length) with the characters in srcChars in the range [srcStart, srcStart + srcLength).

srcChars is not modified.

Parameters

start	the offset at which the replace operation begins
length	the number of characters to replace. The character at `start + length` is not modified.
srcChars	the source for the new characters
srcStart	the offset into `srcChars` where new characters will be obtained
srcLength	the number of characters in `srcChars` in the replace string

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4810 of file unistr.h.

◆ replace() [3/6]

UnicodeString & icu::UnicodeString::replace	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	srcText
	)

inline

Replace the characters in the range [start, start + length) with the characters in srcText.

srcText is not modified.

Parameters

start	the offset at which the replace operation begins
length	the number of characters to replace. The character at `start + length` is not modified.
srcText	the source for the new characters

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4789 of file unistr.h.

References length().

◆ replace() [4/6]

UnicodeString & icu::UnicodeString::replace	(	int32_t	start,
		int32_t	length,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)

inline

Replace the characters in the range [start, start + length) with the characters in srcText in the range [srcStart, srcStart + srcLength).

srcText is not modified.

Parameters

start	the offset at which the replace operation begins
length	the number of characters to replace. The character at `start + length` is not modified.
srcText	the source for the new characters
srcStart	the offset into `srcText` where new characters will be obtained
srcLength	the number of characters in `srcText` in the replace string

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4795 of file unistr.h.

◆ replace() [5/6]

UnicodeString & icu::UnicodeString::replace	(	int32_t	start,
		int32_t	length,
		ConstChar16Ptr	srcChars,
		int32_t	srcLength
	)

inline

Replace the characters in the range [start, start + length) with the characters in srcChars.

srcChars is not modified.

Parameters

start	the offset at which the replace operation begins
length	number of characters to replace. The character at `start + length` is not modified.
srcChars	the source for the new characters
srcLength	the number of Unicode characters in srcChars

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4803 of file unistr.h.

◆ replace() [6/6]

UnicodeString& icu::UnicodeString::replace	(	int32_t	start,
		int32_t	length,
		UChar32	srcChar
	)

Replace the characters in the range [start, start + length) with the code point srcChar.

Parameters

start	the offset at which the replace operation begins
length	the number of characters to replace. The character at `start + length` is not modified.
srcChar	the new code point

Returns: a reference to this

Stable:: ICU 2.0

◆ replaceBetween() [1/2]

UnicodeString & icu::UnicodeString::replaceBetween	(	int32_t	start,
		int32_t	limit,
		const UnicodeString &	srcText
	)

inline

Replace the characters in the range [start, limit) with the characters in srcText.

srcText is not modified.

Parameters

start	the offset at which the replace operation begins
limit	the offset immediately following the replace range
srcText	the source for the new characters

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4824 of file unistr.h.

References length().

◆ replaceBetween() [2/2]

UnicodeString & icu::UnicodeString::replaceBetween	(	int32_t	start,
		int32_t	limit,
		const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLimit
	)

inline

Replace the characters in the range [start, limit) with the characters in srcText in the range [srcStart, srcLimit).

srcText is not modified.

Parameters

start	the offset at which the replace operation begins
limit	the offset immediately following the replace range
srcText	the source for the new characters
srcStart	the offset into `srcText` where new characters will be obtained
srcLimit	the offset immediately following the range to copy in `srcText`

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4830 of file unistr.h.

◆ retainBetween()

UnicodeString & icu::UnicodeString::retainBetween	(	int32_t	start,
		int32_t	limit = `INT32_MAX`
	)

inline

Retain only the characters in the range [start, limit) from the UnicodeString object.

Removes characters before start and at and after limit.

Parameters

start	the offset of the first character to retain
limit	the offset immediately following the range to retain

Returns: a reference to this

Stable:: ICU 4.4

Definition at line 5121 of file unistr.h.

◆ reverse() [1/2]

UnicodeString & icu::UnicodeString::reverse ( )

inline

Reverse this UnicodeString in place.

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5142 of file unistr.h.

◆ reverse() [2/2]

UnicodeString & icu::UnicodeString::reverse	(	int32_t	start,
		int32_t	length
	)

inline

Reverse the range [start, start + length) in this UnicodeString.

Parameters

start	the start of the range to reverse
length	the number of characters to reverse

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5146 of file unistr.h.

◆ setCharAt()

UnicodeString& icu::UnicodeString::setCharAt	(	int32_t	offset,
		char16_t	ch
	)

Set the character at the specified offset to the specified character.

Parameters

offset	A valid offset into the text of the character to set
ch	The new character

Returns: A reference to this

Stable:: ICU 2.0

◆ setTo() [1/8]

UnicodeString& icu::UnicodeString::setTo	(	char16_t *	buffer,
		int32_t	buffLength,
		int32_t	buffCapacity
	)

Aliasing setTo() function, analogous to the writable-aliasing char16_t* constructor.

The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has write-through semantics: For as long as the capacity of the buffer is sufficient, write operations will directly affect the buffer. When more capacity is necessary, then a new buffer will be allocated and the contents copied as with regularly constructed strings. In an assignment to another UnicodeString, the buffer will be copied. The extract(Char16Ptr dst) function detects whether the dst pointer is the same as the string buffer itself and will in this case not copy the contents.

Parameters

buffer	The characters to alias for the UnicodeString.
buffLength	The number of Unicode characters in `buffer` to alias.
buffCapacity	The size of `buffer` in char16_ts.

Returns: a reference to this

Stable:: ICU 2.0

◆ setTo() [2/8]

UnicodeString & icu::UnicodeString::setTo ( char16_t srcChar )

inline

Set the characters in the UnicodeString object to the code unit srcChar.

Parameters

srcChar the code unit which becomes the UnicodeString's character content

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5005 of file unistr.h.

◆ setTo() [3/8]

UnicodeString & icu::UnicodeString::setTo	(	const char16_t *	srcChars,
		int32_t	srcLength
	)

inline

Set the characters in the UnicodeString object to the characters in srcChars.

srcChars is not modified.

Parameters

srcChars	the source for the new characters
srcLength	the number of Unicode characters in srcChars.

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4997 of file unistr.h.

◆ setTo() [4/8]

UnicodeString & icu::UnicodeString::setTo ( const UnicodeString & srcText )

inline

Set the text in the UnicodeString object to the characters in srcText.

srcText is not modified.

Parameters

srcText the source for the new characters

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4991 of file unistr.h.

◆ setTo() [5/8]

UnicodeString & icu::UnicodeString::setTo	(	const UnicodeString &	srcText,
		int32_t	srcStart
	)

inline

Set the text in the UnicodeString object to the characters in srcText in the range [srcStart, srcText.length()).

srcText is not modified.

Parameters

srcText	the source for the new characters
srcStart	the offset into `srcText` where new characters will be obtained

Returns: a reference to this

Stable:: ICU 2.2

Definition at line 4982 of file unistr.h.

References length().

Referenced by icu::ures_getNextUnicodeString(), icu::ures_getUnicodeString(), icu::ures_getUnicodeStringByIndex(), and icu::ures_getUnicodeStringByKey().

◆ setTo() [6/8]

UnicodeString & icu::UnicodeString::setTo	(	const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)

inline

Set the text in the UnicodeString object to the characters in srcText in the range [srcStart, srcStart + srcLength).

srcText is not modified.

Parameters

srcText	the source for the new characters
srcStart	the offset into `srcText` where new characters will be obtained
srcLength	the number of characters in `srcText` in the replace string.

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 4973 of file unistr.h.

◆ setTo() [7/8]

UnicodeString& icu::UnicodeString::setTo	(	UBool	isTerminated,
		ConstChar16Ptr	text,
		int32_t	textLength
	)

Aliasing setTo() function, analogous to the readonly-aliasing char16_t* constructor.

The text will be used for the UnicodeString object, but it will not be released when the UnicodeString is destroyed. This has copy-on-write semantics: When the string is modified, then the buffer is first copied into newly allocated memory. The aliased buffer is never modified.

In an assignment to another UnicodeString, when using the copy constructor or the assignment operator, the text will be copied. When using fastCopyFrom(), the text will be aliased again, so that both strings then alias the same readonly-text.

Parameters

isTerminated	specifies if `text` is `NUL`-terminated. This must be true if `textLength==-1`.
text	The characters to alias for the UnicodeString.
textLength	The number of Unicode characters in `text` to alias. If -1, then this function will determine the length by calling `u_strlen()`.

Returns: a reference to this

Stable:: ICU 2.0

◆ setTo() [8/8]

UnicodeString & icu::UnicodeString::setTo ( UChar32 srcChar )

inline

Set the characters in the UnicodeString object to the code point srcChar.

Parameters

srcChar the code point which becomes the UnicodeString's character content

Returns: a reference to this

Stable:: ICU 2.0

Definition at line 5012 of file unistr.h.

◆ setToBogus()

void icu::UnicodeString::setToBogus ( )

Make this UnicodeString object invalid.

The string will test true with isBogus().

A bogus string has no value. It is different from an empty string. It can be used to indicate that no string value is available. getBuffer() and getTerminatedBuffer() return nullptr, and length() returns 0.

This utility function is used throughout the UnicodeString implementation to indicate that a UnicodeString operation failed, and may be used in other functions, especially but not exclusively when such functions do not take a UErrorCode for simplicity.

The following methods, and no others, will clear a string object's bogus flag:

remove()
remove(0, INT32_MAX)
truncate(0)
operator=() (assignment operator)
setTo(...)

The simplest way to turn a bogus string into an empty one is to use the remove() function. Examples for other functions that are equivalent to "set to empty string":

if(s.isBogus()) {
  s.remove();           // set to an empty string (remove all), or
  s.remove(0, INT32_MAX); // set to an empty string (remove all), or
  s.truncate(0);        // set to an empty string (complete truncation), or
  s=UnicodeString();    // assign an empty string, or
  s.setTo((UChar32)-1); // set to a pseudo code point that is out of range, or
  s.setTo(u"", 0);      // set to an empty C Unicode string
}

See also: isBogus()

Stable:: ICU 2.0

Referenced by icu::ures_getNextUnicodeString(), icu::ures_getUnicodeString(), icu::ures_getUnicodeStringByIndex(), and icu::ures_getUnicodeStringByKey().

◆ startsWith() [1/4]

UBool icu::UnicodeString::startsWith	(	const char16_t *	srcChars,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Determine if this starts with the characters in srcChars in the range [srcStart, srcStart + srcLength).

Parameters

srcChars	The characters to match.
srcStart	the offset into `srcChars` to start matching
srcLength	the number of characters in `srcChars` to match

Returns: true if this starts with the characters in srcChars, false otherwise

Stable:: ICU 2.0

Definition at line 4744 of file unistr.h.

References u_strlen().

◆ startsWith() [2/4]

UBool icu::UnicodeString::startsWith	(	const UnicodeString &	srcText,
		int32_t	srcStart,
		int32_t	srcLength
	)		const

inline

Determine if this starts with the characters in srcText in the range [srcStart, srcStart + srcLength).

Parameters

srcText	The text to match.
srcStart	the offset into `srcText` to start matching
srcLength	the number of characters in `srcText` to match

Returns: true if this starts with the characters in srcText, false otherwise

Stable:: ICU 2.0

Definition at line 4730 of file unistr.h.

◆ startsWith() [3/4]

UBool icu::UnicodeString::startsWith ( const UnicodeString & text ) const

inline

Determine if this starts with the characters in text

Parameters

text	The text to match.

Returns: true if this starts with the characters in text, false otherwise

Stable:: ICU 2.0

Definition at line 4726 of file unistr.h.

References length().

◆ startsWith() [4/4]

UBool icu::UnicodeString::startsWith	(	ConstChar16Ptr	srcChars,
		int32_t	srcLength
	)		const

inline

Determine if this starts with the characters in srcChars

Parameters

srcChars	The characters to match.
srcLength	the number of characters in `srcChars`

Returns: true if this starts with the characters in srcChars, false otherwise

Stable:: ICU 2.0

Definition at line 4736 of file unistr.h.

References u_strlen().

◆ swap()

void icu::UnicodeString::swap ( UnicodeString & other )

noexcept

Swap strings.

Parameters

other other string

Stable:: ICU 56

◆ tempSubString()

UnicodeString icu::UnicodeString::tempSubString	(	int32_t	start = `0`,
		int32_t	length = `INT32_MAX`
	)		const

Create a temporary substring for the specified range.

Unlike the substring constructor and setTo() functions, the object returned here will be a read-only alias (using getBuffer()) rather than copying the text. As a result, this substring operation is much faster but requires that the original string not be modified or deleted during the lifetime of the returned substring object.

Parameters

start	offset of the first character visible in the substring
length	length of the substring

Returns: a read-only alias UnicodeString object for the substring

Stable:: ICU 4.4

Referenced by icu::MessagePattern::getSubstring().

◆ tempSubStringBetween()

UnicodeString icu::UnicodeString::tempSubStringBetween	(	int32_t	start,
		int32_t	limit = `INT32_MAX`
	)		const

inline

Create a temporary substring for the specified range.

Same as tempSubString(start, length) except that the substring range is specified as a (start, limit) pair (with an exclusive limit index) rather than a (start, length) pair.

Parameters

start	offset of the first character visible in the substring
limit	offset immediately following the last character visible in the substring

Returns: a read-only alias UnicodeString object for the substring

Stable:: ICU 4.4

Definition at line 4899 of file unistr.h.

◆ toLower() [1/2]

UnicodeString& icu::UnicodeString::toLower ( )

Convert the characters in this to lower case following the conventions of the default locale.

Returns: A reference to this.

Stable:: ICU 2.0

◆ toLower() [2/2]

UnicodeString& icu::UnicodeString::toLower ( const Locale & locale )

Convert the characters in this to lower case following the conventions of a specific locale.

Parameters

locale The locale containing the conventions to use.

Returns: A reference to this.

Stable:: ICU 2.0

◆ toTitle() [1/3]

UnicodeString& icu::UnicodeString::toTitle ( BreakIterator * titleIter )

Titlecase this string, convenience function using the default locale.

Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others.

The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. If the break iterator passed in is null, the default Unicode algorithm will be used to determine the titlecase positions.

This function uses only the setText(), first() and next() methods of the provided break iterator.

Parameters

titleIter A break iterator to find the first characters of words that are to be titlecased. If none is provided (0), then a standard titlecase break iterator is opened. Otherwise the provided iterator is set to the string's text.

Returns: A reference to this.

Stable:: ICU 2.1

◆ toTitle() [2/3]

UnicodeString& icu::UnicodeString::toTitle	(	BreakIterator *	titleIter,
		const Locale &	locale
	)

Titlecase this string.

Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others.

The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. If the break iterator passed in is null, the default Unicode algorithm will be used to determine the titlecase positions.

This function uses only the setText(), first() and next() methods of the provided break iterator.

Parameters

titleIter	A break iterator to find the first characters of words that are to be titlecased. If none is provided (0), then a standard titlecase break iterator is opened. Otherwise the provided iterator is set to the string's text.
locale	The locale to consider.

Returns: A reference to this.

Stable:: ICU 2.1

◆ toTitle() [3/3]

UnicodeString& icu::UnicodeString::toTitle	(	BreakIterator *	titleIter,
		const Locale &	locale,
		uint32_t	options
	)

Titlecase this string, with options.

Casing is locale-dependent and context-sensitive. Titlecasing uses a break iterator to find the first characters of words that are to be titlecased. It titlecases those characters and lowercases all others. (This can be modified with options.)

The titlecase break iterator can be provided to customize for arbitrary styles, using rules and dictionaries beyond the standard iterators. It may be more efficient to always provide an iterator to avoid opening and closing one for each string. If the break iterator passed in is null, the default Unicode algorithm will be used to determine the titlecase positions.

This function uses only the setText(), first() and next() methods of the provided break iterator.

Parameters

titleIter	A break iterator to find the first characters of words that are to be titlecased. If none is provided (0), then a standard titlecase break iterator is opened. Otherwise the provided iterator is set to the string's text.
locale	The locale to consider.
options	Options bit set, usually 0. See U_TITLECASE_NO_LOWERCASE, U_TITLECASE_NO_BREAK_ADJUSTMENT, U_TITLECASE_ADJUST_TO_CASED, U_TITLECASE_WHOLE_STRING, U_TITLECASE_SENTENCES.

Returns: A reference to this.

Stable:: ICU 3.8

◆ toUpper() [1/2]

UnicodeString& icu::UnicodeString::toUpper ( )

Convert the characters in this to UPPER CASE following the conventions of the default locale.

Returns: A reference to this.

Stable:: ICU 2.0

◆ toUpper() [2/2]

UnicodeString& icu::UnicodeString::toUpper ( const Locale & locale )

Convert the characters in this to UPPER CASE following the conventions of a specific locale.

Parameters

locale The locale containing the conventions to use.

Returns: A reference to this.

Stable:: ICU 2.0

◆ toUTF32()

int32_t icu::UnicodeString::toUTF32	(	UChar32 *	utf32,
		int32_t	capacity,
		UErrorCode &	errorCode
	)		const

Convert the UnicodeString to UTF-32.

Unpaired surrogates are replaced with U+FFFD. Calls u_strToUTF32WithSub().

Parameters

utf32	destination string buffer, can be nullptr if capacity==0
capacity	the number of UChar32s available at utf32
errorCode	Standard ICU error code. Its input value must pass the U_SUCCESS() test, or else the function returns immediately. Check for U_FAILURE() on output or use with function chaining. (See User Guide for details.)

Returns: The length of the UTF-32 string.

See also: fromUTF32

Stable:: ICU 4.2

◆ toUTF8()

void icu::UnicodeString::toUTF8 ( ByteSink & sink ) const

Convert the UnicodeString to UTF-8 and write the result to a ByteSink.

This is called by toUTF8String(). Unpaired surrogates are replaced with U+FFFD. Calls u_strToUTF8WithSub().

Parameters

sink	A ByteSink to which the UTF-8 version of the string is written. sink.Flush() is called at the end.

Stable:: ICU 4.2

See also: toUTF8String

◆ toUTF8String() [1/2]

template<typename StringClass >

StringClass icu::UnicodeString::toUTF8String ( ) const

inline

Convert the UnicodeString to a UTF-8 string.

Unpaired surrogates are replaced with U+FFFD. Calls toUTF8().

Template Parameters

StringClass A std::string or a std::u8string (or a compatible type)

Returns: A std::string or a std::u8string (or a compatible object) with the UTF-8 version of the string.

Draft:: This API may be changed in future versions and was introduced in ICU 78

See also: toUTF8

Definition at line 1802 of file unistr.h.

References icu::Replaceable::length().

◆ toUTF8String() [2/2]

template<typename StringClass >

StringClass& icu::UnicodeString::toUTF8String ( StringClass & result ) const

inline

Convert the UnicodeString to UTF-8 and append the result to a standard string.

Unpaired surrogates are replaced with U+FFFD. Calls toUTF8().

Template Parameters

StringClass A std::string or a std::u8string (or a compatible type)

Parameters

result A std::string or a std::u8string (or a compatible object) to which the UTF-8 version of the string is appended.

Returns: The string object.

Stable:: ICU 4.2

See also: toUTF8

Definition at line 1783 of file unistr.h.

References icu::Replaceable::length().

◆ trim()

UnicodeString& icu::UnicodeString::trim ( )

Trims leading and trailing whitespace from this UnicodeString.

Returns: a reference to this

Stable:: ICU 2.0

◆ truncate()

UBool icu::UnicodeString::truncate ( int32_t targetLength )

inline

Truncate this UnicodeString to the targetLength.

Parameters

targetLength the desired length of this UnicodeString.

Returns: true if the text was truncated, false otherwise

Stable:: ICU 2.0

Definition at line 5127 of file unistr.h.

Referenced by icu::Transliterator::setID().

◆ unescape()

UnicodeString icu::UnicodeString::unescape ( ) const

Unescape a string of characters and return a string containing the result.

The following escape sequences are recognized:

\uhhhh       4 hex digits; h in [0-9A-Fa-f]
\Uhhhhhhhh   8 hex digits
\xhh         1-2 hex digits
\ooo         1-3 octal digits; o in [0-7]
\cX          control-X; X is masked with 0x1F

as well as the standard ANSI C escapes:

\a => U+0007, \b => U+0008, \t => U+0009, \n => U+000A, \v => U+000B, \f => U+000C, \r => U+000D, \e => U+001B, \" => U+0022, \' => U+0027, \? => U+003F, \\ => U+005C

Anything else following a backslash is generically escaped. For example, "[a\\-z]" returns "[a-z]".

If an escape sequence is ill-formed, this method returns an empty string. An example of an ill-formed sequence is "\\u" followed by fewer than 4 hex digits.

This function is similar to u_unescape() but not identical to it. The latter takes a source char*, so it does escape recognition and also invariant conversion.

Returns: a string with backslash escapes interpreted, or an empty string on error.

See also: UnicodeString::unescapeAt(); u_unescape(); u_unescapeAt()

Stable:: ICU 2.0

◆ unescapeAt()

UChar32 icu::UnicodeString::unescapeAt ( int32_t & offset ) const

Unescape a single escape sequence and return the represented character.

See unescape() for a listing of the recognized escape sequences. The character at offset-1 is assumed (without checking) to be a backslash. If the escape sequence is ill-formed, or the offset is out of range, U_SENTINEL=-1 is returned.

Parameters

offset an input/output parameter. On input, it is the offset into this string where the escape sequence is located, after the initial backslash. On output, it is advanced after the last character parsed. On error, it is not advanced at all.

Returns: the character represented by the escape sequence at offset, or U_SENTINEL=-1 on error.

See also: UnicodeString::unescape(); u_unescape(); u_unescapeAt()

Stable:: ICU 2.0

Friends And Related Function Documentation

◆ swap

void swap	(	UnicodeString &	s1,
		UnicodeString &	s2
	)

friend

Non-member UnicodeString swap function.

Parameters

s1	will get s2's contents and state
s2	will get s1's contents and state

Stable:: ICU 56

Definition at line 2051 of file unistr.h.

The documentation for this class was generated from the following file:

common/unicode/unistr.h

Public Types

Public Member Functions

Static Public Member Functions

Protected Member Functions

Friends

Detailed Description

Member Typedef Documentation

◆ value_type

Member Enumeration Documentation

◆ EInvariant

Constructor & Destructor Documentation

◆ UnicodeString() [1/25]

◆ UnicodeString() [2/25]

◆ UnicodeString() [3/25]

◆ UnicodeString() [4/25]

◆ UnicodeString() [5/25]

◆ UnicodeString() [6/25]

◆ UnicodeString() [7/25]

◆ UnicodeString() [8/25]

◆ UnicodeString() [9/25]

◆ UnicodeString() [10/25]

◆ UnicodeString() [11/25]

◆ UnicodeString() [12/25]

◆ UnicodeString() [13/25]

◆ UnicodeString() [14/25]

◆ UnicodeString() [15/25]

◆ UnicodeString() [16/25]

◆ UnicodeString() [17/25]

◆ UnicodeString() [18/25]

◆ UnicodeString() [19/25]

◆ UnicodeString() [20/25]

◆ UnicodeString() [21/25]

◆ UnicodeString() [22/25]

◆ UnicodeString() [23/25]

◆ UnicodeString() [24/25]

◆ UnicodeString() [25/25]

◆ ~UnicodeString()

Member Function Documentation

◆ append() [1/7]

◆ append() [2/7]

◆ append() [3/7]

◆ append() [4/7]

◆ append() [5/7]

◆ append() [6/7]

◆ append() [7/7]

◆ begin()

◆ caseCompare() [1/6]

◆ caseCompare() [2/6]

◆ caseCompare() [3/6]

◆ caseCompare() [4/6]

◆ caseCompare() [5/6]

◆ caseCompare() [6/6]

◆ caseCompareBetween()

◆ char32At()

◆ charAt()

◆ clone()

◆ compare() [1/6]

◆ compare() [2/6]

◆ compare() [3/6]

◆ compare() [4/6]

◆ compare() [5/6]

◆ compare() [6/6]

◆ compareBetween()

◆ compareCodePointOrder() [1/6]

◆ compareCodePointOrder() [2/6]

◆ compareCodePointOrder() [3/6]

◆ compareCodePointOrder() [4/6]

◆ compareCodePointOrder() [5/6]

◆ compareCodePointOrder() [6/6]

◆ compareCodePointOrderBetween()

◆ copy()

◆ countChar32()

◆ end()

◆ endsWith() [1/4]

◆ endsWith() [2/4]

◆ endsWith() [3/4]

◆ endsWith() [4/4]

◆ extract() [1/8]

◆ extract() [2/8]

◆ extract() [3/8]