18 #if U_SHOW_CPLUSPLUS_API
35 class RBBIRuleScanner;
37 class UnicodeSetStringSpan;
39 class RuleCharacterIterator;
293 static constexpr uint8_t kIsBogus = 1;
300 BMPSet *bmpSet =
nullptr;
302 int32_t bufferCapacity = 0;
313 char16_t *pat =
nullptr;
316 UVector* strings_ =
nullptr;
317 UnicodeSetStringSpan *stringSpan =
nullptr;
336 inline UBool isBogus()
const;
394 #ifndef U_HIDE_INTERNAL_API
427 #ifndef U_HIDE_INTERNAL_API
554 inline USet *toUSet();
564 inline const USet * toUSet()
const;
579 inline UBool isFrozen()
const;
644 #ifndef U_HIDE_INTERNAL_API
722 UBool escapeUnprintable =
false)
const override;
1027 UBool incremental)
override;
1053 int32_t start, int32_t limit,
1065 int32_t findCodePoint(
UChar32 c)
const;
1105 #ifndef U_HIDE_DRAFT_API
1123 inline U_HEADER_NESTED_NAMESPACE::USetCodePoints
codePoints()
const {
1124 return U_HEADER_NESTED_NAMESPACE::USetCodePoints(toUSet());
1149 inline U_HEADER_NESTED_NAMESPACE::USetRanges
ranges()
const {
1150 return U_HEADER_NESTED_NAMESPACE::USetRanges(toUSet());
1173 inline U_HEADER_NESTED_NAMESPACE::USetStrings
strings()
const {
1174 return U_HEADER_NESTED_NAMESPACE::USetStrings(toUSet());
1178 #ifndef U_HIDE_DRAFT_API
1203 inline U_HEADER_NESTED_NAMESPACE::USetElementIterator
begin()
const {
1204 return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).begin();
1215 inline U_HEADER_NESTED_NAMESPACE::USetElementIterator
end()
const {
1216 return U_HEADER_NESTED_NAMESPACE::USetElements(toUSet()).end();
1663 friend class USetAccess;
1681 friend class RBBIRuleScanner;
1699 void applyPattern(RuleCharacterIterator& chars,
1707 void closeOverCaseInsensitive(
bool simple);
1708 void closeOverAddCaseMappings();
1714 static int32_t nextCapacity(int32_t minCapacity);
1716 bool ensureCapacity(int32_t newLen);
1718 bool ensureBufferCapacity(int32_t newLen);
1723 int32_t stringsSize()
const;
1727 UBool escapeUnprintable)
const;
1730 UBool escapeUnprintable)
const;
1737 UBool escapeUnprintable);
1743 void exclusiveOr(
const UChar32* other, int32_t otherLen, int8_t polarity);
1745 void add(
const UChar32* other, int32_t otherLen, int8_t polarity);
1747 void retain(
const UChar32* other, int32_t otherLen, int8_t polarity);
1757 static UBool resemblesPropertyPattern(RuleCharacterIterator& chars,
1803 void applyPropertyPattern(RuleCharacterIterator& chars,
1811 typedef UBool (*Filter)(
UChar32 codePoint,
void* context);
1822 void applyFilter(Filter filter,
1833 void setPattern(
const char16_t *newPat, int32_t newPatLen);
1837 void releasePattern();
1848 inline UBool UnicodeSet::isFrozen()
const {
1849 return bmpSet !=
nullptr || stringSpan !=
nullptr;
1853 return !containsNone(start, end);
1857 return !containsNone(s);
1861 return !containsNone(s);
1864 inline UBool UnicodeSet::isBogus()
const {
1865 return fFlags & kIsBogus;
1873 return reinterpret_cast<const UnicodeSet *
>(uset);
1876 inline USet *UnicodeSet::toUSet() {
1877 return reinterpret_cast<USet *
>(
this);
1880 inline const USet *UnicodeSet::toUSet()
const {
1881 return reinterpret_cast<const USet *
>(
this);
1885 int32_t sLength=s.
length();
1888 }
else if(start>sLength) {
1891 return start+span(s.
getBuffer()+start, sLength-start, spanCondition);
1895 int32_t sLength=s.
length();
1898 }
else if(limit>sLength) {
1901 return spanBack(s.
getBuffer(), limit, spanCondition);
#define INITIAL_CAPACITY
The initial size of an array if it is unspecified.
ParsePosition is a simple class used by Format and its subclasses to keep track of the current position during parsing.
Replaceable is an abstract base class representing a string of characters that supports the replacement of a range of itself with a new string of characters.
An interface that defines both lookup protocol and parsing of symbolic names.
UnicodeFilter defines a protocol for selecting a subset of the full range (U+0000 to U+10FFFF) of Unicode characters.
virtual UBool matchesIndexValue(uint8_t v) const =0
Returns true if this matcher will match a character c, where c & 0xFF == v, at offset, in the forward direction (with limit > offset).
UnicodeSetIterator iterates over the contents of a UnicodeSet.
A mutable set of Unicode characters and multicharacter strings.
U_HEADER_NESTED_NAMESPACE::USetStrings strings() const
Returns a C++ "range" for iterating over the empty and multi-character strings of this set.
virtual UnicodeSet & removeAll(const UnicodeSet &c)
Removes from this set all of its elements that are contained in the specified set.
UnicodeSet * cloneAsThawed() const
Clone the set and make the clone mutable.
virtual UChar32 getRangeEnd(int32_t index) const
Iteration method that returns the last character in the specified range of this set.
UnicodeSet()
Constructs an empty set.
int32_t spanBackUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const
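Returns the start of the trailing substring of the UTF-8 input string which consists only of characters and strings that are contained in this set, or only of characters and strings that are not contained in this set, depending on spanCondition.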
virtual UnicodeSet & complement(UChar32 start, UChar32 end)
Complements the specified range in this set.
UnicodeSet & remove(UChar32 c)
Removes the specified character from this set if it is present.
UnicodeSet(UChar32 start, UChar32 end)
Constructs a set containing the given range.
UnicodeSet & complementAll(const UnicodeString &s)
Complement EACH of the characters in this string.
int32_t spanUTF8(const char *s, int32_t length, USetSpanCondition spanCondition) const
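Returns the length of the initial substring of the UTF-8 input string which consists only of characters and strings that are contained in this set, or only of characters and strings that are not contained in this set, depending on spanCondition.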
virtual UBool isEmpty() const
Returns true if this set contains no elements.
void setToBogus()
Make this UnicodeSet object invalid.
UnicodeSet & retain(UChar32 c)
Retain the specified character from this set if it is present.
virtual UClassID getDynamicClassID() const override
Implement UnicodeFunctor API.
virtual int32_t size() const
Returns the number of elements in this set (its cardinality).
virtual UnicodeSet & retain(UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
virtual void addMatchSetTo(UnicodeSet &toUnionTo) const override
Implementation of UnicodeMatcher API.
UnicodeSet & retainAll(const UnicodeString &s)
Retains EACH of the characters in this string.
virtual UChar32 getRangeStart(int32_t index) const
Iteration method that returns the first character in the specified range of this set.
UnicodeSet & complement(const UnicodeString &s)
Complement the specified string in this set.
UnicodeSet * freeze()
Freeze the set (make it immutable).
UnicodeSet & addAll(const UnicodeString &s)
Adds each of the characters in this string to the set.
UnicodeSet & remove(const UnicodeString &s)
Removes the specified string from this set if it is present.
int32_t indexOf(UChar32 c) const
Returns the index of the given character within this set, where the set is ordered by ascending code point.
U_HEADER_NESTED_NAMESPACE::USetCodePoints codePoints() const
Returns a C++ "range" for iterating over the code points of this set.
virtual UnicodeSet & addAll(const UnicodeSet &c)
Adds all of the elements in the specified set to this set if they're not already present.
U_HEADER_NESTED_NAMESPACE::USetElementIterator end() const
UnicodeSet(const UnicodeString &pattern, ParsePosition &pos, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Constructs a set from the given pattern.
virtual UnicodeString & toPattern(UnicodeString &result, UBool escapeUnprintable=false) const override
Returns a string representation of this set.
U_HEADER_NESTED_NAMESPACE::USetElementIterator begin() const
Returns a C++ iterator for iterating over all of the elements of this set.
UnicodeSet & applyPattern(const UnicodeString &pattern, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Modifies this set to represent the set specified by the given pattern, optionally ignoring Unicode Pattern_White_Space characters.
virtual UnicodeSet & complement()
This is equivalent to complement(MIN_VALUE, MAX_VALUE).
UnicodeSet & retain(const UnicodeString &s)
Retains only the specified string from this set if it is present.
U_HEADER_NESTED_NAMESPACE::USetRanges ranges() const
Returns a C++ "range" for iterating over the code point ranges of this set.
virtual UBool containsAll(const UnicodeSet &c) const
Returns true if this set contains all the characters and strings of the given set.
UnicodeSet & applyPattern(const UnicodeString &pattern, UErrorCode &status)
Modifies this set to represent the set specified by the given pattern, ignoring Unicode Pattern_White_Space characters.
static UnicodeSet * createFrom(const UnicodeString &s)
Makes a set from a multicharacter string.
UBool containsNone(const UnicodeString &s) const
Returns true if this set contains none of the characters of the given string.
static UnicodeSet * createFromAll(const UnicodeString &s)
Makes a set from each of the characters in the string.
virtual UnicodeSet & retainAll(const UnicodeSet &c)
Retains only the elements in this set that are contained in the specified set.
UnicodeSet(const UnicodeSet &o)
Constructs a set that is identical to the given UnicodeSet.
static UBool resemblesPattern(const UnicodeString &pattern, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.
virtual ~UnicodeSet()
Destructs the set.
int32_t span(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const
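Returns the length of the initial substring of the input string which consists only of characters and strings that are contained in this set, or only of characters and strings that are not contained in this set, depending on spanCondition.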
UChar32 charAt(int32_t index) const
Returns the character at the given index within this set, where the set is ordered by ascending code point.
static UClassID getStaticClassID()
Return the class ID for this class.
UnicodeSet & closeOver(int32_t attribute)
Close this set over the given attribute.
UnicodeSet & applyPattern(const UnicodeString &pattern, ParsePosition &pos, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Parses the given pattern, starting at the given position.
UBool containsNone(UChar32 start, UChar32 end) const
Returns true if this set contains none of the characters of the given range.
UnicodeSet & add(UChar32 c)
Adds the specified character to this set if it is not already present.
UnicodeSet & applyIntPropertyValue(UProperty prop, int32_t value, UErrorCode &ec)
Modifies this set to contain those code points which have the given value for the given binary or enumerated property.
virtual UBool contains(UChar32 start, UChar32 end) const
Returns true if this set contains every character of the given range.
virtual int32_t getRangeCount() const
Iteration method that returns the number of ranges contained in this set.
UBool containsAll(const UnicodeString &s) const
Returns true if this set contains all the characters of the given string.
int32_t serialize(uint16_t *dest, int32_t destCapacity, UErrorCode &ec) const
Serializes this set into an array of 16-bit integers.
UnicodeSet & add(const UnicodeString &s)
Adds the specified multicharacter string to this set if it is not already present.
UBool contains(const UnicodeString &s) const
Returns true if this set contains the given multicharacter string.
virtual UnicodeSet & removeAllStrings()
Remove all strings from this set.
virtual UnicodeSet * clone() const override
Returns a copy of this object.
virtual int32_t hashCode() const
Returns the hash code value for this set.
virtual bool operator==(const UnicodeSet &o) const
Compares the specified object with this set for equality.
UnicodeSet(const UnicodeString &pattern, UErrorCode &status)
Constructs a set from the given pattern.
UnicodeSet & complement(UChar32 c)
Complements the specified character in this set.
int32_t spanBack(const char16_t *s, int32_t length, USetSpanCondition spanCondition) const
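Returns the start of the trailing substring of the input string which consists only of characters and strings that are contained in this set, or only of characters and strings that are not contained in this set, depending on spanCondition.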
virtual UnicodeSet & compact()
Reallocate this object's internal structures to take up the least possible space, without changing this object's value.
virtual UMatchDegree matches(const Replaceable &text, int32_t &offset, int32_t limit, UBool incremental) override
Implement UnicodeMatcher::matches()
UnicodeSet & removeAll(const UnicodeString &s)
Remove EACH of the characters in this string.
virtual UnicodeSet & add(UChar32 start, UChar32 end)
Adds the specified range to this set if it is not already present.
virtual UBool contains(UChar32 c) const override
Returns true if this set contains the given character.
UnicodeSet(const uint16_t buffer[], int32_t bufferLen, ESerialization serialization, UErrorCode &status)
Constructs a set from the output of serialize().
virtual UnicodeSet & remove(UChar32 start, UChar32 end)
Removes the specified range from this set if it is present.
virtual UnicodeSet & complementAll(const UnicodeSet &c)
Complements in this set all elements contained in the specified set.
UnicodeSet(const UnicodeString &pattern, uint32_t options, const SymbolTable *symbols, UErrorCode &status)
Constructs a set from the given pattern.
UnicodeSet & applyPropertyAlias(const UnicodeString &prop, const UnicodeString &value, UErrorCode &ec)
Modifies this set to contain those code points which have the given value for the given property.
UnicodeSet & set(UChar32 start, UChar32 end)
Make this object represent the range start - end.
UnicodeSet & operator=(const UnicodeSet &o)
Assigns this object to be a copy of another.
virtual UnicodeSet & clear()
Removes all of the elements from this set.
UBool containsNone(const UnicodeSet &c) const
Returns true if this set contains none of the characters and strings of the given set.
UnicodeString is a string class that stores Unicode characters directly and provides functionality similar to the Java String and StringBuffer/StringBuilder classes.
int32_t length() const
Return the length of the UnicodeString object.
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
UMatchDegree
Constants returned by UnicodeMatcher::matches() indicating the degree of match.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
UProperty
Selection constants for Unicode properties.
C API: This file defines an abstract map from Unicode code points to integer values.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
int8_t UBool
The ICU boolean type, a signed-byte integer.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is contained or not contained in the set.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.