ICU 75.1
Loading...
Searching...
No Matches
uset.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uset.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002mar07
16* created by: Markus W. Scherer
17*
18* C version of UnicodeSet.
19*/
20
21
29#ifndef __USET_H__
30#define __USET_H__
31
32#include "unicode/utypes.h"
33#include "unicode/uchar.h"
34
35#if U_SHOW_CPLUSPLUS_API
37#endif // U_SHOW_CPLUSPLUS_API
38
39#ifndef USET_DEFINED
40
41#ifndef U_IN_DOXYGEN
42#define USET_DEFINED
43#endif
50typedef struct USet USet;
51#endif
52
64enum {
70
98
111
126
182typedef enum USetSpanCondition {
231#ifndef U_HIDE_DEPRECATED_API
237#endif // U_HIDE_DEPRECATED_API
239
240enum {
249
277
278/*********************************************************************
279 * USet API
280 *********************************************************************/
281
289U_CAPI USet* U_EXPORT2
291
302U_CAPI USet* U_EXPORT2
304
314U_CAPI USet* U_EXPORT2
315uset_openPattern(const UChar* pattern, int32_t patternLength,
316 UErrorCode* ec);
317
331U_CAPI USet* U_EXPORT2
332uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
333 uint32_t options,
334 UErrorCode* ec);
335
342U_CAPI void U_EXPORT2
344
345#if U_SHOW_CPLUSPLUS_API
346
347U_NAMESPACE_BEGIN
348
359
360U_NAMESPACE_END
361
362#endif
363
373U_CAPI USet * U_EXPORT2
374uset_clone(const USet *set);
375
385U_CAPI UBool U_EXPORT2
386uset_isFrozen(const USet *set);
387
402U_CAPI void U_EXPORT2
404
415U_CAPI USet * U_EXPORT2
417
427U_CAPI void U_EXPORT2
429 UChar32 start, UChar32 end);
430
455U_CAPI int32_t U_EXPORT2
457 const UChar *pattern, int32_t patternLength,
458 uint32_t options,
459 UErrorCode *status);
460
483U_CAPI void U_EXPORT2
485 UProperty prop, int32_t value, UErrorCode* ec);
486
522U_CAPI void U_EXPORT2
524 const UChar *prop, int32_t propLength,
525 const UChar *value, int32_t valueLength,
526 UErrorCode* ec);
527
537U_CAPI UBool U_EXPORT2
538uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
539 int32_t pos);
540
556U_CAPI int32_t U_EXPORT2
558 UChar* result, int32_t resultCapacity,
559 UBool escapeUnprintable,
560 UErrorCode* ec);
561
570U_CAPI void U_EXPORT2
572
585U_CAPI void U_EXPORT2
586uset_addAll(USet* set, const USet *additionalSet);
587
597U_CAPI void U_EXPORT2
599
609U_CAPI void U_EXPORT2
610uset_addString(USet* set, const UChar* str, int32_t strLen);
611
621U_CAPI void U_EXPORT2
622uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
623
632U_CAPI void U_EXPORT2
634
644U_CAPI void U_EXPORT2
646
656U_CAPI void U_EXPORT2
657uset_removeString(USet* set, const UChar* str, int32_t strLen);
658
668U_CAPI void U_EXPORT2
669uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
670
682U_CAPI void U_EXPORT2
683uset_removeAll(USet* set, const USet* removeSet);
684
697U_CAPI void U_EXPORT2
698uset_retain(USet* set, UChar32 start, UChar32 end);
699
711U_CAPI void U_EXPORT2
712uset_retainString(USet *set, const UChar *str, int32_t length);
713
723U_CAPI void U_EXPORT2
724uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
725
738U_CAPI void U_EXPORT2
739uset_retainAll(USet* set, const USet* retain);
740
749U_CAPI void U_EXPORT2
751
765U_CAPI void U_EXPORT2
767
781U_CAPI void U_EXPORT2
783
794U_CAPI void U_EXPORT2
795uset_complementString(USet *set, const UChar *str, int32_t length);
796
806U_CAPI void U_EXPORT2
807uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
808
820U_CAPI void U_EXPORT2
821uset_complementAll(USet* set, const USet* complement);
822
830U_CAPI void U_EXPORT2
832
861U_CAPI void U_EXPORT2
862uset_closeOver(USet* set, int32_t attributes);
863
870U_CAPI void U_EXPORT2
872
880U_CAPI UBool U_EXPORT2
881uset_isEmpty(const USet* set);
882
888U_CAPI UBool U_EXPORT2
890
899U_CAPI UBool U_EXPORT2
901
911U_CAPI UBool U_EXPORT2
912uset_containsRange(const USet* set, UChar32 start, UChar32 end);
913
922U_CAPI UBool U_EXPORT2
923uset_containsString(const USet* set, const UChar* str, int32_t strLen);
924
935U_CAPI int32_t U_EXPORT2
936uset_indexOf(const USet* set, UChar32 c);
937
953U_CAPI UChar32 U_EXPORT2
954uset_charAt(const USet* set, int32_t charIndex);
955
969U_CAPI int32_t U_EXPORT2
970uset_size(const USet* set);
971
980U_CAPI int32_t U_EXPORT2
982
991U_CAPI int32_t U_EXPORT2
993
1022U_CAPI int32_t U_EXPORT2
1023uset_getItem(const USet* set, int32_t itemIndex,
1024 UChar32* start, UChar32* end,
1025 UChar* str, int32_t strCapacity,
1026 UErrorCode* ec);
1027
1036U_CAPI UBool U_EXPORT2
1037uset_containsAll(const USet* set1, const USet* set2);
1038
1049U_CAPI UBool U_EXPORT2
1050uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1051
1060U_CAPI UBool U_EXPORT2
1061uset_containsNone(const USet* set1, const USet* set2);
1062
1071U_CAPI UBool U_EXPORT2
1072uset_containsSome(const USet* set1, const USet* set2);
1073
1093U_CAPI int32_t U_EXPORT2
1094uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1095
1114U_CAPI int32_t U_EXPORT2
1115uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1116
1136U_CAPI int32_t U_EXPORT2
1137uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1138
1157U_CAPI int32_t U_EXPORT2
1158uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1159
1168U_CAPI UBool U_EXPORT2
1169uset_equals(const USet* set1, const USet* set2);
1170
1171/*********************************************************************
1172 * Serialized set API
1173 *********************************************************************/
1174
1224U_CAPI int32_t U_EXPORT2
1225uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1226
1235U_CAPI UBool U_EXPORT2
1236uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1237
1245U_CAPI void U_EXPORT2
1247
1256U_CAPI UBool U_EXPORT2
1258
1268U_CAPI int32_t U_EXPORT2
1270
1284U_CAPI UBool U_EXPORT2
1285uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1286 UChar32* pStart, UChar32* pEnd);
1287
1288#endif
"Smart pointer" class, closes a USet via uset_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:255
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:275
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:265
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:260
int32_t length
The total length of the array.
Definition uset.h:270
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:196
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:378
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_hasStrings(const USet *set)
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:69
@ USET_ADD_CASE_MAPPINGS
Adds all case mappings for each element in the set.
Definition uset.h:110
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:97
@ USET_SIMPLE_CASE_INSENSITIVE
Enable case-insensitive matching, using only simple case folding (Simple_Case_Folding) mappings.
Definition uset.h:124
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code ...
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Return true if the given position, in the given pattern, appears to be the start of a UnicodeSet patt...
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is cont...
Definition uset.h:182
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:195
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:210
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:236
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:230
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI int32_t uset_getRangeCount(const USet *set)
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code ...
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:247
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition uset.h:50
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing thi...
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:415