ICU 76.1 76.1
Loading...
Searching...
No Matches
uset.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2002-2014, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: uset.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2002mar07
16* created by: Markus W. Scherer
17*
18* C version of UnicodeSet.
19*/
20
21
29#ifndef __USET_H__
30#define __USET_H__
31
32#include "unicode/utypes.h"
33#include "unicode/uchar.h"
34
35#if U_SHOW_CPLUSPLUS_API
36#include <string_view>
37#include "unicode/char16ptr.h"
39#include "unicode/unistr.h"
40#endif // U_SHOW_CPLUSPLUS_API
41
42#ifndef USET_DEFINED
43
44#ifndef U_IN_DOXYGEN
45#define USET_DEFINED
46#endif
53typedef struct USet USet;
54#endif
55
67enum {
73
101
114
129
185typedef enum USetSpanCondition {
234#ifndef U_HIDE_DEPRECATED_API
240#endif // U_HIDE_DEPRECATED_API
242
243enum {
252
280
281/*********************************************************************
282 * USet API
283 *********************************************************************/
284
292U_CAPI USet* U_EXPORT2
294
305U_CAPI USet* U_EXPORT2
307
317U_CAPI USet* U_EXPORT2
318uset_openPattern(const UChar* pattern, int32_t patternLength,
319 UErrorCode* ec);
320
334U_CAPI USet* U_EXPORT2
335uset_openPatternOptions(const UChar* pattern, int32_t patternLength,
336 uint32_t options,
337 UErrorCode* ec);
338
345U_CAPI void U_EXPORT2
347
348#if U_SHOW_CPLUSPLUS_API
349
350U_NAMESPACE_BEGIN
351
362
363U_NAMESPACE_END
364
365#endif
366
376U_CAPI USet * U_EXPORT2
377uset_clone(const USet *set);
378
388U_CAPI UBool U_EXPORT2
389uset_isFrozen(const USet *set);
390
405U_CAPI void U_EXPORT2
407
418U_CAPI USet * U_EXPORT2
420
430U_CAPI void U_EXPORT2
432 UChar32 start, UChar32 end);
433
458U_CAPI int32_t U_EXPORT2
460 const UChar *pattern, int32_t patternLength,
461 uint32_t options,
462 UErrorCode *status);
463
486U_CAPI void U_EXPORT2
488 UProperty prop, int32_t value, UErrorCode* ec);
489
525U_CAPI void U_EXPORT2
527 const UChar *prop, int32_t propLength,
528 const UChar *value, int32_t valueLength,
529 UErrorCode* ec);
530
540U_CAPI UBool U_EXPORT2
541uset_resemblesPattern(const UChar *pattern, int32_t patternLength,
542 int32_t pos);
543
559U_CAPI int32_t U_EXPORT2
561 UChar* result, int32_t resultCapacity,
562 UBool escapeUnprintable,
563 UErrorCode* ec);
564
573U_CAPI void U_EXPORT2
575
588U_CAPI void U_EXPORT2
589uset_addAll(USet* set, const USet *additionalSet);
590
600U_CAPI void U_EXPORT2
602
612U_CAPI void U_EXPORT2
613uset_addString(USet* set, const UChar* str, int32_t strLen);
614
624U_CAPI void U_EXPORT2
625uset_addAllCodePoints(USet* set, const UChar *str, int32_t strLen);
626
635U_CAPI void U_EXPORT2
637
647U_CAPI void U_EXPORT2
649
659U_CAPI void U_EXPORT2
660uset_removeString(USet* set, const UChar* str, int32_t strLen);
661
671U_CAPI void U_EXPORT2
672uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length);
673
685U_CAPI void U_EXPORT2
686uset_removeAll(USet* set, const USet* removeSet);
687
700U_CAPI void U_EXPORT2
701uset_retain(USet* set, UChar32 start, UChar32 end);
702
714U_CAPI void U_EXPORT2
715uset_retainString(USet *set, const UChar *str, int32_t length);
716
726U_CAPI void U_EXPORT2
727uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length);
728
741U_CAPI void U_EXPORT2
742uset_retainAll(USet* set, const USet* retain);
743
752U_CAPI void U_EXPORT2
754
768U_CAPI void U_EXPORT2
770
784U_CAPI void U_EXPORT2
786
797U_CAPI void U_EXPORT2
798uset_complementString(USet *set, const UChar *str, int32_t length);
799
809U_CAPI void U_EXPORT2
810uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length);
811
823U_CAPI void U_EXPORT2
824uset_complementAll(USet* set, const USet* complement);
825
833U_CAPI void U_EXPORT2
835
864U_CAPI void U_EXPORT2
865uset_closeOver(USet* set, int32_t attributes);
866
873U_CAPI void U_EXPORT2
875
883U_CAPI UBool U_EXPORT2
884uset_isEmpty(const USet* set);
885
891U_CAPI UBool U_EXPORT2
893
902U_CAPI UBool U_EXPORT2
904
914U_CAPI UBool U_EXPORT2
915uset_containsRange(const USet* set, UChar32 start, UChar32 end);
916
925U_CAPI UBool U_EXPORT2
926uset_containsString(const USet* set, const UChar* str, int32_t strLen);
927
938U_CAPI int32_t U_EXPORT2
939uset_indexOf(const USet* set, UChar32 c);
940
956U_CAPI UChar32 U_EXPORT2
957uset_charAt(const USet* set, int32_t charIndex);
958
974U_CAPI int32_t U_EXPORT2
975uset_size(const USet* set);
976
986U_CAPI int32_t U_EXPORT2
988
989#ifndef U_HIDE_DRAFT_API
990
999U_CAPI int32_t U_EXPORT2
1001
1014U_CAPI const UChar* U_EXPORT2
1015uset_getString(const USet *set, int32_t index, int32_t *pLength);
1016
1017#endif // U_HIDE_DRAFT_API
1018
1029U_CAPI int32_t U_EXPORT2
1031
1062U_CAPI int32_t U_EXPORT2
1063uset_getItem(const USet* set, int32_t itemIndex,
1064 UChar32* start, UChar32* end,
1065 UChar* str, int32_t strCapacity,
1066 UErrorCode* ec);
1067
1076U_CAPI UBool U_EXPORT2
1077uset_containsAll(const USet* set1, const USet* set2);
1078
1089U_CAPI UBool U_EXPORT2
1090uset_containsAllCodePoints(const USet* set, const UChar *str, int32_t strLen);
1091
1100U_CAPI UBool U_EXPORT2
1101uset_containsNone(const USet* set1, const USet* set2);
1102
1111U_CAPI UBool U_EXPORT2
1112uset_containsSome(const USet* set1, const USet* set2);
1113
1133U_CAPI int32_t U_EXPORT2
1134uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1135
1154U_CAPI int32_t U_EXPORT2
1155uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition);
1156
1176U_CAPI int32_t U_EXPORT2
1177uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1178
1197U_CAPI int32_t U_EXPORT2
1198uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition);
1199
1208U_CAPI UBool U_EXPORT2
1209uset_equals(const USet* set1, const USet* set2);
1210
1211/*********************************************************************
1212 * Serialized set API
1213 *********************************************************************/
1214
1264U_CAPI int32_t U_EXPORT2
1265uset_serialize(const USet* set, uint16_t* dest, int32_t destCapacity, UErrorCode* pErrorCode);
1266
1275U_CAPI UBool U_EXPORT2
1276uset_getSerializedSet(USerializedSet* fillSet, const uint16_t* src, int32_t srcLength);
1277
1285U_CAPI void U_EXPORT2
1287
1296U_CAPI UBool U_EXPORT2
1298
1308U_CAPI int32_t U_EXPORT2
1310
1324U_CAPI UBool U_EXPORT2
1325uset_getSerializedRange(const USerializedSet* set, int32_t rangeIndex,
1326 UChar32* pStart, UChar32* pEnd);
1327
1328#if U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1329#ifndef U_HIDE_DRAFT_API
1330
1331namespace U_HEADER_ONLY_NAMESPACE {
1332
1333// Note: Not U_COMMON_API, and not a subclass of UMemory, because this is a header-only class,
1334// not intended to be used via export from the ICU DLL.
1335
1341public:
1344
1346 bool operator==(const USetCodePointIterator &other) const {
1347 // No need to compare rangeCount & end given private constructor
1348 // and assuming we don't compare iterators across the set being modified.
1349 // And comparing rangeIndex is redundant with comparing c.
1350 // We might even skip comparing uset.
1351 // Unless we want operator==() to be "correct" for more than iteration.
1352 return uset == other.uset && c == other.c;
1353 }
1354
1356 bool operator!=(const USetCodePointIterator &other) const { return !operator==(other); }
1357
1359 UChar32 operator*() const { return c; }
1360
1366 if (c < end) {
1367 ++c;
1368 } else if (rangeIndex < rangeCount) {
1369 UErrorCode errorCode = U_ZERO_ERROR;
1370 int32_t result = uset_getItem(uset, rangeIndex, &c, &end, nullptr, 0, &errorCode);
1371 if (U_SUCCESS(errorCode) && result == 0) {
1372 ++rangeIndex;
1373 } else {
1374 c = end = U_SENTINEL;
1375 }
1376 } else {
1377 c = end = U_SENTINEL;
1378 }
1379 return *this;
1380 }
1381
1387 USetCodePointIterator result(*this);
1388 operator++();
1389 return result;
1390 }
1391
1392private:
1393 friend class USetCodePoints;
1394
1395 USetCodePointIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
1396 : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount),
1397 c(U_SENTINEL), end(U_SENTINEL) {
1398 // Fetch the first range.
1399 operator++();
1400 }
1401
1402 const USet *uset;
1403 int32_t rangeIndex;
1404 int32_t rangeCount;
1405 UChar32 c, end;
1406};
1407
1427public:
1432 USetCodePoints(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
1433
1435 USetCodePoints(const USetCodePoints &other) = default;
1436
1439 return USetCodePointIterator(uset, 0, rangeCount);
1440 }
1441
1444 return USetCodePointIterator(uset, rangeCount, rangeCount);
1445 }
1446
1447private:
1448 const USet *uset;
1449 int32_t rangeCount;
1450};
1451
1461 struct iterator {
1464
1466 bool operator==(const iterator &other) const { return c == other.c; }
1468 bool operator!=(const iterator &other) const { return !operator==(other); }
1469
1471 UChar32 operator*() const { return c; }
1472
1478 ++c;
1479 return *this;
1480 }
1481
1487 return c++;
1488 }
1489
1495 };
1496
1500 CodePointRange(const CodePointRange &other) = default;
1502 size_t size() const { return (rangeEnd + 1) - rangeStart; }
1504 iterator begin() const { return rangeStart; }
1506 iterator end() const { return rangeEnd + 1; }
1507
1518};
1519
1525public:
1527 USetRangeIterator(const USetRangeIterator &other) = default;
1528
1530 bool operator==(const USetRangeIterator &other) const {
1531 // No need to compare rangeCount given private constructor
1532 // and assuming we don't compare iterators across the set being modified.
1533 // We might even skip comparing uset.
1534 // Unless we want operator==() to be "correct" for more than iteration.
1535 return uset == other.uset && rangeIndex == other.rangeIndex;
1536 }
1537
1539 bool operator!=(const USetRangeIterator &other) const { return !operator==(other); }
1540
1543 if (rangeIndex < rangeCount) {
1544 UChar32 start, end;
1545 UErrorCode errorCode = U_ZERO_ERROR;
1546 int32_t result = uset_getItem(uset, rangeIndex, &start, &end, nullptr, 0, &errorCode);
1547 if (U_SUCCESS(errorCode) && result == 0) {
1548 return CodePointRange(start, end);
1549 }
1550 }
1552 }
1553
1559 ++rangeIndex;
1560 return *this;
1561 }
1562
1568 USetRangeIterator result(*this);
1569 ++rangeIndex;
1570 return result;
1571 }
1572
1573private:
1574 friend class USetRanges;
1575
1576 USetRangeIterator(const USet *uset, int32_t rangeIndex, int32_t rangeCount)
1577 : uset(uset), rangeIndex(rangeIndex), rangeCount(rangeCount) {}
1578
1579 const USet *uset;
1580 int32_t rangeIndex;
1581 int32_t rangeCount;
1582};
1583
1608public:
1613 USetRanges(const USet *uset) : uset(uset), rangeCount(uset_getRangeCount(uset)) {}
1614
1616 USetRanges(const USetRanges &other) = default;
1617
1620 return USetRangeIterator(uset, 0, rangeCount);
1621 }
1622
1625 return USetRangeIterator(uset, rangeCount, rangeCount);
1626 }
1627
1628private:
1629 const USet *uset;
1630 int32_t rangeCount;
1631};
1632
1638public:
1641
1643 bool operator==(const USetStringIterator &other) const {
1644 // No need to compare count given private constructor
1645 // and assuming we don't compare iterators across the set being modified.
1646 // We might even skip comparing uset.
1647 // Unless we want operator==() to be "correct" for more than iteration.
1648 return uset == other.uset && index == other.index;
1649 }
1650
1652 bool operator!=(const USetStringIterator &other) const { return !operator==(other); }
1653
1655 std::u16string_view operator*() const {
1656 if (index < count) {
1657 int32_t length;
1658 const UChar *uchars = uset_getString(uset, index, &length);
1659 // assert uchars != nullptr;
1660 return {ConstChar16Ptr(uchars), static_cast<uint32_t>(length)};
1661 }
1662 return {};
1663 }
1664
1670 ++index;
1671 return *this;
1672 }
1673
1679 USetStringIterator result(*this);
1680 ++index;
1681 return result;
1682 }
1683
1684private:
1685 friend class USetStrings;
1686
1687 USetStringIterator(const USet *uset, int32_t index, int32_t count)
1688 : uset(uset), index(index), count(count) {}
1689
1690 const USet *uset;
1691 int32_t index;
1692 int32_t count;
1693};
1694
1716public:
1721 USetStrings(const USet *uset) : uset(uset), count(uset_getStringCount(uset)) {}
1722
1724 USetStrings(const USetStrings &other) = default;
1725
1728 return USetStringIterator(uset, 0, count);
1729 }
1730
1733 return USetStringIterator(uset, count, count);
1734 }
1735
1736private:
1737 const USet *uset;
1738 int32_t count;
1739};
1740
1746public:
1749
1751 bool operator==(const USetElementIterator &other) const {
1752 // No need to compare rangeCount & end given private constructor
1753 // and assuming we don't compare iterators across the set being modified.
1754 // We might even skip comparing uset.
1755 // Unless we want operator==() to be "correct" for more than iteration.
1756 return uset == other.uset && c == other.c && index == other.index;
1757 }
1758
1760 bool operator!=(const USetElementIterator &other) const { return !operator==(other); }
1761
1763 UnicodeString operator*() const {
1764 if (c >= 0) {
1765 return UnicodeString(c);
1766 } else if (index < totalCount) {
1767 int32_t length;
1768 const UChar *uchars = uset_getString(uset, index - rangeCount, &length);
1769 // assert uchars != nullptr;
1770 return UnicodeString(uchars, length);
1771 } else {
1772 return UnicodeString();
1773 }
1774 }
1775
1781 if (c < end) {
1782 ++c;
1783 } else if (index < rangeCount) {
1784 UErrorCode errorCode = U_ZERO_ERROR;
1785 int32_t result = uset_getItem(uset, index, &c, &end, nullptr, 0, &errorCode);
1786 if (U_SUCCESS(errorCode) && result == 0) {
1787 ++index;
1788 } else {
1789 c = end = U_SENTINEL;
1790 }
1791 } else if (c >= 0) {
1792 // assert index == rangeCount;
1793 // Switch from the last range to the first string.
1794 c = end = U_SENTINEL;
1795 } else {
1796 ++index;
1797 }
1798 return *this;
1799 }
1800
1806 USetElementIterator result(*this);
1807 operator++();
1808 return result;
1809 }
1810
1811private:
1812 friend class USetElements;
1813
1814 USetElementIterator(const USet *uset, int32_t index, int32_t rangeCount, int32_t totalCount)
1815 : uset(uset), index(index), rangeCount(rangeCount), totalCount(totalCount),
1816 c(U_SENTINEL), end(U_SENTINEL) {
1817 if (index < rangeCount) {
1818 // Fetch the first range.
1819 operator++();
1820 }
1821 // Otherwise don't move beyond the (index - rangeCount)-th string.
1822 }
1823
1824 const USet *uset;
1825 int32_t index;
1827 int32_t rangeCount;
1837 int32_t totalCount;
1838 UChar32 c, end;
1839};
1840
1866public:
1871 USetElements(const USet *uset)
1872 : uset(uset), rangeCount(uset_getRangeCount(uset)),
1873 stringCount(uset_getStringCount(uset)) {}
1874
1876 USetElements(const USetElements &other) = default;
1877
1880 return USetElementIterator(uset, 0, rangeCount, rangeCount + stringCount);
1881 }
1882
1885 return USetElementIterator(uset, rangeCount + stringCount, rangeCount, rangeCount + stringCount);
1886 }
1887
1888private:
1889 const USet *uset;
1890 int32_t rangeCount, stringCount;
1891};
1892
1893} // namespace U_HEADER_ONLY_NAMESPACE
1894
1895#endif // U_HIDE_DRAFT_API
1896#endif // U_SHOW_CPLUSPLUS_API || U_SHOW_CPLUSPLUS_HEADER_API
1897
1898#endif // __USET_H__
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
"Smart pointer" class, closes a USet via uset_close().
Iterator returned by USetCodePoints.
Definition uset.h:1340
bool operator==(const USetCodePointIterator &other) const
Definition uset.h:1346
USetCodePointIterator & operator++()
Pre-increment.
Definition uset.h:1365
bool operator!=(const USetCodePointIterator &other) const
Definition uset.h:1356
USetCodePointIterator operator++(int)
Post-increment.
Definition uset.h:1386
USetCodePointIterator(const USetCodePointIterator &other)=default
C++ "range" for iterating over the code points of a USet.
Definition uset.h:1426
USetCodePoints(const USetCodePoints &other)=default
USetCodePointIterator end() const
Definition uset.h:1443
USetCodePoints(const USet *uset)
Constructs a C++ "range" object over the code points of the USet.
Definition uset.h:1432
USetCodePointIterator begin() const
Definition uset.h:1438
Iterator returned by USetElements.
Definition uset.h:1745
USetElementIterator operator++(int)
Post-increment.
Definition uset.h:1805
bool operator==(const USetElementIterator &other) const
Definition uset.h:1751
bool operator!=(const USetElementIterator &other) const
Definition uset.h:1760
USetElementIterator & operator++()
Pre-increment.
Definition uset.h:1780
USetElementIterator(const USetElementIterator &other)=default
A C++ "range" for iterating over all of the elements of a USet.
Definition uset.h:1865
USetElements(const USetElements &other)=default
USetElementIterator end() const
Definition uset.h:1884
USetElementIterator begin() const
Definition uset.h:1879
USetElements(const USet *uset)
Constructs a C++ "range" object over all of the elements of the USet.
Definition uset.h:1871
Iterator returned by USetRanges.
Definition uset.h:1524
USetRangeIterator & operator++()
Pre-increment.
Definition uset.h:1558
CodePointRange operator*() const
Definition uset.h:1542
bool operator==(const USetRangeIterator &other) const
Definition uset.h:1530
USetRangeIterator operator++(int)
Post-increment.
Definition uset.h:1567
bool operator!=(const USetRangeIterator &other) const
Definition uset.h:1539
USetRangeIterator(const USetRangeIterator &other)=default
C++ "range" for iterating over the code point ranges of a USet.
Definition uset.h:1607
USetRangeIterator end() const
Definition uset.h:1624
USetRangeIterator begin() const
Definition uset.h:1619
USetRanges(const USet *uset)
Constructs a C++ "range" object over the code point ranges of the USet.
Definition uset.h:1613
USetRanges(const USetRanges &other)=default
Iterator returned by USetStrings.
Definition uset.h:1637
USetStringIterator & operator++()
Pre-increment.
Definition uset.h:1669
USetStringIterator(const USetStringIterator &other)=default
bool operator!=(const USetStringIterator &other) const
Definition uset.h:1652
std::u16string_view operator*() const
Definition uset.h:1655
USetStringIterator operator++(int)
Post-increment.
Definition uset.h:1678
bool operator==(const USetStringIterator &other) const
Definition uset.h:1643
C++ "range" for iterating over the empty and multi-character strings of a USet.
Definition uset.h:1715
USetStrings(const USetStrings &other)=default
USetStringIterator begin() const
Definition uset.h:1727
USetStringIterator end() const
Definition uset.h:1732
USetStrings(const USet *uset)
Constructs a C++ "range" object over the strings of the USet.
Definition uset.h:1721
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
A serialized form of a Unicode set.
Definition uset.h:258
uint16_t staticArray[USET_SERIALIZED_STATIC_ARRAY_CAPACITY]
A small buffer for the array to reduce memory allocations.
Definition uset.h:278
int32_t bmpLength
The length of the array that contains BMP characters.
Definition uset.h:268
const uint16_t * array
The serialized Unicode Set.
Definition uset.h:263
int32_t length
The total length of the array.
Definition uset.h:273
bool operator!=(const iterator &other) const
Definition uset.h:1468
UChar32 c
The current code point in the range.
Definition uset.h:1494
iterator & operator++()
Pre-increment.
Definition uset.h:1477
iterator operator++(int)
Post-increment.
Definition uset.h:1486
bool operator==(const iterator &other) const
Definition uset.h:1466
A contiguous range of code points in a USet/UnicodeSet.
Definition uset.h:1459
CodePointRange(UChar32 start, UChar32 end)
Definition uset.h:1498
UChar32 rangeEnd
Inclusive end of a USet/UnicodeSet range of code points.
Definition uset.h:1517
CodePointRange(const CodePointRange &other)=default
UChar32 rangeStart
Start of a USet/UnicodeSet range of code points.
Definition uset.h:1512
C API: Unicode Properties.
UProperty
Selection constants for Unicode properties.
Definition uchar.h:196
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition umachine.h:378
#define U_SENTINEL
This value is intended for sentinel values for APIs that (take or) return single code points (UChar32).
Definition umachine.h:447
C++ API: Unicode String.
U_CAPI UBool uset_containsAll(const USet *set1, const USet *set2)
Returns true if set1 contains all the characters and strings of set2.
U_CAPI UBool uset_hasStrings(const USet *set)
Returns true if this set contains multi-character strings or the empty string.
U_CAPI int32_t uset_getStringCount(const USet *set)
Returns the number of strings (empty or multi-character) in this set.
U_CAPI USet * uset_open(UChar32 start, UChar32 end)
Creates a USet object that contains the range of characters start..end, inclusive.
U_CAPI UBool uset_serializedContains(const USerializedSet *set, UChar32 c)
Returns true if the given USerializedSet contains the given character.
U_CAPI UBool uset_getSerializedSet(USerializedSet *fillSet, const uint16_t *src, int32_t srcLength)
Given a serialized array, fill in the given serialized set object.
U_CAPI void uset_set(USet *set, UChar32 start, UChar32 end)
Causes the USet object to represent the range start - end.
U_CAPI void uset_addString(USet *set, const UChar *str, int32_t strLen)
Adds the given string to the given USet.
U_CAPI UBool uset_containsString(const USet *set, const UChar *str, int32_t strLen)
Returns true if the given USet contains the given string.
@ USET_IGNORE_SPACE
Ignore white space within patterns unless quoted or escaped.
Definition uset.h:72
@ USET_ADD_CASE_MAPPINGS
Adds all case mappings for each element in the set.
Definition uset.h:113
@ USET_CASE_INSENSITIVE
Enable case insensitive matching.
Definition uset.h:100
@ USET_SIMPLE_CASE_INSENSITIVE
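Enable case-insensitive matching using only simple case folding (each code point maps to a single code point), unlike USET_CASE_INSENSITIVE, which uses full case folding.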
Definition uset.h:127
U_CAPI UBool uset_isEmpty(const USet *set)
Returns true if the given USet contains no characters and no strings.
U_CAPI int32_t uset_getItemCount(const USet *set)
Returns the number of items in this set.
U_CAPI int32_t uset_getSerializedRangeCount(const USerializedSet *set)
Returns the number of disjoint ranges of characters contained in the given serialized set.
U_CAPI UChar32 uset_charAt(const USet *set, int32_t charIndex)
Returns the character at the given index within this set, where the set is ordered by ascending code point.
U_CAPI void uset_retainAll(USet *set, const USet *retain)
Retains only the elements in this set that are contained in the specified set.
U_CAPI void uset_setSerializedToOne(USerializedSet *fillSet, UChar32 c)
Set the USerializedSet to contain the given character (and nothing else).
U_CAPI void uset_complementString(USet *set, const UChar *str, int32_t length)
Complements the specified string in this set.
U_CAPI void uset_applyIntPropertyValue(USet *set, UProperty prop, int32_t value, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given binary or enum...
U_CAPI void uset_complementRange(USet *set, UChar32 start, UChar32 end)
Complements the specified range in this set.
U_CAPI UBool uset_resemblesPattern(const UChar *pattern, int32_t patternLength, int32_t pos)
Returns true if the given position, in the given pattern, appears to be the start of a UnicodeSet pattern.
U_CAPI void uset_addRange(USet *set, UChar32 start, UChar32 end)
Adds the given range of characters to the given USet.
U_CAPI void uset_add(USet *set, UChar32 c)
Adds the given character to the given USet.
U_CAPI void uset_complementAllCodePoints(USet *set, const UChar *str, int32_t length)
Complements EACH of the characters in this string.
U_CAPI int32_t uset_size(const USet *set)
Returns the number of characters and strings contained in this set.
U_CAPI int32_t uset_span(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI USet * uset_openPatternOptions(const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeAllStrings(USet *set)
Remove all strings from this set.
U_CAPI void uset_clear(USet *set)
Removes all of the elements from this set.
U_CAPI void uset_removeAllCodePoints(USet *set, const UChar *str, int32_t length)
Removes EACH of the characters in this string.
USetSpanCondition
Argument values for whether span() and similar functions continue while the current character is contained or not contained in the set.
Definition uset.h:185
@ USET_SPAN_NOT_CONTAINED
Continues a span() while there is no set element at the current position.
Definition uset.h:198
@ USET_SPAN_CONTAINED
Spans the longest substring that is a concatenation of set elements (characters or strings).
Definition uset.h:213
@ USET_SPAN_CONDITION_COUNT
One more than the last span condition.
Definition uset.h:239
@ USET_SPAN_SIMPLE
Continues a span() while there is a set element at the current position.
Definition uset.h:233
U_CAPI void uset_remove(USet *set, UChar32 c)
Removes the given character from the given USet.
U_CAPI USet * uset_openPattern(const UChar *pattern, int32_t patternLength, UErrorCode *ec)
Creates a set from the given pattern.
U_CAPI void uset_removeRange(USet *set, UChar32 start, UChar32 end)
Removes the given range of characters from the given USet.
U_CAPI USet * uset_openEmpty(void)
Create an empty USet object.
U_CAPI UBool uset_equals(const USet *set1, const USet *set2)
Returns true if set1 contains all of the characters and strings of set2, and vice versa.
U_CAPI UBool uset_isFrozen(const USet *set)
Determines whether the set has been frozen (made immutable) or not.
U_CAPI int32_t uset_getRangeCount(const USet *set)
Returns the number of code point ranges in this set.
U_CAPI USet * uset_cloneAsThawed(const USet *set)
Clone the set and make the clone mutable.
U_CAPI UBool uset_containsAllCodePoints(const USet *set, const UChar *str, int32_t strLen)
Returns true if this set contains all the characters of the given string.
U_CAPI int32_t uset_serialize(const USet *set, uint16_t *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Serializes this set into an array of 16-bit integers.
U_CAPI int32_t uset_indexOf(const USet *set, UChar32 c)
Returns the index of the given character within this set, where the set is ordered by ascending code point.
U_CAPI USet * uset_clone(const USet *set)
Returns a copy of this object.
U_CAPI void uset_freeze(USet *set)
Freeze the set (make it immutable).
U_CAPI int32_t uset_spanUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the length of the initial substring of the input string which consists only of characters and...
U_CAPI void uset_retain(USet *set, UChar32 start, UChar32 end)
Retain only the elements in this set that are contained in the specified range.
U_CAPI int32_t uset_toPattern(const USet *set, UChar *result, int32_t resultCapacity, UBool escapeUnprintable, UErrorCode *ec)
Returns a string representation of this set.
U_CAPI int32_t uset_spanBackUTF8(const USet *set, const char *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI void uset_complement(USet *set)
This is equivalent to uset_complementRange(set, 0, 0x10FFFF).
@ USET_SERIALIZED_STATIC_ARRAY_CAPACITY
Capacity of USerializedSet::staticArray.
Definition uset.h:250
U_CAPI UBool uset_containsSome(const USet *set1, const USet *set2)
Returns true if set1 contains some of the characters and strings of set2.
U_CAPI int32_t uset_spanBack(const USet *set, const UChar *s, int32_t length, USetSpanCondition spanCondition)
Returns the start of the trailing substring of the input string which consists only of characters and...
U_CAPI UBool uset_containsNone(const USet *set1, const USet *set2)
Returns true if set1 contains none of the characters and strings of set2.
U_CAPI void uset_retainAllCodePoints(USet *set, const UChar *str, int32_t length)
Retains EACH of the characters in this string.
U_CAPI UBool uset_contains(const USet *set, UChar32 c)
Returns true if the given USet contains the given character.
U_CAPI int32_t uset_applyPattern(USet *set, const UChar *pattern, int32_t patternLength, uint32_t options, UErrorCode *status)
Modifies the set to represent the set specified by the given pattern.
U_CAPI void uset_complementAll(USet *set, const USet *complement)
Complements in this set all elements contained in the specified set.
U_CAPI void uset_applyPropertyAlias(USet *set, const UChar *prop, int32_t propLength, const UChar *value, int32_t valueLength, UErrorCode *ec)
Modifies the set to contain those code points which have the given value for the given property.
U_CAPI void uset_close(USet *set)
Disposes of the storage used by a USet object.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition uset.h:53
U_CAPI const UChar * uset_getString(const USet *set, int32_t index, int32_t *pLength)
Returns the index-th string (empty or multi-character) in the set.
U_CAPI void uset_addAll(USet *set, const USet *additionalSet)
Adds all of the elements in the specified set to this set if they're not already present.
U_CAPI void uset_removeString(USet *set, const UChar *str, int32_t strLen)
Removes the given string from the given USet.
U_CAPI void uset_compact(USet *set)
Reallocate this object's internal structures to take up the least possible space, without changing this object's value.
U_CAPI int32_t uset_getItem(const USet *set, int32_t itemIndex, UChar32 *start, UChar32 *end, UChar *str, int32_t strCapacity, UErrorCode *ec)
Returns an item of this set.
U_CAPI void uset_closeOver(USet *set, int32_t attributes)
Close this set over the given attribute.
U_CAPI void uset_addAllCodePoints(USet *set, const UChar *str, int32_t strLen)
Adds each of the characters in this string to the set.
U_CAPI void uset_retainString(USet *set, const UChar *str, int32_t length)
Retains only the specified string from this set if it is present.
U_CAPI void uset_removeAll(USet *set, const USet *removeSet)
Removes from this set all of its elements that are contained in the specified set.
U_CAPI UBool uset_containsRange(const USet *set, UChar32 start, UChar32 end)
Returns true if the given USet contains all characters c where start <= c && c <= end.
U_CAPI UBool uset_getSerializedRange(const USerializedSet *set, int32_t rangeIndex, UChar32 *pStart, UChar32 *pEnd)
Returns a range of characters contained in the given serialized set.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
@ U_ZERO_ERROR
No error, no warning.
Definition utypes.h:465
#define U_SUCCESS(x)
Does the error code indicate success?
Definition utypes.h:742