ICU 76.1 76.1
Loading...
Searching...
No Matches
coll.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4******************************************************************************
5* Copyright (C) 1996-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7******************************************************************************
8*/
9
52#ifndef COLL_H
53#define COLL_H
54
55#include "unicode/utypes.h"
56
57#if U_SHOW_CPLUSPLUS_API
58
59#if !UCONFIG_NO_COLLATION
60
61#include <functional>
62#include <string_view>
63#include <type_traits>
64
65#include "unicode/char16ptr.h"
66#include "unicode/uobject.h"
67#include "unicode/ucol.h"
68#include "unicode/unorm.h"
69#include "unicode/locid.h"
70#include "unicode/uniset.h"
71#include "unicode/umisc.h"
72#include "unicode/unistr.h"
73#include "unicode/uiter.h"
74#include "unicode/stringpiece.h"
75
76U_NAMESPACE_BEGIN
77
78class StringEnumeration;
79
80#if !UCONFIG_NO_SERVICE
84class CollatorFactory;
85#endif
86
90class CollationKey;
91
174public:
175
176 // Collator public enums -----------------------------------------------
177
204 {
205 PRIMARY = UCOL_PRIMARY, // 0
206 SECONDARY = UCOL_SECONDARY, // 1
207 TERTIARY = UCOL_TERTIARY, // 2
208 QUATERNARY = UCOL_QUATERNARY, // 3
209 IDENTICAL = UCOL_IDENTICAL // 15
210 };
211
212
213 // Cannot use #ifndef U_HIDE_DEPRECATED_API for the following, it is
214 // used by virtual methods that cannot have that conditional.
215#ifndef U_FORCE_HIDE_DEPRECATED_API
227 {
228 LESS = UCOL_LESS, // -1
229 EQUAL = UCOL_EQUAL, // 0
230 GREATER = UCOL_GREATER // 1
231 };
232#endif // U_FORCE_HIDE_DEPRECATED_API
233
234 // Collator public destructor -----------------------------------------
235
240 virtual ~Collator();
241
242 // Collator public methods --------------------------------------------
243
262 virtual bool operator==(const Collator& other) const;
263
271 virtual bool operator!=(const Collator& other) const;
272
278 virtual Collator* clone() const = 0;
279
300
335
336#ifndef U_FORCE_HIDE_DEPRECATED_API
349 const UnicodeString& target) const;
350#endif // U_FORCE_HIDE_DEPRECATED_API
351
365 const UnicodeString& target,
366 UErrorCode &status) const = 0;
367
368#ifndef U_FORCE_HIDE_DEPRECATED_API
382 const UnicodeString& target,
383 int32_t length) const;
384#endif // U_FORCE_HIDE_DEPRECATED_API
385
400 const UnicodeString& target,
401 int32_t length,
402 UErrorCode &status) const = 0;
403
404#ifndef U_FORCE_HIDE_DEPRECATED_API
438 virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength,
439 const char16_t* target, int32_t targetLength)
440 const;
441#endif // U_FORCE_HIDE_DEPRECATED_API
442
459 virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
460 const char16_t* target, int32_t targetLength,
461 UErrorCode &status) const = 0;
462
476 UErrorCode &status) const;
477
492 const StringPiece &target,
493 UErrorCode &status) const;
494
515 UErrorCode& status) const = 0;
516
536 virtual CollationKey& getCollationKey(const char16_t*source,
539 UErrorCode& status) const = 0;
544 virtual int32_t hashCode() const = 0;
545
546#ifndef U_FORCE_HIDE_DEPRECATED_API
560#endif // U_FORCE_HIDE_DEPRECATED_API
561
571 UBool greater(const UnicodeString& source, const UnicodeString& target)
572 const;
573
584 const UnicodeString& target) const;
585
595 UBool equals(const UnicodeString& source, const UnicodeString& target) const;
596
597#ifndef U_HIDE_DRAFT_API
598
// Creates a comparison function object bound to this collator; it returns true
// when the collation result for (lhs, rhs) equals UCOL_EQUAL.
604 inline auto equal_to() const { return Predicate<std::equal_to, UCOL_EQUAL>(*this); }
605
// Creates a comparison function object bound to this collator; it returns true
// when the collation result for (lhs, rhs) equals UCOL_GREATER.
611 inline auto greater() const { return Predicate<std::equal_to, UCOL_GREATER>(*this); }
612
// Creates a comparison function object bound to this collator; it returns true
// when the collation result for (lhs, rhs) equals UCOL_LESS.
618 inline auto less() const { return Predicate<std::equal_to, UCOL_LESS>(*this); }
619
// Creates a comparison function object bound to this collator; it returns true
// when the collation result for (lhs, rhs) is anything other than UCOL_EQUAL.
625 inline auto not_equal_to() const { return Predicate<std::not_equal_to, UCOL_EQUAL>(*this); }
626
// Creates a comparison function object bound to this collator; "greater or equal"
// is expressed as "result is not UCOL_LESS".
632 inline auto greater_equal() const { return Predicate<std::not_equal_to, UCOL_LESS>(*this); }
633
// Creates a comparison function object bound to this collator; "less or equal"
// is expressed as "result is not UCOL_GREATER".
639 inline auto less_equal() const { return Predicate<std::not_equal_to, UCOL_GREATER>(*this); }
640
641#endif // U_HIDE_DRAFT_API
642
643#ifndef U_FORCE_HIDE_DEPRECATED_API
655
675#endif // U_FORCE_HIDE_DEPRECATED_API
676
694 UErrorCode& status) const;
695
714
736 int32_t* dest,
739
750 const Locale& displayLocale,
751 UnicodeString& name);
752
762 UnicodeString& name);
763
776
786
797
810
829
857 static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
859
860#if !UCONFIG_NO_SERVICE
873
885
900#endif /* UCONFIG_NO_SERVICE */
901
907 virtual void getVersion(UVersionInfo info) const = 0;
908
919 virtual UClassID getDynamicClassID() const override = 0;
920
930 UErrorCode &status) = 0;
931
941 UErrorCode &status) const = 0;
942
962
972
973#ifndef U_FORCE_HIDE_DEPRECATED_API
990 virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0;
991
1008
1021#endif // U_FORCE_HIDE_DEPRECATED_API
1022
1031
1042
1043#ifndef U_FORCE_HIDE_DEPRECATED_API
1051 virtual Collator* safeClone() const;
1052#endif // U_FORCE_HIDE_DEPRECATED_API
1053
1070 virtual int32_t getSortKey(const UnicodeString& source,
1071 uint8_t* result,
1072 int32_t resultLength) const = 0;
1073
1093 virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength,
1094 uint8_t*result, int32_t resultLength) const = 0;
1095
1133 static int32_t U_EXPORT2 getBound(const uint8_t *source,
1137 uint8_t *result,
1140
1141
1142protected:
1143
1144 // Collator protected constructors -------------------------------------
1145
1154
1155#ifndef U_HIDE_DEPRECATED_API
1169#endif /* U_HIDE_DEPRECATED_API */
1170
1177
1178public:
1186 virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
1187
1211 virtual int32_t internalGetShortDefinitionString(const char *locale,
1212 char *buffer,
1213 int32_t capacity,
1214 UErrorCode &status) const;
1215
1221 const char *left, int32_t leftLength,
1222 const char *right, int32_t rightLength,
1223 UErrorCode &errorCode) const;
1224
1229 virtual int32_t
1232 uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
1233
1234#ifndef U_HIDE_INTERNAL_API
1237 return reinterpret_cast<Collator *>(uc);
1238 }
// Reinterprets a const C API UCollator pointer as a const C++ Collator pointer.
// This is a plain reinterpret_cast: no null check or conversion is performed here.
1240 static inline const Collator *fromUCollator(const UCollator *uc) {
1241 return reinterpret_cast<const Collator *>(uc);
1242 }
1245 return reinterpret_cast<UCollator *>(this);
1246 }
// Reinterprets this const Collator as a const C API UCollator pointer.
// This is a plain reinterpret_cast of `this`; no new object is created.
1248 inline const UCollator *toUCollator() const {
1249 return reinterpret_cast<const UCollator *>(this);
1250 }
1251#endif // U_HIDE_INTERNAL_API
1252
1253private:
1257 Collator& operator=(const Collator& other) = delete;
1258
1259 friend class CFactory;
1260 friend class SimpleCFactory;
1261 friend class ICUCollatorFactory;
1262 friend class ICUCollatorService;
1263 static Collator* makeInstance(const Locale& desiredLocale,
1265
1266#ifndef U_HIDE_DRAFT_API
// Function object produced by the Collator comparison factories (equal_to(), less(), ...).
// Compare is a comparator template (std::equal_to or std::not_equal_to at the call sites
// in this file) and result is the UCollationResult the collation outcome is tested against:
// every call evaluates Compare<UCollationResult>{}(outcome of comparing lhs with rhs, result).
1271 template <template <typename...> typename Compare, UCollationResult result>
1272 class Predicate {
1273 public:
// Binds the predicate to parent. Only a reference is stored (see the collator member),
// so parent must remain valid for as long as the predicate is used.
1274 explicit Predicate(const Collator& parent) : collator(parent) {}
1275
// Overload enabled only when both T and U satisfy ConvertibleToU16StringView.
// Both arguments are wrapped with UnicodeString::readOnlyAlias() and passed to
// Collator::compare(). The local status is never inspected afterwards, so an error
// reported by the collator is not surfaced to the caller.
1276 template <
1277 typename T, typename U,
1278 typename = std::enable_if_t<ConvertibleToU16StringView<T> && ConvertibleToU16StringView<U>>>
1279 bool operator()(const T& lhs, const U& rhs) const {
1280 UErrorCode status = U_ZERO_ERROR;
1281 return compare(
1282 collator.compare(
1283 UnicodeString::readOnlyAlias(lhs),
1284 UnicodeString::readOnlyAlias(rhs),
1285 status),
1286 result);
1287 }
1288
// Overload for std::string_view arguments: forwards to Collator::compareUTF8().
// As above, the local status is not inspected after the call.
1289 bool operator()(std::string_view lhs, std::string_view rhs) const {
1290 UErrorCode status = U_ZERO_ERROR;
1291 return compare(collator.compareUTF8(lhs, rhs, status), result);
1292 }
1293
// Same as the std::string_view overload, but for std::u8string_view; only compiled
// when the __cpp_char8_t feature-test macro is defined.
1294#if defined(__cpp_char8_t)
1295 bool operator()(std::u8string_view lhs, std::u8string_view rhs) const {
1296 UErrorCode status = U_ZERO_ERROR;
1297 return compare(collator.compareUTF8(lhs, rhs, status), result);
1298 }
1299#endif
1300
1301 private:
// Collator used for every comparison (referenced, not owned).
1302 const Collator& collator;
// Stateless comparator instance applied to (collation outcome, result).
1303 static constexpr Compare<UCollationResult> compare{};
1304 };
1305#endif // U_HIDE_DRAFT_API
1306};
1307
1308#if !UCONFIG_NO_SERVICE
1326public:
1327
1333
1341 virtual UBool visible() const;
1342
1350 virtual Collator* createCollator(const Locale& loc) = 0;
1351
1363 const Locale& displayLocale,
1365
1376};
1377#endif /* UCONFIG_NO_SERVICE */
1378
1379// Collator inline methods -----------------------------------------------
1380
1382
1383#endif /* #if !UCONFIG_NO_COLLATION */
1384
1385#endif /* U_SHOW_CPLUSPLUS_API */
1386
1387#endif
C++ API: char16_t pointer wrappers with implicit conversion from bit-compatible raw pointer types.
Collation keys are generated by the Collator class.
Definition sortkey.h:101
A factory, used with registerFactory, that creates multiple collators and provides display names for t...
Definition coll.h:1325
virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode &status)=0
Return an array of all the locale names directly supported by this factory.
virtual UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &result)
Return the name of the collator for the objectLocale, localized for the displayLocale.
virtual UBool visible() const
Return true if this factory is visible.
virtual Collator * createCollator(const Locale &loc)=0
Return a collator for the provided locale.
virtual ~CollatorFactory()
Destructor.
The Collator class performs locale-sensitive string comparison.
Definition coll.h:173
static Collator * createInstance(UErrorCode &err)
Creates the Collator object for the current default locale.
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const
Implements ucol_strcollUTF8().
static URegistryKey registerInstance(Collator *toAdopt, const Locale &locale, UErrorCode &status)
Register a new Collator.
virtual ECollationStrength getStrength() const
Determines the minimum strength that will be used in comparison or transformation.
UBool equals(const UnicodeString &source, const UnicodeString &target) const
Convenience method for comparing two strings based on the collation rules.
UCollator * toUCollator()
Definition coll.h:1244
virtual void getVersion(UVersionInfo info) const =0
Gets the version information for a Collator.
virtual bool operator==(const Collator &other) const
Returns true if "other" is the same as "this".
auto less() const
Creates a comparison function object that uses this collator.
Definition coll.h:618
static Locale getFunctionalEquivalent(const char *keyword, const Locale &locale, UBool &isAvailable, UErrorCode &status)
Return the functionally equivalent locale for the given requested locale, with respect to given keywo...
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
static Collator * fromUCollator(UCollator *uc)
Definition coll.h:1236
static StringEnumeration * getAvailableLocales()
Return a StringEnumeration over the locales available at the time of the call, including registered l...
Collator()
Default constructor.
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
auto less_equal() const
Creates a comparison function object that uses this collator.
Definition coll.h:639
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)=0
Universal attribute setter.
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
virtual UClassID getDynamicClassID() const override=0
Returns a unique class ID POLYMORPHICALLY.
UBool greaterOrEqual(const UnicodeString &source, const UnicodeString &target) const
Convenience method for comparing two strings based on the collation rules.
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a UnicodeString.
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const
Compares two UTF-8 strings using the Collator.
UBool greater(const UnicodeString &source, const UnicodeString &target) const
Convenience method for comparing two strings based on the collation rules.
static StringEnumeration * getKeywords(UErrorCode &status)
Create a string enumerator of all possible keywords that are relevant to collation.
virtual uint32_t getVariableTop(UErrorCode &status) const =0
Gets the variable top value of a Collator.
virtual void setStrength(ECollationStrength newStrength)
Sets the minimum strength to be used in comparison or transformation.
static int32_t getBound(const uint8_t *source, int32_t sourceLength, UColBoundMode boundType, uint32_t noOfLevels, uint8_t *result, int32_t resultLength, UErrorCode &status)
Produce a bound for a given sortkey and a number of levels.
virtual Collator * safeClone() const
Same as clone().
virtual UCollationResult compare(const char16_t *source, int32_t sourceLength, const char16_t *target, int32_t targetLength, UErrorCode &status) const =0
The comparison function compares the character data stored in two different string arrays.
auto equal_to() const
Creates a comparison function object that uses this collator.
Definition coll.h:604
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status)=0
Sets the variable top to the primary weight of the specified string.
const UCollator * toUCollator() const
Definition coll.h:1248
ECollationStrength
Base letter represents a primary difference.
Definition coll.h:204
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, UErrorCode &status) const =0
The comparison function compares the character data stored in two different strings.
virtual UCollationResult compare(UCharIterator &sIter, UCharIterator &tIter, UErrorCode &status) const
Compares two strings using the Collator.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
Sets the variable top to the top of the specified reordering group.
auto greater_equal() const
Creates a comparison function object that uses this collator.
Definition coll.h:632
auto not_equal_to() const
Creates a comparison function object that uses this collator.
Definition coll.h:625
virtual int32_t hashCode() const =0
Generates the hash code for the collation object.
static URegistryKey registerFactory(CollatorFactory *toAdopt, UErrorCode &status)
Register a new CollatorFactory.
Collator(UCollationStrength collationStrength, UNormalizationMode decompositionMode)
Constructor.
static StringEnumeration * getKeywordValues(const char *keyword, UErrorCode &status)
Given a keyword, create a string enumeration of all values for that keyword that are currently in use...
virtual ~Collator()
Destructor.
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const
Implements ucol_nextSortKeyPart().
virtual CollationKey & getCollationKey(const char16_t *source, int32_t sourceLength, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const =0
Gets the locale of the Collator.
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const =0
Universal attribute getter.
virtual EComparisonResult compare(const char16_t *source, int32_t sourceLength, const char16_t *target, int32_t targetLength) const
The comparison function compares the character data stored in two different string arrays.
static Collator * createInstance(const Locale &loc, UErrorCode &err)
Gets the collation object for the desired locale.
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which Collations are installed.
virtual UColReorderCode getMaxVariable() const
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
auto greater() const
Creates a comparison function object that uses this collator.
Definition coll.h:611
virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a char16_t buffer.
EComparisonResult
LESS is returned if source string is compared to be less than target string in the compare() method.
Definition coll.h:227
virtual bool operator!=(const Collator &other) const
Returns true if "other" is not the same as "this".
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
virtual void setVariableTop(uint32_t varTop, UErrorCode &status)=0
Sets the variable top to the specified primary weight.
static const Collator * fromUCollator(const UCollator *uc)
Definition coll.h:1240
static StringEnumeration * getKeywordValuesForLocale(const char *keyword, const Locale &locale, UBool commonlyUsed, UErrorCode &status)
Given a key and a locale, returns an array of string values in a preferred order that would make a di...
virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status)=0
Sets the variable top to the primary weight of the specified string.
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, int32_t length, UErrorCode &status) const =0
Does the same thing as compare but limits the comparison to a specified length.
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered Collator or CollatorFactory using the key returned from the regist...
virtual Collator * clone() const =0
Makes a copy of this object.
Collator(const Collator &other)
Copy constructor.
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target, int32_t length) const
Does the same thing as compare but limits the comparison to a specified length.
static int32_t getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode &status)
Retrieves the reorder codes that are grouped with the given reorder code.
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
"Smart pointer" base class; do not use directly: use LocalPointer etc.
A Locale object represents a specific geographical, political, or cultural region.
Definition locid.h:195
Base class for 'pure' C++ implementations of uenum api.
Definition strenum.h:61
A string-like object that points to a sized piece of memory.
Definition stringpiece.h:61
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
A mutable set of Unicode characters and multicharacter strings.
Definition uniset.h:285
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition unistr.h:296
C++ API: Locale ID object.
C++ API: StringPiece: Read-only byte string wrapper class.
C API for code unit iteration.
Definition uiter.h:341
C API: Collator.
struct UCollator UCollator
structure representing a collator object instance
Definition ucol.h:61
UColAttribute
Attributes that collation service understands.
Definition ucol.h:245
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition ucol.h:76
@ UCOL_LESS
string a < string b
Definition ucol.h:82
@ UCOL_GREATER
string a > string b
Definition ucol.h:80
@ UCOL_EQUAL
string a == string b
Definition ucol.h:78
UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation; do not change the values assigned...
Definition ucol.h:1071
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition ucol.h:92
@ UCOL_TERTIARY
Tertiary collation strength.
Definition ucol.h:101
@ UCOL_IDENTICAL
Identical collation strength.
Definition ucol.h:108
@ UCOL_QUATERNARY
Quaternary collation strength.
Definition ucol.h:106
@ UCOL_PRIMARY
Primary collation strength.
Definition ucol.h:97
@ UCOL_SECONDARY
Secondary collation strength.
Definition ucol.h:99
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
Definition ucol.h:149
C API: Unicode Character Iteration.
ULocDataLocaleType
Constants for *_getLocale(). Allow the user to select whether they want information on requested,...
Definition uloc.h:338
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
C API: Miscellaneous definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition umisc.h:57
C++ API: Unicode Set.
C++ API: Unicode String.
C API: Unicode Normalization.
UNormalizationMode
Constants for normalization modes.
Definition unorm.h:140
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
@ U_ZERO_ERROR
No error, no warning.
Definition utypes.h:465
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition utypes.h:316
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition uversion.h:59