ICU 70.1
coll.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 ******************************************************************************
8 */
9 
52 #ifndef COLL_H
53 #define COLL_H
54 
55 #include "unicode/utypes.h"
56 
57 #if U_SHOW_CPLUSPLUS_API
58 
59 #if !UCONFIG_NO_COLLATION
60 
61 #include "unicode/uobject.h"
62 #include "unicode/ucol.h"
63 #include "unicode/unorm.h"
64 #include "unicode/locid.h"
65 #include "unicode/uniset.h"
66 #include "unicode/umisc.h"
67 #include "unicode/uiter.h"
68 #include "unicode/stringpiece.h"
69 
70 U_NAMESPACE_BEGIN
71 
72 class StringEnumeration;
73 
74 #if !UCONFIG_NO_SERVICE
78 class CollatorFactory;
79 #endif
80 
84 class CollationKey;
85 
// The Collator class performs locale-sensitive string comparison.
// NOTE(review): this is a documentation source listing; doc comments were
// stripped and several declaration lines were lost with them. Each gap is
// flagged below, using the signatures shown in this page's member index.
167 class U_I18N_API Collator : public UObject {
168 public:
169 
170  // Collator public enums -----------------------------------------------
171 
     // ECollationStrength: strength levels, each mapped onto the matching
     // UCOL_* strength constant.
     // NOTE(review): the `enum ECollationStrength` header line is missing from
     // this listing.
198  {
199  PRIMARY = UCOL_PRIMARY, // 0
200  SECONDARY = UCOL_SECONDARY, // 1
201  TERTIARY = UCOL_TERTIARY, // 2
202  QUATERNARY = UCOL_QUATERNARY, // 3
203  IDENTICAL = UCOL_IDENTICAL // 15
204  };
205 
206 
207  // Cannot use #ifndef U_HIDE_DEPRECATED_API for the following, it is
208  // used by virtual methods that cannot have that conditional.
209 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // EComparisonResult: LESS is returned if the source string is compared to
     // be less than the target string in the compare() method.
     // NOTE(review): the `enum EComparisonResult` header line is missing from
     // this listing.
221  {
222  LESS = UCOL_LESS, // -1
223  EQUAL = UCOL_EQUAL, // 0
224  GREATER = UCOL_GREATER // 1
225  };
226 #endif // U_FORCE_HIDE_DEPRECATED_API
227 
228  // Collator public destructor -----------------------------------------
229 
     // Destructor.
234  virtual ~Collator();
235 
236  // Collator public methods --------------------------------------------
237 
     // Returns true if "other" is the same as "this".
256  virtual bool operator==(const Collator& other) const;
257 
     // Returns true if "other" is not the same as "this".
265  virtual bool operator!=(const Collator& other) const;
266 
     // Makes a copy of this object.
272  virtual Collator* clone() const = 0;
273 
     // Creates the Collator object for the current default locale.
293  static Collator* U_EXPORT2 createInstance(UErrorCode& err);
294 
     // Gets the collation object for the desired locale.
328  static Collator* U_EXPORT2 createInstance(const Locale& loc, UErrorCode& err);
329 
330 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // Compares the character data stored in two different strings; this
     // overload returns EComparisonResult and takes no error code.
342  virtual EComparisonResult compare(const UnicodeString& source,
343  const UnicodeString& target) const;
344 #endif // U_FORCE_HIDE_DEPRECATED_API
345 
     // Compares the character data stored in two different strings; this
     // overload returns UCollationResult and takes a UErrorCode.
358  virtual UCollationResult compare(const UnicodeString& source,
359  const UnicodeString& target,
360  UErrorCode &status) const = 0;
361 
362 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // Does the same thing as compare but limits the comparison to a
     // specified length (EComparisonResult variant).
375  virtual EComparisonResult compare(const UnicodeString& source,
376  const UnicodeString& target,
377  int32_t length) const;
378 #endif // U_FORCE_HIDE_DEPRECATED_API
379 
     // Does the same thing as compare but limits the comparison to a
     // specified length (UCollationResult variant).
393  virtual UCollationResult compare(const UnicodeString& source,
394  const UnicodeString& target,
395  int32_t length,
396  UErrorCode &status) const = 0;
397 
398 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // Compares the character data stored in two different string arrays
     // (EComparisonResult variant).
432  virtual EComparisonResult compare(const char16_t* source, int32_t sourceLength,
433  const char16_t* target, int32_t targetLength)
434  const;
435 #endif // U_FORCE_HIDE_DEPRECATED_API
436 
     // Compares the character data stored in two different string arrays
     // (UCollationResult variant).
453  virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
454  const char16_t* target, int32_t targetLength,
455  UErrorCode &status) const = 0;
456 
     // Compares two strings using the Collator (UCharIterator overload).
     // NOTE(review): the first line of this declaration,
     // `virtual UCollationResult compare(UCharIterator &sIter,`, is missing
     // from this listing.
469  UCharIterator &tIter,
470  UErrorCode &status) const;
471 
     // Compares two UTF-8 strings using the Collator.
485  virtual UCollationResult compareUTF8(const StringPiece &source,
486  const StringPiece &target,
487  UErrorCode &status) const;
488 
     // Transforms the string into a series of characters that can be compared
     // with CollationKey::compareTo (UnicodeString overload).
     // NOTE(review): the first line of this declaration,
     // `virtual CollationKey& getCollationKey(const UnicodeString& source,`,
     // is missing from this listing.
508  CollationKey& key,
509  UErrorCode& status) const = 0;
510 
     // Transforms the string into a series of characters that can be compared
     // with CollationKey::compareTo (char16_t buffer overload).
530  virtual CollationKey& getCollationKey(const char16_t*source,
531  int32_t sourceLength,
532  CollationKey& key,
533  UErrorCode& status) const = 0;
     // Generates the hash code for the collation object.
538  virtual int32_t hashCode(void) const = 0;
539 
540 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // Gets the locale of the Collator.
553  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const = 0;
554 #endif // U_FORCE_HIDE_DEPRECATED_API
555 
     // Convenience method for comparing two strings based on the collation
     // rules.
565  UBool greater(const UnicodeString& source, const UnicodeString& target)
566  const;
567 
     // Convenience method for comparing two strings based on the collation
     // rules.
     // NOTE(review): the first line of this declaration,
     // `UBool greaterOrEqual(const UnicodeString& source,`, is missing from
     // this listing.
578  const UnicodeString& target) const;
579 
     // Convenience method for comparing two strings based on the collation
     // rules.
589  UBool equals(const UnicodeString& source, const UnicodeString& target) const;
590 
591 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // Determines the minimum strength that will be used in comparison or
     // transformation.
602  virtual ECollationStrength getStrength(void) const;
603 
     // Sets the minimum strength to be used in comparison or transformation.
622  virtual void setStrength(ECollationStrength newStrength);
623 #endif // U_FORCE_HIDE_DEPRECATED_API
624 
     // Retrieves the reordering codes for this collator.
640  virtual int32_t getReorderCodes(int32_t *dest,
641  int32_t destCapacity,
642  UErrorCode& status) const;
643 
     // Sets the ordering of scripts for this collator.
659  virtual void setReorderCodes(const int32_t* reorderCodes,
660  int32_t reorderCodesLength,
661  UErrorCode& status) ;
662 
     // Retrieves the reorder codes that are grouped with the given reorder
     // code.
683  static int32_t U_EXPORT2 getEquivalentReorderCodes(int32_t reorderCode,
684  int32_t* dest,
685  int32_t destCapacity,
686  UErrorCode& status);
687 
     // Gets the name of the object for the desired Locale, in the desired
     // language.
697  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
698  const Locale& displayLocale,
699  UnicodeString& name);
700 
     // Gets the name of the object for the desired Locale, in the language of
     // the default locale.
709  static UnicodeString& U_EXPORT2 getDisplayName(const Locale& objectLocale,
710  UnicodeString& name);
711 
     // Gets the set of Locales for which Collations are installed.
723  static const Locale* U_EXPORT2 getAvailableLocales(int32_t& count);
724 
     // Returns a StringEnumeration over the locales available at the time of
     // the call.
733  static StringEnumeration* U_EXPORT2 getAvailableLocales(void);
734 
     // Creates a string enumerator of all possible keywords that are relevant
     // to collation.
744  static StringEnumeration* U_EXPORT2 getKeywords(UErrorCode& status);
745 
     // Given a keyword, creates a string enumeration of all values for that
     // keyword that are currently in use.
757  static StringEnumeration* U_EXPORT2 getKeywordValues(const char *keyword, UErrorCode& status);
758 
     // Given a key and a locale, returns string values in a preferred order.
     // NOTE(review): the full summary is truncated in this page's index.
775  static StringEnumeration* U_EXPORT2 getKeywordValuesForLocale(const char* keyword, const Locale& locale,
776  UBool commonlyUsed, UErrorCode& status);
777 
     // Returns the functionally equivalent locale for the given requested
     // locale, with respect to the given keyword.
805  static Locale U_EXPORT2 getFunctionalEquivalent(const char* keyword, const Locale& locale,
806  UBool& isAvailable, UErrorCode& status);
807 
808 #if !UCONFIG_NO_SERVICE
     // Registers a new Collator.
820  static URegistryKey U_EXPORT2 registerInstance(Collator* toAdopt, const Locale& locale, UErrorCode& status);
821 
     // Registers a new CollatorFactory.
832  static URegistryKey U_EXPORT2 registerFactory(CollatorFactory* toAdopt, UErrorCode& status);
833 
     // Unregisters a previously-registered Collator or CollatorFactory using
     // the key returned at registration.
847  static UBool U_EXPORT2 unregister(URegistryKey key, UErrorCode& status);
848 #endif /* UCONFIG_NO_SERVICE */
849 
     // Gets the version information for a Collator.
855  virtual void getVersion(UVersionInfo info) const = 0;
856 
     // Returns a unique class ID POLYMORPHICALLY.
867  virtual UClassID getDynamicClassID(void) const override = 0;
868 
     // Universal attribute setter.
     // NOTE(review): the first line of this declaration,
     // `virtual void setAttribute(UColAttribute attr, UColAttributeValue value,`,
     // is missing from this listing.
878  UErrorCode &status) = 0;
879 
     // Universal attribute getter.
     // NOTE(review): the first line of this declaration,
     // `virtual UColAttributeValue getAttribute(UColAttribute attr,`, is
     // missing from this listing.
889  UErrorCode &status) const = 0;
890 
     // Sets the variable top to the top of the specified reordering group.
909  virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode);
910 
     // NOTE(review): `virtual UColReorderCode getMaxVariable() const` appears
     // in this page's member index but its declaration is missing from the
     // listing around here.
920 
921 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // Sets the variable top to the primary weight of the specified string
     // (char16_t buffer overload).
938  virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) = 0;
939 
     // Sets the variable top to the primary weight of the specified string
     // (UnicodeString overload).
955  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) = 0;
956 
     // Sets the variable top to the specified primary weight.
968  virtual void setVariableTop(uint32_t varTop, UErrorCode &status) = 0;
969 #endif // U_FORCE_HIDE_DEPRECATED_API
970 
     // Gets the variable top value of a Collator.
978  virtual uint32_t getVariableTop(UErrorCode &status) const = 0;
979 
     // Gets a UnicodeSet that contains all the characters and sequences
     // tailored in this collator.
989  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const;
990 
991 #ifndef U_FORCE_HIDE_DEPRECATED_API
     // Same as clone().
999  virtual Collator* safeClone() const;
1000 #endif // U_FORCE_HIDE_DEPRECATED_API
1001 
     // Gets the sort key as an array of bytes from a UnicodeString.
1018  virtual int32_t getSortKey(const UnicodeString& source,
1019  uint8_t* result,
1020  int32_t resultLength) const = 0;
1021 
     // Gets the sort key as an array of bytes from a char16_t buffer.
1041  virtual int32_t getSortKey(const char16_t*source, int32_t sourceLength,
1042  uint8_t*result, int32_t resultLength) const = 0;
1043 
     // Produces a bound for a given sortkey and a number of levels.
1081  static int32_t U_EXPORT2 getBound(const uint8_t *source,
1082  int32_t sourceLength,
1083  UColBoundMode boundType,
1084  uint32_t noOfLevels,
1085  uint8_t *result,
1086  int32_t resultLength,
1087  UErrorCode &status);
1088 
1089 
1090 protected:
1091 
1092  // Collator protected constructors -------------------------------------
1093 
     // NOTE(review): the default constructor `Collator()` appears in this
     // page's member index but its declaration is missing from the listing
     // around here.
1102 
1103 #ifndef U_HIDE_DEPRECATED_API
     // Constructor taking a collation strength and a decomposition mode.
1115  Collator(UCollationStrength collationStrength,
1116  UNormalizationMode decompositionMode);
1117 #endif /* U_HIDE_DEPRECATED_API */
1118 
     // Copy constructor.
1124  Collator(const Collator& other);
1125 
1126 public:
     // Used internally by registration to define the requested and valid
     // locales.
1134  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale);
1135 
     // Gets the short definition string for a collator.
1159  virtual int32_t internalGetShortDefinitionString(const char *locale,
1160  char *buffer,
1161  int32_t capacity,
1162  UErrorCode &status) const;
1163 
     // Implements ucol_strcollUTF8().
     // NOTE(review): the first line of this declaration,
     // `virtual UCollationResult internalCompareUTF8(`, is missing from this
     // listing.
1169  const char *left, int32_t leftLength,
1170  const char *right, int32_t rightLength,
1171  UErrorCode &errorCode) const;
1172 
     // Implements ucol_nextSortKeyPart().
     // NOTE(review): listing line 1178, which carries the function name
     // `internalNextSortKeyPart(`, is missing between the next two lines.
1177  virtual int32_t
1179  UCharIterator *iter, uint32_t state[2],
1180  uint8_t *dest, int32_t count, UErrorCode &errorCode) const;
1181 
1182 #ifndef U_HIDE_INTERNAL_API
     // Reinterprets a UCollator pointer as a Collator pointer; a plain
     // reinterpret_cast with no check.
1184  static inline Collator *fromUCollator(UCollator *uc) {
1185  return reinterpret_cast<Collator *>(uc);
1186  }
     // Const overload of fromUCollator.
1188  static inline const Collator *fromUCollator(const UCollator *uc) {
1189  return reinterpret_cast<const Collator *>(uc);
1190  }
     // Reinterprets this object as a UCollator pointer.
     // NOTE(review): the header line of this function,
     // `inline UCollator *toUCollator() {` (listing line 1192), is missing
     // from this listing.
1193  return reinterpret_cast<UCollator *>(this);
1194  }
     // Const overload of toUCollator.
1196  inline const UCollator *toUCollator() const {
1197  return reinterpret_cast<const UCollator *>(this);
1198  }
1199 #endif // U_HIDE_INTERNAL_API
1200 
1201 private:
     // Assignment operator, declared in the private section; no definition is
     // visible in this listing.
1205  Collator& operator=(const Collator& other);
1206 
     // Classes granted access to the private members of Collator.
1207  friend class CFactory;
1208  friend class SimpleCFactory;
1209  friend class ICUCollatorFactory;
1210  friend class ICUCollatorService;
     // Private static helper taking a desired locale and an error code; its
     // behavior is not visible in this listing.
1211  static Collator* makeInstance(const Locale& desiredLocale,
1212  UErrorCode& status);
1213 };
1214 
1215 #if !UCONFIG_NO_SERVICE
// CollatorFactory: a factory, used with registerFactory, that creates multiple
// collators and provides display names.
// NOTE(review): the class header line for CollatorFactory (listing line 1232)
// is missing from this listing; only the class body is shown.
1233 public:
1234 
     // Destructor.
1239  virtual ~CollatorFactory();
1240 
     // Returns true if this factory is visible.
1248  virtual UBool visible(void) const;
1249 
     // Returns a collator for the provided locale.
1257  virtual Collator* createCollator(const Locale& loc) = 0;
1258 
     // Returns the name of the collator for the objectLocale, localized for
     // the displayLocale.
1269  virtual UnicodeString& getDisplayName(const Locale& objectLocale,
1270  const Locale& displayLocale,
1271  UnicodeString& result);
1272 
     // Returns an array of all the locale names directly supported by this
     // factory; `count` is an output reference.
1282  virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode& status) = 0;
1283 };
1284 #endif /* UCONFIG_NO_SERVICE */
1285 
1286 // Collator inline methods -----------------------------------------------
1287 
1288 U_NAMESPACE_END
1289 
1290 #endif /* #if !UCONFIG_NO_COLLATION */
1291 
1292 #endif /* U_SHOW_CPLUSPLUS_API */
1293 
1294 #endif
Collation keys are generated by the Collator class.
Definition: sortkey.h:101
A factory, used with registerFactory, that creates multiple collators and provides display names for t...
Definition: coll.h:1232
virtual UBool visible(void) const
Return true if this factory is visible.
virtual const UnicodeString * getSupportedIDs(int32_t &count, UErrorCode &status)=0
Return an array of all the locale names directly supported by this factory.
virtual UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &result)
Return the name of the collator for the objectLocale, localized for the displayLocale.
virtual Collator * createCollator(const Locale &loc)=0
Return a collator for the provided locale.
virtual ~CollatorFactory()
Destructor.
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:167
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const
Implements ucol_strcollUTF8().
static URegistryKey registerInstance(Collator *toAdopt, const Locale &locale, UErrorCode &status)
Register a new Collator.
virtual Collator * clone() const =0
Makes a copy of this object.
static UnicodeString & getDisplayName(const Locale &objectLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the language of the default locale.
UBool equals(const UnicodeString &source, const UnicodeString &target) const
Convenience method for comparing two strings based on the collation rules.
static StringEnumeration * getKeywords(UErrorCode &status)
Create a string enumerator of all possible keywords that are relevant to collation.
const UCollator * toUCollator() const
Definition: coll.h:1196
virtual void getVersion(UVersionInfo info) const =0
Gets the version information for a Collator.
virtual Collator * safeClone() const
Same as clone().
virtual bool operator==(const Collator &other) const
Returns true if "other" is the same as "this".
virtual CollationKey & getCollationKey(const char16_t *source, int32_t sourceLength, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
static Locale getFunctionalEquivalent(const char *keyword, const Locale &locale, UBool &isAvailable, UErrorCode &status)
Return the functionally equivalent locale for the given requested locale, with respect to given keywo...
Collator()
Default constructor.
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale)
Used internally by registration to define the requested and valid locales.
static StringEnumeration * getKeywordValues(const char *keyword, UErrorCode &status)
Given a keyword, create a string enumeration of all values for that keyword that are currently in use...
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status)=0
Universal attribute setter.
static Collator * createInstance(UErrorCode &err)
Creates the Collator object for the current default locale.
UBool greaterOrEqual(const UnicodeString &source, const UnicodeString &target) const
Convenience method for comparing two strings based on the collation rules.
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a UnicodeString.
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const
Compares two UTF-8 strings using the Collator.
UBool greater(const UnicodeString &source, const UnicodeString &target) const
Convenience method for comparing two strings based on the collation rules.
virtual uint32_t getVariableTop(UErrorCode &status) const =0
Gets the variable top value of a Collator.
virtual void setStrength(ECollationStrength newStrength)
Sets the minimum strength to be used in comparison or transformation.
static int32_t getBound(const uint8_t *source, int32_t sourceLength, UColBoundMode boundType, uint32_t noOfLevels, uint8_t *result, int32_t resultLength, UErrorCode &status)
Produce a bound for a given sortkey and a number of levels.
virtual UCollationResult compare(const char16_t *source, int32_t sourceLength, const char16_t *target, int32_t targetLength, UErrorCode &status) const =0
The comparison function compares the character data stored in two different string arrays.
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1184
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const
Retrieves the reordering codes for this collator.
UCollator * toUCollator()
Definition: coll.h:1192
virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status)=0
Sets the variable top to the primary weight of the specified string.
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:198
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, UErrorCode &status) const =0
The comparison function compares the character data stored in two different strings.
static StringEnumeration * getAvailableLocales(void)
Return a StringEnumeration over the locales available at the time of the call, including registered l...
virtual UCollationResult compare(UCharIterator &sIter, UCharIterator &tIter, UErrorCode &status) const
Compares two strings using the Collator.
static StringEnumeration * getKeywordValuesForLocale(const char *keyword, const Locale &locale, UBool commonlyUsed, UErrorCode &status)
Given a key and a locale, returns an array of string values in a preferred order that would make a di...
static URegistryKey registerFactory(CollatorFactory *toAdopt, UErrorCode &status)
Register a new CollatorFactory.
Collator(UCollationStrength collationStrength, UNormalizationMode decompositionMode)
Constructor.
static const Collator * fromUCollator(const UCollator *uc)
Definition: coll.h:1188
virtual ~Collator()
Destructor.
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const
Implements ucol_nextSortKeyPart().
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const =0
Gets the locale of the Collator.
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const =0
Universal attribute getter.
virtual EComparisonResult compare(const char16_t *source, int32_t sourceLength, const char16_t *target, int32_t targetLength) const
The comparison function compares the character data stored in two different string arrays.
virtual UColReorderCode getMaxVariable() const
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
virtual int32_t hashCode(void) const =0
Generates the hash code for the collation object.
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode)
Sets the variable top to the top of the specified reordering group.
virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const =0
Get the sort key as an array of bytes from a char16_t buffer.
EComparisonResult
LESS is returned if source string is compared to be less than target string in the compare() method.
Definition: coll.h:221
virtual bool operator!=(const Collator &other) const
Returns true if "other" is not the same as "this".
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status)
Sets the ordering of scripts for this collator.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const =0
Transforms the string into a series of characters that can be compared with CollationKey::compareTo.
virtual ECollationStrength getStrength(void) const
Determines the minimum strength that will be used in comparison or transformation.
virtual void setVariableTop(uint32_t varTop, UErrorCode &status)=0
Sets the variable top to the specified primary weight.
virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status)=0
Sets the variable top to the primary weight of the specified string.
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, int32_t length, UErrorCode &status) const =0
Does the same thing as compare but limits the comparison to a specified length.
static UBool unregister(URegistryKey key, UErrorCode &status)
Unregister a previously-registered Collator or CollatorFactory using the key returned from the regist...
Collator(const Collator &other)
Copy constructor.
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target, int32_t length) const
Does the same thing as compare but limits the comparison to a specified length.
static Collator * createInstance(const Locale &loc, UErrorCode &err)
Gets the collation object for the desired locale.
static int32_t getEquivalentReorderCodes(int32_t reorderCode, int32_t *dest, int32_t destCapacity, UErrorCode &status)
Retrieves the reorder codes that are grouped with the given reorder code.
static const Locale * getAvailableLocales(int32_t &count)
Get the set of Locales for which Collations are installed.
static UnicodeString & getDisplayName(const Locale &objectLocale, const Locale &displayLocale, UnicodeString &name)
Get name of the object for the desired Locale, in the desired language.
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const
Get the short definition string for a collator.
virtual UClassID getDynamicClassID(void) const override=0
Returns a unique class ID POLYMORPHICALLY.
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
Base class for 'pure' C++ implementations of uenum api.
Definition: strenum.h:61
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:60
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
C++ API: Locale ID object.
C++ API: StringPiece: Read-only byte string wrapper class.
C API for code unit iteration.
Definition: uiter.h:341
C API: Collator.
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:61
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:245
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:76
@ UCOL_LESS
string a < string b
Definition: ucol.h:82
@ UCOL_GREATER
string a > string b
Definition: ucol.h:80
@ UCOL_EQUAL
string a == string b
Definition: ucol.h:78
UColBoundMode
Enum that is taken by the ucol_getBound API. See below for explanation; do not change the values assigned...
Definition: ucol.h:1062
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:92
@ UCOL_TERTIARY
Tertiary collation strength.
Definition: ucol.h:101
@ UCOL_IDENTICAL
Identical collation strength.
Definition: ucol.h:108
@ UCOL_QUATERNARY
Quaternary collation strength.
Definition: ucol.h:106
@ UCOL_PRIMARY
Primary collation strength.
Definition: ucol.h:97
@ UCOL_SECONDARY
Secondary collation strength.
Definition: ucol.h:99
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
Definition: ucol.h:149
C API: Unicode Character Iteration.
ULocDataLocaleType
Constants for *_getLocale(). Allows the user to select whether they want information on requested,...
Definition: uloc.h:338
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
C API: misc definitions.
const void * URegistryKey
Opaque type returned by registerInstance, registerFactory and unregister for service registration.
Definition: umisc.h:57
C++ API: Unicode Set.
C API: Unicode Normalization.
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:140
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:301
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59