ICU 70.1  70.1
tblcoll.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ******************************************************************************
5 * Copyright (C) 1996-2016, International Business Machines Corporation and
6 * others. All Rights Reserved.
7 ******************************************************************************
8 */
9 
62 #ifndef TBLCOLL_H
63 #define TBLCOLL_H
64 
65 #include "unicode/utypes.h"
66 
67 #if U_SHOW_CPLUSPLUS_API
68 
69 #if !UCONFIG_NO_COLLATION
70 
71 #include "unicode/coll.h"
72 #include "unicode/locid.h"
73 #include "unicode/uiter.h"
74 #include "unicode/ucol.h"
75 
76 U_NAMESPACE_BEGIN
77 
78 struct CollationCacheEntry;
79 struct CollationData;
80 struct CollationSettings;
81 struct CollationTailoring;
85 class StringSearch;
89 class CollationElementIterator;
90 class CollationKey;
91 class SortKeyByteSink;
92 class UnicodeSet;
93 class UnicodeString;
94 class UVector64;
95 
116 public:
126 
137  ECollationStrength collationStrength,
138  UErrorCode& status);
139 
150  UColAttributeValue decompositionMode,
151  UErrorCode& status);
152 
164  ECollationStrength collationStrength,
165  UColAttributeValue decompositionMode,
166  UErrorCode& status);
167 
168 #ifndef U_HIDE_INTERNAL_API
174  UParseError &parseError, UnicodeString &reason,
175  UErrorCode &errorCode);
176 #endif /* U_HIDE_INTERNAL_API */
177 
184 
185 
// Opens a collator from a collator binary image created using cloneBinary().
// bin/length describe the image; status is the ICU error code in/out parameter.
// NOTE(review): base is presumably the collator the image is tailored from -- confirm.
203  RuleBasedCollator(const uint8_t *bin, int32_t length,
204  const RuleBasedCollator *base,
205  UErrorCode &status);
206 
212 
219 
// Returns true if the argument is the same as this object (overrides Collator).
226  virtual bool operator==(const Collator& other) const override;
227 
// Makes a copy of this object; the override returns the derived pointer type.
233  virtual RuleBasedCollator* clone() const override;
234 
246  const UnicodeString& source) const;
247 
258  const CharacterIterator& source) const;
259 
260  // Make deprecated versions of Collator::compare() visible.
261  using Collator::compare;
262 
// Compares the character data stored in two different strings.
// status is the ICU error code in/out parameter.
275  virtual UCollationResult compare(const UnicodeString& source,
276  const UnicodeString& target,
277  UErrorCode &status) const override;
278 
// Does the same thing as compare(source, target, status) but limits the
// comparison to the specified length.
292  virtual UCollationResult compare(const UnicodeString& source,
293  const UnicodeString& target,
294  int32_t length,
295  UErrorCode &status) const override;
296 
// Compares the character data stored in two different string arrays,
// each given as a char16_t pointer plus a length.
313  virtual UCollationResult compare(const char16_t* source, int32_t sourceLength,
314  const char16_t* target, int32_t targetLength,
315  UErrorCode &status) const override;
316 
329  UCharIterator &tIter,
330  UErrorCode &status) const override;
331 
// Compares two UTF-8 strings (passed as StringPiece views) using the Collator.
345  virtual UCollationResult compareUTF8(const StringPiece &source,
346  const StringPiece &target,
347  UErrorCode &status) const override;
348 
364  CollationKey& key,
365  UErrorCode& status) const override;
366 
// Transforms a specified region of the string (source, sourceLength) into a
// comparable form; the result type is CollationKey.
// NOTE(review): the returned reference is presumably the key argument -- confirm.
382  virtual CollationKey& getCollationKey(const char16_t *source,
383  int32_t sourceLength,
384  CollationKey& key,
385  UErrorCode& status) const override;
386 
// Generates the hash code for the rule-based collation object.
392  virtual int32_t hashCode() const override;
393 
394 #ifndef U_FORCE_HIDE_DEPRECATED_API
// Gets the locale of the Collator for the requested locale type.
// Only declared when U_FORCE_HIDE_DEPRECATED_API is not defined (see enclosing guard).
405  virtual Locale getLocale(ULocDataLocaleType type, UErrorCode& status) const override;
406 #endif // U_FORCE_HIDE_DEPRECATED_API
407 
// Gets the tailoring rules for this collator.
413  const UnicodeString& getRules() const;
414 
// Gets the version information for a Collator; info is a UVersionInfo byte array.
420  virtual void getVersion(UVersionInfo info) const override;
421 
422 #ifndef U_HIDE_DEPRECATED_API
// Returns the maximum length of any expansion sequences that end with the
// specified comparison order. Hidden when U_HIDE_DEPRECATED_API is defined.
439  int32_t getMaxExpansion(int32_t order) const;
440 #endif /* U_HIDE_DEPRECATED_API */
441 
// Returns a unique class ID polymorphically (ICU's substitute for compiler RTTI).
452  virtual UClassID getDynamicClassID(void) const override;
453 
// Returns the class ID for this class.
465  static UClassID U_EXPORT2 getStaticClassID(void);
466 
467 #ifndef U_HIDE_DEPRECATED_API
// Do not use this method: the caller and the ICU library might use different heaps.
// Hidden when U_HIDE_DEPRECATED_API is defined.
478  uint8_t *cloneRuleData(int32_t &length, UErrorCode &status) const;
479 #endif /* U_HIDE_DEPRECATED_API */
480 
// Creates a binary image of a collator in the caller-supplied buffer.
// NOTE(review): the int32_t result is presumably the image size in bytes -- confirm.
491  int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const;
492 
// Returns the current rules in buffer; delta is a UColRuleOption
// (an option for retrieving the rule string).
504  void getRules(UColRuleOption delta, UnicodeString &buffer) const;
505 
514  UErrorCode &status) override;
515 
524  UErrorCode &status) const override;
525 
// Sets the variable top to the top of the specified reordering group.
542  virtual Collator &setMaxVariable(UColReorderCode group, UErrorCode &errorCode) override;
543 
// Returns the maximum reordering group whose characters are affected by
// UCOL_ALTERNATE_HANDLING.
550  virtual UColReorderCode getMaxVariable() const override;
551 
552 #ifndef U_FORCE_HIDE_DEPRECATED_API
// Sets the variable top to the primary weight of the specified string
// (char16_t pointer plus length overload).
569  virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) override;
570 
// Sets the variable top to the primary weight of the specified string
// (UnicodeString overload).
586  virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) override;
587 
// Sets the variable top to the specified primary weight.
599  virtual void setVariableTop(uint32_t varTop, UErrorCode &status) override;
600 #endif // U_FORCE_HIDE_DEPRECATED_API
601 
// Gets the variable top value of a Collator.
609  virtual uint32_t getVariableTop(UErrorCode &status) const override;
610 
// Gets a UnicodeSet that contains all the characters and sequences tailored
// in this collator.
// NOTE(review): raw pointer return -- presumably owned by the caller; confirm.
620  virtual UnicodeSet *getTailoredSet(UErrorCode &status) const override;
621 
// Gets the sort key as an array of bytes from a UnicodeString.
// result/resultLength describe the caller-supplied output buffer.
636  virtual int32_t getSortKey(const UnicodeString& source, uint8_t *result,
637  int32_t resultLength) const override;
638 
// Gets the sort key as an array of bytes from a char16_t buffer.
// result/resultLength describe the caller-supplied output buffer.
655  virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength,
656  uint8_t *result, int32_t resultLength) const override;
657 
// Retrieves the reordering codes for this collator into dest
// (capacity destCapacity).
671  virtual int32_t getReorderCodes(int32_t *dest,
672  int32_t destCapacity,
673  UErrorCode& status) const override;
674 
// Sets the ordering of scripts for this collator from an array of
// reorderCodesLength reorder codes.
686  virtual void setReorderCodes(const int32_t* reorderCodes,
687  int32_t reorderCodesLength,
688  UErrorCode& status) override;
689 
695  const char *left, int32_t leftLength,
696  const char *right, int32_t rightLength,
697  UErrorCode &errorCode) const override;
698 
// Gets the short definition string for a collator, written to buffer
// (capacity bytes available).
722  virtual int32_t internalGetShortDefinitionString(const char *locale,
723  char *buffer,
724  int32_t capacity,
725  UErrorCode &status) const override;
726 
// Implements ucol_nextSortKeyPart().
731  virtual int32_t internalNextSortKeyPart(
732  UCharIterator *iter, uint32_t state[2],
733  uint8_t *dest, int32_t count, UErrorCode &errorCode) const override;
734 
735  // Do not enclose the default constructor with #ifndef U_HIDE_INTERNAL_API
741 
742 #ifndef U_HIDE_INTERNAL_API
// Implements ucol_getLocaleByType().
749  const char *internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const;
750 
764  UnicodeSet *contractions, UnicodeSet *expansions,
765  UBool addPrefixes, UErrorCode &errorCode) const;
766 
// Adds the contractions that start with character c to the set.
772  void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const;
773 
779  const UnicodeString &rules,
780  int32_t strength,
781  UColAttributeValue decompositionMode,
782  UParseError *outParseError, UnicodeString *outReason,
783  UErrorCode &errorCode);
784 
787  return dynamic_cast<RuleBasedCollator *>(fromUCollator(uc));
788  }
// Const overload: converts a C-API UCollator pointer to the C++ object via
// fromUCollator() and then downcasts with dynamic_cast, which yields a null
// pointer when the object is not a RuleBasedCollator.
790  static inline const RuleBasedCollator *rbcFromUCollator(const UCollator *uc) {
791  return dynamic_cast<const RuleBasedCollator *>(fromUCollator(uc));
792  }
793 
// Appends the CEs (collation elements) for the string to the vector.
798  void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const;
799 #endif // U_HIDE_INTERNAL_API
800 
801 protected:
// Used internally by registration to define the requested and valid locales.
809  virtual void setLocales(const Locale& requestedLocale, const Locale& validLocale, const Locale& actualLocale) override;
810 
811 private:
812  friend class CollationElementIterator;
813  friend class Collator;
814 
// Private constructor taking a CollationCacheEntry; reachable only from this
// class and its friends (CollationElementIterator, Collator).
// NOTE(review): presumably wraps an already-loaded cache entry -- confirm in the .cpp.
815  RuleBasedCollator(const CollationCacheEntry *entry);
816 
// Private attribute indexes that extend the public UColAttribute range.
// ATTR_VARIABLE_TOP takes the first index after the normal attributes
// (UCOL_ATTRIBUTE_COUNT is one more than the highest normal UColAttribute);
// ATTR_LIMIT is one past the last index.
822  enum Attributes {
823  ATTR_VARIABLE_TOP = UCOL_ATTRIBUTE_COUNT,
824  ATTR_LIMIT
825  };
826 
// NOTE(review): by ICU naming convention "adopt" presumably means this object
// takes ownership of t -- confirm against the implementation.
827  void adoptTailoring(CollationTailoring *t, UErrorCode &errorCode);
828 
// Comparison helpers, one overload for char16_t text and one for uint8_t text.
// NOTE(review): the uint8_t overload presumably takes UTF-8 and a negative
// length presumably means NUL-terminated -- confirm.
829  // Both lengths must be <0 or else both must be >=0.
830  UCollationResult doCompare(const char16_t *left, int32_t leftLength,
831  const char16_t *right, int32_t rightLength,
832  UErrorCode &errorCode) const;
833  UCollationResult doCompare(const uint8_t *left, int32_t leftLength,
834  const uint8_t *right, int32_t rightLength,
835  UErrorCode &errorCode) const;
836 
// Takes a char16_t string (s, length) and a SortKeyByteSink output.
// NOTE(review): presumably writes the sort key for s into sink -- confirm.
837  void writeSortKey(const char16_t *s, int32_t length,
838  SortKeyByteSink &sink, UErrorCode &errorCode) const;
839 
// Takes the text as a [s, limit) pointer pair and a SortKeyByteSink output.
// NOTE(review): presumably writes the identical-level part of a sort key -- confirm.
840  void writeIdenticalLevel(const char16_t *s, const char16_t *limit,
841  SortKeyByteSink &sink, UErrorCode &errorCode) const;
842 
// Returns a reference to CollationSettings.
// NOTE(review): presumably the tailoring's default settings, as opposed to the
// per-instance `settings` member -- confirm.
843  const CollationSettings &getDefaultSettings() const;
844 
// Clears the bit for `attribute` in explicitlySetAttributes, marking the
// attribute as not explicitly set.
// No range check: attribute must be a valid shift count for a uint32_t.
845  void setAttributeDefault(int32_t attribute) {
846  explicitlySetAttributes &= ~((uint32_t)1 << attribute);
847  }
// Sets the bit for `attribute` in explicitlySetAttributes, marking the
// attribute as explicitly set.
// No range check: attribute must be a valid shift count for a uint32_t.
848  void setAttributeExplicitly(int32_t attribute) {
849  explicitlySetAttributes |= (uint32_t)1 << attribute;
850  }
// Returns nonzero iff the bit for `attribute` is currently set in
// explicitlySetAttributes.
851  UBool attributeHasBeenSetExplicitly(int32_t attribute) const {
852  // Intended precondition (not enforced): 0 <= attribute && attribute < ATTR_LIMIT.
853  return (UBool)((explicitlySetAttributes & ((uint32_t)1 << attribute)) != 0);
854  }
855 
// Predicate on a single code point.
// NOTE(review): the meaning of "unsafe" is not visible in this header -- see
// the implementation before relying on it.
863  UBool isUnsafe(UChar32 c) const;
864 
// Max-expansion support for a tailoring.
// NOTE(review): computeMaxExpansions is static and U_CALLCONV-qualified, so it
// is presumably passed as a callback, with initMaxExpansions as the per-instance
// entry point -- confirm.
865  static void U_CALLCONV computeMaxExpansions(const CollationTailoring *t, UErrorCode &errorCode);
866  UBool initMaxExpansions(UErrorCode &errorCode) const;
867 
// Takes a mutable CollationSettings to update; this object is not modified
// (const member function).
// NOTE(review): which fields are written is not visible in this header.
868  void setFastLatinOptions(CollationSettings &ownedSettings) const;
869 
// Per-instance state of the collator.
870  const CollationData *data;
871  const CollationSettings *settings; // reference-counted
872  const CollationTailoring *tailoring; // alias of cacheEntry->tailoring
873  const CollationCacheEntry *cacheEntry; // reference-counted
874  Locale validLocale;
// Bit set indexed by attribute number: bit i is set by setAttributeExplicitly(i),
// cleared by setAttributeDefault(i), and read by attributeHasBeenSetExplicitly(i).
875  uint32_t explicitlySetAttributes;
876 
877  UBool actualLocaleIsSameAsValid;
878 };
879 
880 U_NAMESPACE_END
881 
882 #endif // !UCONFIG_NO_COLLATION
883 
884 #endif /* U_SHOW_CPLUSPLUS_API */
885 
886 #endif // TBLCOLL_H
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
The CollationElementIterator class is used as an iterator to walk through each character of an inte...
Definition: coleitr.h:121
Collation keys are generated by the Collator class.
Definition: sortkey.h:101
The Collator class performs locale-sensitive string comparison.
Definition: coll.h:167
virtual EComparisonResult compare(const UnicodeString &source, const UnicodeString &target) const
The comparison function compares the character data stored in two different strings.
static Collator * fromUCollator(UCollator *uc)
Definition: coll.h:1184
ECollationStrength
Base letter represents a primary difference.
Definition: coll.h:198
A Locale object represents a specific geographical, political, or cultural region.
Definition: locid.h:195
The RuleBasedCollator class provides the implementation of Collator, using data-driven tables.
Definition: tblcoll.h:115
virtual void setVariableTop(uint32_t varTop, UErrorCode &status) override
Sets the variable top to the specified primary weight.
RuleBasedCollator(const UnicodeString &rules, UColAttributeValue decompositionMode, UErrorCode &status)
RuleBasedCollator constructor.
virtual UCollationResult internalCompareUTF8(const char *left, int32_t leftLength, const char *right, int32_t rightLength, UErrorCode &errorCode) const override
Implements ucol_strcollUTF8().
virtual CollationElementIterator * createCollationElementIterator(const CharacterIterator &source) const
Creates a collation element iterator for the source.
virtual int32_t getSortKey(const char16_t *source, int32_t sourceLength, uint8_t *result, int32_t resultLength) const override
Get the sort key as an array of bytes from a char16_t buffer.
RuleBasedCollator & operator=(const RuleBasedCollator &other)
Assignment operator.
static RuleBasedCollator * rbcFromUCollator(UCollator *uc)
Definition: tblcoll.h:786
static const RuleBasedCollator * rbcFromUCollator(const UCollator *uc)
Definition: tblcoll.h:790
virtual int32_t hashCode() const override
Generates the hash code for the rule-based collation object.
virtual uint32_t getVariableTop(UErrorCode &status) const override
Gets the variable top value of a Collator.
RuleBasedCollator(const UnicodeString &rules, ECollationStrength collationStrength, UColAttributeValue decompositionMode, UErrorCode &status)
RuleBasedCollator constructor.
virtual void setLocales(const Locale &requestedLocale, const Locale &validLocale, const Locale &actualLocale) override
Used internally by registration to define the requested and valid locales.
RuleBasedCollator(const uint8_t *bin, int32_t length, const RuleBasedCollator *base, UErrorCode &status)
Opens a collator from a collator binary image created using cloneBinary.
virtual int32_t getSortKey(const UnicodeString &source, uint8_t *result, int32_t resultLength) const override
Get the sort key as an array of bytes from a UnicodeString.
RuleBasedCollator(const UnicodeString &rules, UParseError &parseError, UnicodeString &reason, UErrorCode &errorCode)
TODO: document & propose as public API.
virtual uint32_t setVariableTop(const char16_t *varTop, int32_t len, UErrorCode &status) override
Sets the variable top to the primary weight of the specified string.
virtual uint32_t setVariableTop(const UnicodeString &varTop, UErrorCode &status) override
Sets the variable top to the primary weight of the specified string.
virtual int32_t getReorderCodes(int32_t *dest, int32_t destCapacity, UErrorCode &status) const override
Retrieves the reordering codes for this collator.
virtual UnicodeSet * getTailoredSet(UErrorCode &status) const override
Get a UnicodeSet that contains all the characters and sequences tailored in this collator.
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, UErrorCode &status) const override
The comparison function compares the character data stored in two different strings.
virtual RuleBasedCollator * clone() const override
Makes a copy of this object.
static UClassID getStaticClassID(void)
Returns the class ID for this class.
void internalBuildTailoring(const UnicodeString &rules, int32_t strength, UColAttributeValue decompositionMode, UParseError *outParseError, UnicodeString *outReason, UErrorCode &errorCode)
Implements from-rule constructors, and ucol_openRules().
void internalAddContractions(UChar32 c, UnicodeSet &set, UErrorCode &errorCode) const
Adds the contractions that start with character c to the set.
virtual Locale getLocale(ULocDataLocaleType type, UErrorCode &status) const override
Gets the locale of the Collator.
virtual UCollationResult compare(const UnicodeString &source, const UnicodeString &target, int32_t length, UErrorCode &status) const override
Does the same thing as compare but limits the comparison to a specified length.
void internalGetContractionsAndExpansions(UnicodeSet *contractions, UnicodeSet *expansions, UBool addPrefixes, UErrorCode &errorCode) const
Implements ucol_getContractionsAndExpansions().
virtual bool operator==(const Collator &other) const override
Returns true if argument is the same as this object.
const UnicodeString & getRules() const
Gets the tailoring rules for this collator.
virtual ~RuleBasedCollator()
Destructor.
virtual void setReorderCodes(const int32_t *reorderCodes, int32_t reorderCodesLength, UErrorCode &status) override
Sets the ordering of scripts for this collator.
const char * internalGetLocaleID(ULocDataLocaleType type, UErrorCode &errorCode) const
Implements ucol_getLocaleByType().
int32_t cloneBinary(uint8_t *buffer, int32_t capacity, UErrorCode &status) const
Creates a binary image of a collator.
virtual void getVersion(UVersionInfo info) const override
Gets the version information for a Collator.
virtual int32_t internalNextSortKeyPart(UCharIterator *iter, uint32_t state[2], uint8_t *dest, int32_t count, UErrorCode &errorCode) const override
Implements ucol_nextSortKeyPart().
void internalGetCEs(const UnicodeString &str, UVector64 &ces, UErrorCode &errorCode) const
Appends the CEs for the string to the vector.
int32_t getMaxExpansion(int32_t order) const
Returns the maximum length of any expansion sequences that end with the specified comparison order.
virtual int32_t internalGetShortDefinitionString(const char *locale, char *buffer, int32_t capacity, UErrorCode &status) const override
Get the short definition string for a collator.
virtual CollationElementIterator * createCollationElementIterator(const UnicodeString &source) const
Creates a collation element iterator for the source string.
uint8_t * cloneRuleData(int32_t &length, UErrorCode &status) const
Do not use this method: The caller and the ICU library might use different heaps.
RuleBasedCollator(const UnicodeString &rules, ECollationStrength collationStrength, UErrorCode &status)
RuleBasedCollator constructor.
virtual UCollationResult compare(const char16_t *source, int32_t sourceLength, const char16_t *target, int32_t targetLength, UErrorCode &status) const override
The comparison function compares the character data stored in two different string arrays.
RuleBasedCollator(const UnicodeString &rules, UErrorCode &status)
RuleBasedCollator constructor.
virtual UColAttributeValue getAttribute(UColAttribute attr, UErrorCode &status) const override
Universal attribute getter.
virtual UClassID getDynamicClassID(void) const override
Returns a unique class ID POLYMORPHICALLY.
RuleBasedCollator()
Only for use in ucol_openRules().
virtual CollationKey & getCollationKey(const char16_t *source, int32_t sourceLength, CollationKey &key, UErrorCode &status) const override
Transforms a specified region of the string into a series of characters that can be compared with Col...
RuleBasedCollator(const RuleBasedCollator &other)
Copy constructor.
virtual CollationKey & getCollationKey(const UnicodeString &source, CollationKey &key, UErrorCode &status) const override
Transforms the string into a series of characters that can be compared with CollationKey....
virtual UCollationResult compareUTF8(const StringPiece &source, const StringPiece &target, UErrorCode &status) const override
Compares two UTF-8 strings using the Collator.
virtual UColReorderCode getMaxVariable() const override
Returns the maximum reordering group whose characters are affected by UCOL_ALTERNATE_HANDLING.
virtual void setAttribute(UColAttribute attr, UColAttributeValue value, UErrorCode &status) override
Universal attribute setter.
virtual UCollationResult compare(UCharIterator &sIter, UCharIterator &tIter, UErrorCode &status) const override
Compares two strings using the Collator.
virtual Collator & setMaxVariable(UColReorderCode group, UErrorCode &errorCode) override
Sets the variable top to the top of the specified reordering group.
void getRules(UColRuleOption delta, UnicodeString &buffer) const
Returns current rules.
A string-like object that points to a sized piece of memory.
Definition: stringpiece.h:60
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
C++ API: Collation Service.
C++ API: Locale ID object.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:870
C API for code unit iteration.
Definition: uiter.h:341
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
C API: Collator.
struct UCollator UCollator
structure representing a collator object instance
Definition: ucol.h:61
UColRuleOption
Options for retrieving the rule string.
Definition: ucol.h:362
UColAttribute
Attributes that collation service understands.
Definition: ucol.h:245
@ UCOL_ATTRIBUTE_COUNT
One more than the highest normal UColAttribute value.
Definition: ucol.h:355
UCollationResult
UCOL_LESS is returned if source string is compared to be less than target string in the ucol_strcoll(...
Definition: ucol.h:76
UColAttributeValue
Enum containing attribute values for controlling collation behavior.
Definition: ucol.h:92
UColReorderCode
Enum containing the codes for reordering segments of the collation table that are not script codes.
Definition: ucol.h:149
C API: Unicode Character Iteration.
ULocDataLocaleType
Constants for *_getLocale() Allow user to select whether she wants information on requested,...
Definition: uloc.h:338
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:467
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_FINAL
Defined to the C++11 "final" keyword if available.
Definition: umachine.h:141
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:301
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.
Definition: uversion.h:59