ICU 77.1
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
coleitr.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ******************************************************************************
5  * Copyright (C) 1997-2014, International Business Machines
6  * Corporation and others. All Rights Reserved.
7  ******************************************************************************
8  */
9 
33 #ifndef COLEITR_H
34 #define COLEITR_H
35 
36 #include "unicode/utypes.h"
37 
38 #if U_SHOW_CPLUSPLUS_API
39 
40 #if !UCONFIG_NO_COLLATION
41 
42 #include "unicode/unistr.h"
43 #include "unicode/uobject.h"
44 
45 struct UCollationElements;
46 struct UHashtable;
47 
48 U_NAMESPACE_BEGIN
49 
50 struct CollationData;
51 
52 class CharacterIterator;
53 class CollationIterator;
54 class RuleBasedCollator;
55 class UCollationPCE;
56 class UVector32;
57 
122 public:
123 
124  // CollationElementIterator public data member ------------------------------
125 
126  enum {
131  NULLORDER = static_cast<int32_t>(0xffffffff)
132  };
133 
134  // CollationElementIterator public constructor/destructor -------------------
135 
143 
149 
150  // CollationElementIterator public methods ----------------------------------
151 
159  bool operator==(const CollationElementIterator& other) const;
160 
168  bool operator!=(const CollationElementIterator& other) const;
169 
174  void reset();
175 
183  int32_t next(UErrorCode& status);
184 
192  int32_t previous(UErrorCode& status);
193 
200  static inline int32_t primaryOrder(int32_t order);
201 
208  static inline int32_t secondaryOrder(int32_t order);
209 
216  static inline int32_t tertiaryOrder(int32_t order);
217 
227  int32_t getMaxExpansion(int32_t order) const;
228 
235  int32_t strengthOrder(int32_t order) const;
236 
243  void setText(const UnicodeString& str, UErrorCode& status);
244 
251  void setText(CharacterIterator& str, UErrorCode& status);
252 
259  static inline UBool isIgnorable(int32_t order);
260 
266  int32_t getOffset() const;
267 
275  void setOffset(int32_t newOffset, UErrorCode& status);
276 
282  virtual UClassID getDynamicClassID() const override;
283 
289  static UClassID U_EXPORT2 getStaticClassID();
290 
291 #ifndef U_HIDE_INTERNAL_API
294  return reinterpret_cast<CollationElementIterator *>(uc);
295  }
298  return reinterpret_cast<const CollationElementIterator *>(uc);
299  }
302  return reinterpret_cast<UCollationElements *>(this);
303  }
305  inline const UCollationElements *toUCollationElements() const {
306  return reinterpret_cast<const UCollationElements *>(this);
307  }
308 #endif // U_HIDE_INTERNAL_API
309 
310 private:
311  friend class RuleBasedCollator;
312  friend class UCollationPCE;
313 
323  CollationElementIterator(const UnicodeString& sourceText,
324  const RuleBasedCollator* order, UErrorCode& status);
325  // Note: The constructors should take settings & tailoring, not a collator,
326  // to avoid circular dependencies.
327  // However, for operator==() we would need to be able to compare tailoring data for equality
328  // without making CollationData or CollationTailoring depend on TailoredSet.
329  // (See the implementation of RuleBasedCollator::operator==().)
330  // That might require creating an intermediate class that would be used
331  // by both CollationElementIterator and RuleBasedCollator
332  // but only contain the part of RBC== related to data and rules.
333 
344  const RuleBasedCollator* order, UErrorCode& status);
345 
352  operator=(const CollationElementIterator& other);
353 
354  CollationElementIterator() = delete; // default constructor not implemented
355 
357  inline int8_t normalizeDir() const { return dir_ == 1 ? 0 : dir_; }
358 
359  static UHashtable *computeMaxExpansions(const CollationData *data, UErrorCode &errorCode);
360 
361  static int32_t getMaxExpansion(const UHashtable *maxExpansions, int32_t order);
362 
363  // CollationElementIterator private data members ----------------------------
364 
365  CollationIterator *iter_; // owned
366  const RuleBasedCollator *rbc_; // aliased
367  uint32_t otherHalf_;
372  int8_t dir_;
378  UVector32 *offsets_;
379 
380  UnicodeString string_;
381 };
382 
383 // CollationElementIterator inline method definitions --------------------------
384 
385 inline int32_t CollationElementIterator::primaryOrder(int32_t order)
386 {
387  return (order >> 16) & 0xffff;
388 }
389 
390 inline int32_t CollationElementIterator::secondaryOrder(int32_t order)
391 {
392  return (order >> 8) & 0xff;
393 }
394 
395 inline int32_t CollationElementIterator::tertiaryOrder(int32_t order)
396 {
397  return order & 0xff;
398 }
399 
400 inline UBool CollationElementIterator::isIgnorable(int32_t order)
401 {
402  return (order & 0xffff0000) == 0;
403 }
404 
405 U_NAMESPACE_END
406 
407 #endif /* #if !UCONFIG_NO_COLLATION */
408 
409 #endif /* U_SHOW_CPLUSPLUS_API */
410 
411 #endif
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
The CollationElementIterator class is used as an iterator to walk through each character of an international string.
Definition: coleitr.h:121
static CollationElementIterator * fromUCollationElements(UCollationElements *uc)
Definition: coleitr.h:293
UCollationElements * toUCollationElements()
Definition: coleitr.h:301
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
int32_t strengthOrder(int32_t order) const
Gets the comparison order in the desired strength.
int32_t previous(UErrorCode &status)
Get the ordering priority of the previous collation element in the string.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
bool operator==(const CollationElementIterator &other) const
Returns true if "other" is the same as "this".
const UCollationElements * toUCollationElements() const
Definition: coleitr.h:305
void setText(CharacterIterator &str, UErrorCode &status)
Sets the source string.
bool operator!=(const CollationElementIterator &other) const
Returns true if "other" is not the same as "this".
int32_t getOffset() const
Gets the offset of the currently processed character in the source string.
CollationElementIterator(const CollationElementIterator &other)
Copy constructor.
void setText(const UnicodeString &str, UErrorCode &status)
Sets the source string.
int32_t getMaxExpansion(int32_t order) const
Return the maximum length of any expansion sequences that end with the specified comparison order.
int32_t next(UErrorCode &status)
Gets the ordering priority of the next character in the string.
static const CollationElementIterator * fromUCollationElements(const UCollationElements *uc)
Definition: coleitr.h:297
void reset()
Resets the cursor to the beginning of the string.
void setOffset(int32_t newOffset, UErrorCode &status)
Sets the offset of the currently processed character in the source string.
virtual ~CollationElementIterator()
Destructor.
The RuleBasedCollator class provides the implementation of Collator, using data-driven tables.
Definition: tblcoll.h:115
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:296
struct UHashtable UHashtable
Definition: msgfmt.h:43
struct UCollationElements UCollationElements
The UCollationElements struct.
Definition: ucoleitr.h:39
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:316