17#ifndef __UCHARSTRIE_H__
18#define __UCHARSTRIE_H__
28#if U_SHOW_CPLUSPLUS_API
37class UCharsTrieBuilder;
71 pos_(uchars_), remainingMatchLength_(-1) {}
87 pos_(
other.pos_), remainingMatchLength_(
other.remainingMatchLength_) {}
96 remainingMatchLength_=-1;
109 return (
static_cast<uint64_t>(remainingMatchLength_ + 2) << kState64RemainingShift) |
110 static_cast<uint64_t>(pos_ - uchars_);
128 remainingMatchLength_ =
static_cast<int32_t>(
state >> kState64RemainingShift) - 2;
129 pos_ = uchars_ + (
state & kState64PosMask);
148 const char16_t *uchars;
161 state.uchars=uchars_;
163 state.remainingMatchLength=remainingMatchLength_;
178 if(uchars_==
state.uchars && uchars_!=
nullptr) {
180 remainingMatchLength_=
state.remainingMatchLength;
201 remainingMatchLength_=-1;
202 return nextImpl(uchars_,
uchar);
259 const char16_t *pos=pos_;
276 const char16_t *pos=pos_;
278 return pos!=
nullptr && findUniqueValue(pos+remainingMatchLength_+1,
false,
uniqueValue);
369 UBool truncateAndStop() {
// Declaration only; the definition is not part of this listing.
// Takes a position into the char16_t units, a length, and an ICU error code
// by reference, and returns a position in the same kind of unit array.
// NOTE(review): presumably the iterator's step through a branch node --
// confirm against the implementation file.
375 const char16_t *branchNext(
const char16_t *pos,
int32_t length,
UErrorCode &errorCode);
// Data members of the enclosing class (its header is outside this listing).
// NOTE(review): by their names, initialPos_ and initialRemainingMatchLength_
// look like the saved starting state that a reset would restore -- confirm.
377 const char16_t *uchars_;
378 const char16_t *pos_;
379 const char16_t *initialPos_;
381 int32_t initialRemainingMatchLength_;
399 friend class UCharsTrieBuilder;
// Two-pointer constructor: stores adoptUChars in ownedArray_ and trieUChars
// in uchars_. The read position starts at uchars_, and remainingMatchLength_
// starts at -1, the same value this header assigns right before calling
// nextImpl(uchars_, uchar).
// NOTE(review): "adopt" suggests ownedArray_ is released by the destructor;
// the destructor is not visible in this listing -- confirm.
407 UCharsTrie(
char16_t *adoptUChars,
const char16_t *trieUChars)
408 : ownedArray_(adoptUChars), uchars_(trieUChars),
409 pos_(uchars_), remainingMatchLength_(-1) {}
// Copy assignment is deleted: one UCharsTrie cannot be assigned to another.
412 UCharsTrie &operator=(
const UCharsTrie &other) =
delete;
420 static inline int32_t readValue(
const char16_t *pos, int32_t leadUnit) {
422 if(leadUnit<kMinTwoUnitValueLead) {
424 }
else if(leadUnit<kThreeUnitValueLead) {
425 value=((leadUnit-kMinTwoUnitValueLead)<<16)|*pos;
427 value=(pos[0]<<16)|pos[1];
431 static inline const char16_t *skipValue(
const char16_t *pos, int32_t leadUnit) {
432 if(leadUnit>=kMinTwoUnitValueLead) {
433 if(leadUnit<kThreeUnitValueLead) {
441 static inline const char16_t *skipValue(
const char16_t *pos) {
442 int32_t leadUnit=*pos++;
443 return skipValue(pos, leadUnit&0x7fff);
446 static inline int32_t readNodeValue(
const char16_t *pos, int32_t leadUnit) {
449 if(leadUnit<kMinTwoUnitNodeValueLead) {
450 value=(leadUnit>>6)-1;
451 }
else if(leadUnit<kThreeUnitNodeValueLead) {
452 value=(((leadUnit&0x7fc0)-kMinTwoUnitNodeValueLead)<<10)|*pos;
454 value=(pos[0]<<16)|pos[1];
458 static inline const char16_t *skipNodeValue(
const char16_t *pos, int32_t leadUnit) {
460 if(leadUnit>=kMinTwoUnitNodeValueLead) {
461 if(leadUnit<kThreeUnitNodeValueLead) {
470 static inline const char16_t *jumpByDelta(
const char16_t *pos) {
471 int32_t delta=*pos++;
472 if(delta>=kMinTwoUnitDeltaLead) {
473 if(delta==kThreeUnitDeltaLead) {
474 delta=(pos[0]<<16)|pos[1];
477 delta=((delta-kMinTwoUnitDeltaLead)<<16)|*pos++;
483 static const char16_t *skipDelta(
const char16_t *pos) {
484 int32_t delta=*pos++;
485 if(delta>=kMinTwoUnitDeltaLead) {
486 if(delta==kThreeUnitDeltaLead) {
// Declaration only. Non-static member taking a unit position, a length and
// one input unit (uchar), and reporting the outcome as a UStringTrieResult.
// NOTE(review): presumably the branch-node step used during next() -- confirm
// against the implementation file.
500 UStringTrieResult branchNext(
const char16_t *pos, int32_t length, int32_t uchar);
// Declaration only. Static helper that takes a unit position and a length,
// plus the haveUniqueValue flag and the uniqueValue in/out reference, and
// returns a unit position.
// NOTE(review): the meaning of the returned pointer (including any nullptr
// convention) is defined in the implementation file -- confirm there.
508 static const char16_t *findUniqueValueFromBranch(
const char16_t *pos, int32_t length,
509 UBool haveUniqueValue, int32_t &uniqueValue);
// Declaration only. Static helper whose UBool result is used directly as the
// answer at the call site in this header, where it is invoked with
// pos+remainingMatchLength_+1, haveUniqueValue=false and the caller's
// uniqueValue reference.
512 static UBool findUniqueValue(
const char16_t *pos,
UBool haveUniqueValue, int32_t &uniqueValue);
// Declaration only. Static helper that takes a unit position, a length and an
// Appendable sink; it returns nothing, so any output goes through `out`.
// NOTE(review): presumably appends each unit that can follow at a branch
// node -- confirm against the implementation file.
516 static void getNextBranchUChars(
const char16_t *pos, int32_t length, Appendable &out);
// Serialization constants. Derived values are spelled out in hex below.
// NOTE(review): by name, a size limit for linearly searched branch sub-nodes;
// its use is not visible in this listing.
561 static const int32_t kMaxBranchLinearSubNodeLength=5;
// NOTE(review): presumably the first lead unit of a linear-match node and the
// longest such match -- confirm against the implementation file.
564 static const int32_t kMinLinearMatch=0x30;
565 static const int32_t kMaxLinearMatchLength=0x10;
// 0x30 + 0x10 = 0x40.
570 static const int32_t kMinValueLead=kMinLinearMatch+kMaxLinearMatchLength;
// 0x40 - 1 = 0x3f, i.e. the low six bits of a lead unit.
571 static const int32_t kNodeTypeMask=kMinValueLead-1;
// Bit 15 of a lead unit; skipValue(pos) strips it with "& 0x7fff" before
// handing the lead unit to skipValue(pos, leadUnit).
574 static const int32_t kValueIsFinal=0x8000;
// Value lead units: readValue() takes its first branch below
// kMinTwoUnitValueLead (0x4000); from there up to, but excluding,
// kThreeUnitValueLead it combines ((lead - 0x4000) << 16) with the next unit;
// otherwise it reads the value from the two following units.
577 static const int32_t kMaxOneUnitValue=0x3fff;
579 static const int32_t kMinTwoUnitValueLead=kMaxOneUnitValue+1;
580 static const int32_t kThreeUnitValueLead=0x7fff;
// ((0x7fff - 0x4000) << 16) - 1 = 0x3ffeffff.
582 static const int32_t kMaxTwoUnitValue=((kThreeUnitValueLead-kMinTwoUnitValueLead)<<16)-1;
// Node-value lead units: readNodeValue() yields (lead >> 6) - 1 below
// kMinTwoUnitNodeValueLead, combines (((lead & 0x7fc0) - that lead) << 10)
// with the next unit below kThreeUnitNodeValueLead, and otherwise reads the
// value from the two following units.
585 static const int32_t kMaxOneUnitNodeValue=0xff;
// 0x40 + (0x100 << 6) = 0x4040.
586 static const int32_t kMinTwoUnitNodeValueLead=kMinValueLead+((kMaxOneUnitNodeValue+1)<<6);
587 static const int32_t kThreeUnitNodeValueLead=0x7fc0;
// ((0x7fc0 - 0x4040) << 10) - 1 = 0x00fdffff.
589 static const int32_t kMaxTwoUnitNodeValue=
590 ((kThreeUnitNodeValueLead-kMinTwoUnitNodeValueLead)<<10)-1;
// Jump deltas: jumpByDelta()/skipDelta() treat a first unit below
// kMinTwoUnitDeltaLead (0xfc00) as a complete delta, a first unit equal to
// kThreeUnitDeltaLead as a marker that the delta is in the two following
// units, and anything in between as ((lead - 0xfc00) << 16) | next unit.
593 static const int32_t kMaxOneUnitDelta=0xfbff;
594 static const int32_t kMinTwoUnitDeltaLead=kMaxOneUnitDelta+1;
595 static const int32_t kThreeUnitDeltaLead=0xffff;
// ((0xffff - 0xfc00) << 16) - 1 = 0x03feffff.
597 static const int32_t kMaxTwoUnitDelta=((kThreeUnitDeltaLead-kMinTwoUnitDeltaLead)<<16)-1;
// Layout of the packed 64-bit state used in this header: the value
// (remainingMatchLength_ + 2) is shifted left by kState64RemainingShift and
// OR-ed with the offset (pos_ - uchars_). Unpacking shifts right by the same
// amount and subtracts 2, and masks the offset with kState64PosMask.
603 static constexpr int32_t kState64RemainingShift = 59;
// All bits below the shift, i.e. the low 59 bits.
604 static constexpr uint64_t kState64PosMask = (
UINT64_C(1) << kState64RemainingShift) - 1;
// Receives adoptUChars in the two-pointer constructor.
// NOTE(review): presumably the array this object is responsible for freeing;
// the destructor is not visible in this listing -- confirm.
606 char16_t *ownedArray_;
// Start of the trie's char16_t units; traversal from the initial state and
// the packed-state offset (pos_ - uchars_) are both relative to this pointer.
609 const char16_t *uchars_;
// Current read position. Code in this header checks it against nullptr
// before dereferencing.
614 const char16_t *pos_;
// Initialized to -1 by the constructors and set back to -1 before traversal
// restarts from uchars_.
// NOTE(review): presumably -1 means "not inside a linear-match node" --
// confirm against the implementation file.
616 int32_t remainingMatchLength_;
Base class for objects to which Unicode characters and strings can be appended.
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
"Smart pointer" base class; do not use directly: use LocalPointer etc.
Iterator for all of the (string, value) pairs in a UCharsTrie.
Iterator(ConstChar16Ptr trieUChars, int32_t maxStringLength, UErrorCode &errorCode)
Iterates from the root of a char16_t-serialized UCharsTrie.
Iterator & reset()
Resets this iterator to its initial state.
const UnicodeString & getString() const
UBool next(UErrorCode &errorCode)
Finds the next (string, value) pair if there is one.
Iterator(const UCharsTrie &trie, int32_t maxStringLength, UErrorCode &errorCode)
Iterates from the current state of the specified UCharsTrie.
UCharsTrie state object, for saving a trie's current state and resetting the trie back to this state later.
State()
Constructs an empty State.
Light-weight, non-const reader class for a UCharsTrie.
const UCharsTrie & saveState(State &state) const
Saves the state of this trie.
UCharsTrie(ConstChar16Ptr trieUChars)
Constructs a UCharsTrie reader instance.
UCharsTrie(const UCharsTrie &other)
Copy constructor, copies the other trie reader object and its state, but not the char16_t array which will be shared. (Shallow copy.)
UBool hasUniqueValue(int32_t &uniqueValue) const
Determines whether all strings reachable from the current state map to the same value.
uint64_t getState64() const
Returns the state of this trie as a 64-bit integer.
UCharsTrie & reset()
Resets this trie to its initial state.
UCharsTrie & resetToState(const State &state)
Resets this trie to the saved state.
UStringTrieResult next(int32_t uchar)
Traverses the trie from the current state for this input char16_t.
UStringTrieResult nextForCodePoint(UChar32 cp)
Traverses the trie from the current state for the one or two UTF-16 code units for this input code point.
int32_t getValue() const
Returns a matching string's value if called immediately after current()/first()/next() returned USTRINGTRIE_INTERMEDIATE_VALUE or USTRINGTRIE_FINAL_VALUE.
UStringTrieResult firstForCodePoint(UChar32 cp)
Traverses the trie from the initial state for the one or two UTF-16 code units for this input code point.
UStringTrieResult current() const
Determines whether the string so far matches, whether it has a value, and whether another input char16_t can continue a matching string.
UStringTrieResult first(int32_t uchar)
Traverses the trie from the initial state for this input char16_t.
UCharsTrie & resetToState64(uint64_t state)
Resets this trie to the saved state.
UStringTrieResult next(ConstChar16Ptr s, int32_t length)
Traverses the trie from the current state for this string.
int32_t getNextUChars(Appendable &out) const
Finds each char16_t which continues the string from the current state.
UMemory is the common ICU base class.
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define UINT64_C(c)
Provides a platform independent way to specify an unsigned 64-bit integer constant.
int8_t UBool
The ICU boolean type, a signed-byte integer.
C++ API: Common ICU base class UObject.
C API: Helper definitions for dictionary trie APIs.
UStringTrieResult
Return values for BytesTrie::next(), UCharsTrie::next() and similar methods.
@ USTRINGTRIE_INTERMEDIATE_VALUE
The input unit(s) continued a matching string and there is a value for the string so far.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.