21 #if U_SHOW_CPLUSPLUS_API
28 #if !UCONFIG_NO_BREAK_ITERATION
40 class LanguageBreakEngine;
41 struct RBBIDataHeader;
42 class RBBIDataWrapper;
43 class UnhandledEngine;
47 #ifndef U_HIDE_INTERNAL_API
103 int32_t* foundBreaks, int32_t foundBreaksCapacity,
129 #ifndef U_HIDE_INTERNAL_API
137 RBBIDataWrapper *fData =
nullptr;
150 int32_t fPosition = 0;
155 int32_t fRuleStatusIndex = 0;
161 BreakCache *fBreakCache =
nullptr;
167 class DictionaryCache;
168 DictionaryCache *fDictionaryCache =
nullptr;
177 UStack *fLanguageBreakEngines =
nullptr;
186 UnhandledEngine *fUnhandledBreakEngine =
nullptr;
193 uint32_t fDictionaryCharCount = 0;
217 int32_t *fLookAheadMatches =
nullptr;
222 UBool fIsPhraseBreaking =
false;
256 friend class RBBIRuleBuilder;
495 virtual int32_t
last()
override;
507 virtual int32_t
next(int32_t n)
override;
514 virtual int32_t
next()
override;
645 #ifndef U_FORCE_HIDE_DEPRECATED_API
737 int32_t handleSafePrevious(int32_t fromPosition);
751 int32_t handleNext();
769 template<
typename RowType, PTrieFunc trieFunc>
770 int32_t handleSafePrevious(int32_t fromPosition);
772 template<
typename RowType, PTrieFunc trieFunc>
773 int32_t handleNext();
783 const LanguageBreakEngine *getLanguageBreakEngine(
UChar32 c,
const char* locale);
786 #ifndef U_HIDE_INTERNAL_API
800 #ifndef U_HIDE_INTERNAL_API
The BreakIterator class implements methods for finding the location of boundaries in text.
virtual bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
Abstract class that defines an API for iteration on text objects.
The ExternalBreakEngine class defines an abstract interface for the host environment to provide a low ...
virtual bool isFor(UChar32 c, const char *locale) const =0
virtual int32_t fillBreaks(UText *text, int32_t start, int32_t end, int32_t *foundBreaks, int32_t foundBreaksCapacity, UErrorCode &status) const =0
virtual bool handles(UChar32 c) const =0
virtual ~ExternalBreakEngine()
destructor
A subclass of BreakIterator whose behavior is specified using a list of rules.
virtual int32_t first() override
Sets the current iteration position to the beginning of the text, position zero.
virtual int32_t hashCode() const
Compute a hash code for this BreakIterator.
virtual int32_t following(int32_t offset) override
Sets the iterator to refer to the first boundary position following the specified position.
virtual bool operator==(const BreakIterator &that) const override
Equality operator.
virtual int32_t current() const override
Returns the current iteration position.
static UClassID getStaticClassID()
Returns the class ID for this class.
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status) override
Set the subject text string upon which the break iterator is operating without changing any other asp...
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
virtual UClassID getDynamicClassID() const override
Returns a unique class ID POLYMORPHICALLY.
void dumpTables()
Debugging function only.
virtual int32_t next() override
Advances the iterator to the next boundary position.
virtual int32_t getRuleStatus() const override
Return the status tag from the break rule that determined the boundary at the current iteration posit...
virtual RuleBasedBreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status) override
Deprecated functionality.
virtual UBool isBoundary(int32_t offset) override
Returns true if the specified position is a boundary position.
bool operator!=(const BreakIterator &that) const
Not-equal operator.
virtual RuleBasedBreakIterator * clone() const override
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the sam...
void dumpCache()
Debugging function only.
static void registerExternalBreakEngine(ExternalBreakEngine *toAdopt, UErrorCode &status)
Register a new external break engine.
RuleBasedBreakIterator()
Default constructor.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override
Get the status (tag) values from the break rule(s) that determined the boundary at the current iterat...
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator...
virtual int32_t last() override
Sets the current iteration position to the end of the text.
virtual int32_t preceding(int32_t offset) override
Sets the iterator to refer to the last boundary position before the specified position.
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
virtual void adoptText(CharacterIterator *newText) override
Set the iterator to analyze a new piece of text.
virtual UText * getUText(UText *fillIn, UErrorCode &status) const override
Get a UText for the text being analyzed.
virtual int32_t previous() override
Moves the iterator backwards, to the last boundary preceding this one.
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a m...
virtual int32_t next(int32_t n) override
Advances the iterator either forward or backward the specified number of steps.
virtual CharacterIterator & getText() const override
virtual const UnicodeString & getRules() const
Returns the description used to create this iterator.
virtual void setText(UText *text, UErrorCode &status) override
Reset the break iterator to operate over the text represented by the UText.
virtual ~RuleBasedBreakIterator()
Destructor.
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
virtual void setText(const UnicodeString &newText) override
Set the iterator to analyze a new piece of text.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
UObject is the common ICU "boilerplate" class.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
C API: Parse Error Information.
C++ API: String Character Iterator.
Immutable Unicode code point trie structure.
A UParseError struct is used to return detailed information about parsing errors.
C API: Data loading interface.
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
int8_t UBool
The ICU boolean type, a signed-byte integer.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
#define UTEXT_INITIALIZER
Initializer to be used with local (stack) instances of a UText struct.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
@ U_ZERO_ERROR
No error, no warning.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.