21#if U_SHOW_CPLUSPLUS_API
28#if !UCONFIG_NO_BREAK_ITERATION
40class LanguageBreakEngine;
47#ifndef U_HIDE_INTERNAL_API
129#ifndef U_HIDE_INTERNAL_API
177 UStack *fLanguageBreakEngines =
nullptr;
217 int32_t *fLookAheadMatches =
nullptr;
222 UBool fIsPhraseBreaking =
false;
256 friend class RBBIRuleBuilder;
645#ifndef U_FORCE_HIDE_DEPRECATED_API
769 template<
typename RowType, PTrieFunc trieFunc>
772 template<
typename RowType, PTrieFunc trieFunc>
786#ifndef U_HIDE_INTERNAL_API
800#ifndef U_HIDE_INTERNAL_API
The BreakIterator class implements methods for finding the location of boundaries in text.
virtual bool operator==(const BreakIterator &) const =0
Return true if another object is semantically equal to this one.
Abstract class that defines an API for iteration on text objects.
The ExternalBreakEngine class defines an abstract interface for the host environment to provide a low ...
virtual bool isFor(UChar32 c, const char *locale) const =0
virtual int32_t fillBreaks(UText *text, int32_t start, int32_t end, int32_t *foundBreaks, int32_t foundBreaksCapacity, UErrorCode &status) const =0
virtual bool handles(UChar32 c) const =0
virtual ~ExternalBreakEngine()
destructor
"Smart pointer" base class; do not use directly: use LocalPointer etc.
A subclass of BreakIterator whose behavior is specified using a list of rules.
virtual const UnicodeString & getRules() const
Returns the description used to create this iterator.
virtual int32_t first() override
Sets the current iteration position to the beginning of the text, position zero.
virtual int32_t hashCode() const
Compute a hash code for this BreakIterator.
virtual int32_t following(int32_t offset) override
Sets the iterator to refer to the first boundary position following the specified position.
virtual bool operator==(const BreakIterator &that) const override
Equality operator.
virtual int32_t current() const override
Returns the current iteration position.
static UClassID getStaticClassID()
Returns the class ID for this class.
virtual RuleBasedBreakIterator * clone() const override
Returns a newly-constructed RuleBasedBreakIterator with the same behavior, and iterating over the sam...
virtual RuleBasedBreakIterator * createBufferClone(void *stackBuffer, int32_t &BufferSize, UErrorCode &status) override
Deprecated functionality.
virtual UClassID getDynamicClassID() const override
Returns a unique class ID POLYMORPHICALLY.
void dumpTables()
Debugging function only.
virtual int32_t next() override
Advances the iterator to the next boundary position.
virtual int32_t getRuleStatus() const override
Return the status tag from the break rule that determined the boundary at the current iteration posit...
virtual UText * getUText(UText *fillIn, UErrorCode &status) const override
Get a UText for the text being analyzed.
virtual UBool isBoundary(int32_t offset) override
Returns true if the specified position is a boundary position.
bool operator!=(const BreakIterator &that) const
Not-equal operator.
virtual CharacterIterator & getText() const override
void dumpCache()
Debugging function only.
static void registerExternalBreakEngine(ExternalBreakEngine *toAdopt, UErrorCode &status)
Register a new external break engine.
virtual RuleBasedBreakIterator & refreshInputText(UText *input, UErrorCode &status) override
Set the subject text string upon which the break iterator is operating without changing any other asp...
RuleBasedBreakIterator()
Default constructor.
virtual int32_t getRuleStatusVec(int32_t *fillInVec, int32_t capacity, UErrorCode &status) override
Get the status (tag) values from the break rule(s) that determined the boundary at the current iterat...
RuleBasedBreakIterator & operator=(const RuleBasedBreakIterator &that)
Assignment operator.
virtual int32_t last() override
Sets the current iteration position to the end of the text.
virtual int32_t preceding(int32_t offset) override
Sets the iterator to refer to the last boundary position before the specified position.
RuleBasedBreakIterator(const RuleBasedBreakIterator &that)
Copy constructor.
virtual void adoptText(CharacterIterator *newText) override
Set the iterator to analyze a new piece of text.
virtual int32_t previous() override
Moves the iterator backwards, to the last boundary preceding this one.
RuleBasedBreakIterator(UDataMemory *image, UErrorCode &status)
This constructor uses the udata interface to create a BreakIterator whose internal tables live in a m...
virtual int32_t next(int32_t n) override
Advances the iterator either forward or backward the specified number of steps.
virtual void setText(UText *text, UErrorCode &status) override
Reset the break iterator to operate over the text represented by the UText.
virtual ~RuleBasedBreakIterator()
Destructor.
RuleBasedBreakIterator(const UnicodeString &rules, UParseError &parseError, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of rules supplied as a string.
virtual const uint8_t * getBinaryRules(uint32_t &length)
Return the binary form of compiled break rules, which can then be used to create a new break iterator...
RuleBasedBreakIterator(const uint8_t *compiledRules, uint32_t ruleLength, UErrorCode &status)
Construct a RuleBasedBreakIterator from a set of precompiled binary rules.
virtual void setText(const UnicodeString &newText) override
Set the iterator to analyze a new piece of text.
A concrete subclass of CharacterIterator that iterates over the characters (code units or code points...
UObject is the common ICU "boilerplate" class.
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
C API: Parse Error Information.
C++ API: String Character Iterator.
Immutable Unicode code point trie structure.
A UParseError struct is used to return detailed information about parsing errors.
C API: Data loading interface.
struct UDataMemory UDataMemory
Forward declaration of the data memory type.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
int8_t UBool
The ICU boolean type, a signed-byte integer.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
#define UTEXT_INITIALIZER
initializer to be used with local (stack) instances of a UText struct.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
@ U_ZERO_ERROR
No error, no warning.
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.