ICU 76.1 76.1
|
class RegexMatcher bundles together a regular expression pattern and input text to which the expression can be applied. More...
#include <regex.h>
Public Member Functions | |
RegexMatcher (const UnicodeString &regexp, uint32_t flags, UErrorCode &status) | |
Construct a RegexMatcher for a regular expression. | |
RegexMatcher (UText *regexp, uint32_t flags, UErrorCode &status) | |
Construct a RegexMatcher for a regular expression. | |
RegexMatcher (const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status) | |
Construct a RegexMatcher for a regular expression. | |
RegexMatcher (UText *regexp, UText *input, uint32_t flags, UErrorCode &status) | |
Construct a RegexMatcher for a regular expression. | |
virtual | ~RegexMatcher () |
Destructor. | |
virtual UBool | matches (UErrorCode &status) |
Attempts to match the entire input region against the pattern. | |
virtual UBool | matches (int64_t startIndex, UErrorCode &status) |
Resets the matcher, then attempts to match the input beginning at the specified startIndex, and extending to the end of the input. | |
virtual UBool | lookingAt (UErrorCode &status) |
Attempts to match the input string, starting from the beginning of the region, against the pattern. | |
virtual UBool | lookingAt (int64_t startIndex, UErrorCode &status) |
Attempts to match the input string, starting from the specified index, against the pattern. | |
virtual UBool | find () |
Find the next pattern match in the input string. | |
virtual UBool | find (UErrorCode &status) |
Find the next pattern match in the input string. | |
virtual UBool | find (int64_t start, UErrorCode &status) |
Resets this RegexMatcher and then attempts to find the next substring of the input string that matches the pattern, starting at the specified index. | |
virtual UnicodeString | group (UErrorCode &status) const |
Returns a string containing the text matched by the previous match. | |
virtual UnicodeString | group (int32_t groupNum, UErrorCode &status) const |
Returns a string containing the text captured by the given group during the previous match operation. | |
virtual int32_t | groupCount () const |
Returns the number of capturing groups in this matcher's pattern. | |
virtual UText * | group (UText *dest, int64_t &group_len, UErrorCode &status) const |
Returns a shallow clone of the entire live input string with the UText current native index set to the beginning of the requested group. | |
virtual UText * | group (int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const |
Returns a shallow clone of the entire live input string with the UText current native index set to the beginning of the requested group. | |
virtual int32_t | start (UErrorCode &status) const |
Returns the index in the input string of the start of the text matched during the previous match operation. | |
virtual int64_t | start64 (UErrorCode &status) const |
Returns the index in the input string of the start of the text matched during the previous match operation. | |
virtual int32_t | start (int32_t group, UErrorCode &status) const |
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation. | |
virtual int64_t | start64 (int32_t group, UErrorCode &status) const |
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation. | |
virtual int32_t | end (UErrorCode &status) const |
Returns the index in the input string of the first character following the text matched during the previous match operation. | |
virtual int64_t | end64 (UErrorCode &status) const |
Returns the index in the input string of the first character following the text matched during the previous match operation. | |
virtual int32_t | end (int32_t group, UErrorCode &status) const |
Returns the index in the input string of the character following the text matched by the specified capture group during the previous match operation. | |
virtual int64_t | end64 (int32_t group, UErrorCode &status) const |
Returns the index in the input string of the character following the text matched by the specified capture group during the previous match operation. | |
virtual RegexMatcher & | reset () |
Resets this matcher. | |
virtual RegexMatcher & | reset (int64_t index, UErrorCode &status) |
Resets this matcher, and sets the current input position. | |
virtual RegexMatcher & | reset (const UnicodeString &input) |
Resets this matcher with a new input string. | |
virtual RegexMatcher & | reset (UText *input) |
Resets this matcher with a new input string. | |
virtual RegexMatcher & | refreshInputText (UText *input, UErrorCode &status) |
Set the subject text string upon which the regular expression is looking for matches without changing any other aspect of the matching state. | |
virtual const UnicodeString & | input () const |
Returns the input string being matched. | |
virtual UText * | inputText () const |
Returns the input string being matched. | |
virtual UText * | getInput (UText *dest, UErrorCode &status) const |
Returns the input string being matched, either by copying it into the provided UText parameter or by returning a shallow clone of the live input. | |
virtual RegexMatcher & | region (int64_t start, int64_t limit, UErrorCode &status) |
Sets the limits of this matcher's region. | |
virtual RegexMatcher & | region (int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status) |
Identical to region(start, limit, status) but also allows a start position without resetting the region state. | |
virtual int32_t | regionStart () const |
Reports the start index of this matcher's region. | |
virtual int64_t | regionStart64 () const |
Reports the start index of this matcher's region. | |
virtual int32_t | regionEnd () const |
Reports the end (limit) index (exclusive) of this matcher's region. | |
virtual int64_t | regionEnd64 () const |
Reports the end (limit) index (exclusive) of this matcher's region. | |
virtual UBool | hasTransparentBounds () const |
Queries the transparency of region bounds for this matcher. | |
virtual RegexMatcher & | useTransparentBounds (UBool b) |
Sets the transparency of region bounds for this matcher. | |
virtual UBool | hasAnchoringBounds () const |
Return true if this matcher is using anchoring bounds. | |
virtual RegexMatcher & | useAnchoringBounds (UBool b) |
Set whether this matcher is using Anchoring Bounds for its region. | |
virtual UBool | hitEnd () const |
Return true if the most recent matching operation attempted to access additional input beyond the available input text. | |
virtual UBool | requireEnd () const |
Return true if the most recent match succeeded and additional input could cause it to fail. | |
virtual const RegexPattern & | pattern () const |
Returns the pattern that is interpreted by this matcher. | |
virtual UnicodeString | replaceAll (const UnicodeString &replacement, UErrorCode &status) |
Replaces every substring of the input that matches the pattern with the given replacement string. | |
virtual UText * | replaceAll (UText *replacement, UText *dest, UErrorCode &status) |
Replaces every substring of the input that matches the pattern with the given replacement string. | |
virtual UnicodeString | replaceFirst (const UnicodeString &replacement, UErrorCode &status) |
Replaces the first substring of the input that matches the pattern with the replacement string. | |
virtual UText * | replaceFirst (UText *replacement, UText *dest, UErrorCode &status) |
Replaces the first substring of the input that matches the pattern with the replacement string. | |
virtual RegexMatcher & | appendReplacement (UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status) |
Implements a replace operation intended to be used as part of an incremental find-and-replace. | |
virtual RegexMatcher & | appendReplacement (UText *dest, UText *replacement, UErrorCode &status) |
Implements a replace operation intended to be used as part of an incremental find-and-replace. | |
virtual UnicodeString & | appendTail (UnicodeString &dest) |
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last appendReplacement(), to the destination string. | |
virtual UText * | appendTail (UText *dest, UErrorCode &status) |
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last appendReplacement(), to the destination string. | |
virtual int32_t | split (const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) |
Split a string into fields. | |
virtual int32_t | split (UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status) |
Split a string into fields. | |
virtual void | setTimeLimit (int32_t limit, UErrorCode &status) |
Set a processing time limit for match operations with this Matcher. | |
virtual int32_t | getTimeLimit () const |
Get the time limit, if any, for match operations made with this Matcher. | |
virtual void | setStackLimit (int32_t limit, UErrorCode &status) |
Set the amount of heap storage available for use by the match backtracking stack. | |
virtual int32_t | getStackLimit () const |
Get the size of the heap storage available for use by the back tracking stack. | |
virtual void | setMatchCallback (URegexMatchCallback *callback, const void *context, UErrorCode &status) |
Set a callback function for use with this Matcher. | |
virtual void | getMatchCallback (URegexMatchCallback *&callback, const void *&context, UErrorCode &status) |
Get the callback function for this Matcher. | |
virtual void | setFindProgressCallback (URegexFindProgressCallback *callback, const void *context, UErrorCode &status) |
Set a progress callback function for use with find operations on this Matcher. | |
virtual void | getFindProgressCallback (URegexFindProgressCallback *&callback, const void *&context, UErrorCode &status) |
Get the find progress callback function for this Matcher. | |
void | setTrace (UBool state) |
setTrace Debug function, enable/disable tracing of the matching engine. | |
virtual UClassID | getDynamicClassID () const override |
ICU "poor man's RTTI", returns a UClassID for the actual class. | |
void | resetPreserveRegion () |
Public Member Functions inherited from icu::UObject | |
virtual | ~UObject () |
Destructor. | |
Static Public Member Functions | |
static UClassID | getStaticClassID () |
ICU "poor man's RTTI", returns a UClassID for this class. | |
Friends | |
class | RegexPattern |
class | RegexCImpl |
class RegexMatcher bundles together a regular expression pattern and input text to which the expression can be applied.
It includes methods for testing for matches, and for find and replace operations.
Class RegexMatcher is not intended to be subclassed.
icu::RegexMatcher::RegexMatcher | ( | const UnicodeString & | regexp, |
uint32_t | flags, | ||
UErrorCode & | status | ||
) |
Construct a RegexMatcher for a regular expression.
This is a convenience method that avoids the need to explicitly create a RegexPattern object. Note that if several RegexMatchers need to be created for the same expression, it will be more efficient to separately create and cache a RegexPattern object, and use its matcher() method to create the RegexMatcher objects.
regexp | The Regular Expression to be compiled. |
flags | URegexpFlag options, such as UREGEX_CASE_INSENSITIVE. |
status | Any errors are reported by setting this UErrorCode variable. |
icu::RegexMatcher::RegexMatcher | ( | UText * | regexp, |
uint32_t | flags, | ||
UErrorCode & | status | ||
) |
Construct a RegexMatcher for a regular expression.
This is a convenience method that avoids the need to explicitly create a RegexPattern object. Note that if several RegexMatchers need to be created for the same expression, it will be more efficient to separately create and cache a RegexPattern object, and use its matcher() method to create the RegexMatcher objects.
regexp | The regular expression to be compiled. |
flags | URegexpFlag options, such as UREGEX_CASE_INSENSITIVE. |
status | Any errors are reported by setting this UErrorCode variable. |
icu::RegexMatcher::RegexMatcher | ( | const UnicodeString & | regexp, |
const UnicodeString & | input, | ||
uint32_t | flags, | ||
UErrorCode & | status | ||
) |
Construct a RegexMatcher for a regular expression.
This is a convenience method that avoids the need to explicitly create a RegexPattern object. Note that if several RegexMatchers need to be created for the same expression, it will be more efficient to separately create and cache a RegexPattern object, and use its matcher() method to create the RegexMatcher objects.
The matcher will retain a reference to the supplied input string, and all regexp pattern matching operations happen directly on the original string. It is critical that the string not be altered or deleted before use by the regular expression operations is complete.
regexp | The Regular Expression to be compiled. |
input | The string to match. The matcher retains a reference to the caller's string; no copy is made. |
flags | URegexpFlag options, such as UREGEX_CASE_INSENSITIVE. |
status | Any errors are reported by setting this UErrorCode variable. |
icu::RegexMatcher::RegexMatcher | ( | UText * | regexp, |
UText * | input, | ||
uint32_t | flags, | ||
UErrorCode & | status | ||
) |
Construct a RegexMatcher for a regular expression.
This is a convenience method that avoids the need to explicitly create a RegexPattern object. Note that if several RegexMatchers need to be created for the same expression, it will be more efficient to separately create and cache a RegexPattern object, and use its matcher() method to create the RegexMatcher objects.
The matcher will make a shallow clone of the supplied input text, and all regexp pattern matching operations happen on this clone. While read-only operations on the supplied text are permitted, it is critical that the underlying string not be altered or deleted before use by the regular expression operations is complete.
regexp | The Regular Expression to be compiled. |
input | The string to match. The matcher retains a shallow clone of the text. |
flags | URegexpFlag options, such as UREGEX_CASE_INSENSITIVE. |
status | Any errors are reported by setting this UErrorCode variable. |
|
virtual |
Implements a replace operation intended to be used as part of an incremental find-and-replace.
The input string, starting from the end of the previous replacement and ending at the start of the current match, is appended to the destination string. Then the replacement string is appended to the output string, including handling any substitutions of captured text.
For simple, prepackaged, non-incremental find-and-replace operations, see replaceFirst() or replaceAll().
dest | A UnicodeString to which the results of the find-and-replace are appended. |
replacement | A UnicodeString that provides the text to be substituted for the input text that matched the regexp pattern. The replacement text may contain references to captured text from the input. |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR if the replacement text specifies a capture group that does not exist in the pattern. |
|
virtual |
Implements a replace operation intended to be used as part of an incremental find-and-replace.
The input string, starting from the end of the previous replacement and ending at the start of the current match, is appended to the destination string. Then the replacement string is appended to the output string, including handling any substitutions of captured text.
For simple, prepackaged, non-incremental find-and-replace operations, see replaceFirst() or replaceAll().
dest | A mutable UText to which the results of the find-and-replace are appended. Must not be nullptr. |
replacement | A UText that provides the text to be substituted for the input text that matched the regexp pattern. The replacement text may contain references to captured text from the input. |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR if the replacement text specifies a capture group that does not exist in the pattern. |
|
virtual |
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last appendReplacement(), to the destination string.
appendTail() is intended to be invoked after one or more invocations of RegexMatcher::appendReplacement().
dest | A UnicodeString to which the results of the find-and-replace are appended. |
|
virtual |
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last appendReplacement(), to the destination string.
appendTail() is intended to be invoked after one or more invocations of RegexMatcher::appendReplacement().
dest | A mutable UText to which the results of the find-and-replace are appended. Must not be nullptr. |
status | error code |
|
virtual |
Returns the index in the input string of the character following the text matched by the specified capture group during the previous match operation.
group | the capture group number |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed and U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number |
|
virtual |
Returns the index in the input string of the first character following the text matched during the previous match operation.
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed. |
|
virtual |
Returns the index in the input string of the character following the text matched by the specified capture group during the previous match operation.
group | the capture group number |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed and U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number |
|
virtual |
Returns the index in the input string of the first character following the text matched during the previous match operation.
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed. |
Find the next pattern match in the input string.
The find begins searching the input at the location following the end of the previous match, or at the start of the string if there is no previous match. If a match is found, start(), end() and group() will provide more information regarding the match. Note that if the input string is changed by the application, use find(startPos, status) instead of find(), because the saved starting position may not be valid with the altered input string.
|
virtual |
Resets this RegexMatcher and then attempts to find the next substring of the input string that matches the pattern, starting at the specified index.
start | The (native) index in the input string to begin the search. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Find the next pattern match in the input string.
The find begins searching the input at the location following the end of the previous match, or at the start of the string if there is no previous match. If a match is found, start(), end() and group() will provide more information regarding the match.
Note that if the input string is changed by the application, use find(startPos, status) instead of find(), because the saved starting position may not be valid with the altered input string.
status | A reference to a UErrorCode to receive any errors. |
ICU "poor man's RTTI", returns a UClassID for the actual class.
Reimplemented from icu::UObject.
|
virtual |
Get the find progress callback function for this Matcher.
callback | Out parameter, receives a pointer to the user-supplied callback function. |
context | Out parameter, receives the user context pointer that was set when uregex_setFindProgressCallback() was called. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Returns the input string being matched, either by copying it into the provided UText parameter or by returning a shallow clone of the live input.
Note that copying the entire input may cause significant performance and memory issues.
dest | The UText into which the input should be copied, or nullptr to create a new UText |
status | error code |
|
virtual |
Get the callback function for this Matcher.
callback | Out parameter, receives a pointer to the user-supplied callback function. |
context | Out parameter, receives the user context pointer that was set when uregex_setMatchCallback() was called. |
status | A reference to a UErrorCode to receive any errors. |
Get the size of the heap storage available for use by the back tracking stack.
ICU "poor man's RTTI", returns a UClassID for this class.
Get the time limit, if any, for match operations made with this Matcher.
|
virtual |
Returns a string containing the text captured by the given group during the previous match operation.
Group(0) is the entire match.
A zero length string is returned both for capture groups that did not participate in the match and for actual zero length matches. To distinguish between these two cases use the function start(), which returns -1 for non-participating groups.
groupNum | the capture group number |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed and U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. |
|
virtual |
Returns a shallow clone of the entire live input string with the UText current native index set to the beginning of the requested group.
A group length of zero is returned both for capture groups that did not participate in the match and for actual zero length matches. To distinguish between these two cases use the function start(), which returns -1 for non-participating groups.
groupNum | The capture group number. |
dest | The UText into which the input should be cloned, or nullptr to create a new UText. |
group_len | A reference to receive the length of the desired capture group |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed and U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. |
|
virtual |
Returns a string containing the text matched by the previous match.
If the pattern can match an empty string, an empty string may be returned.
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed. |
|
virtual |
Returns a shallow clone of the entire live input string with the UText current native index set to the beginning of the requested group.
dest | The UText into which the input should be cloned, or nullptr to create a new UText |
group_len | A reference to receive the length of the desired capture group |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed and U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. |
Returns the number of capturing groups in this matcher's pattern.
Return true if this matcher is using anchoring bounds.
By default, matchers use anchoring region bounds.
Queries the transparency of region bounds for this matcher.
See useTransparentBounds for a description of transparent and opaque bounds. By default, a matcher uses opaque region boundaries.
Return true if the most recent matching operation attempted to access additional input beyond the available input text.
In this case, additional input text could change the results of the match.
hitEnd() is defined for both successful and unsuccessful matches. In either case hitEnd() will return true if the end of the text was reached at any point during the matching process.
|
virtual |
Returns the input string being matched.
This is the live input text; it should not be altered or deleted. This method will work even if the input was originally supplied as a UnicodeString.
|
virtual |
Attempts to match the input string, starting from the specified index, against the pattern.
The match may be of any length, and is not required to extend to the end of the input string. Contrast with matches().
If the match succeeds then more information can be obtained via the start(), end(), and group() functions.
startIndex | The input string (native) index at which to begin matching. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Attempts to match the input string, starting from the beginning of the region, against the pattern.
Like the matches() method, this function always starts at the beginning of the input region; unlike that function, it does not require that the entire region be matched.
If the match succeeds then more information can be obtained via the start(), end(), and group() functions.
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Resets the matcher, then attempts to match the input beginning at the specified startIndex, and extending to the end of the input.
The input region is reset to include the entire input string. A successful match must extend to the end of the input.
startIndex | The input string (native) index at which to begin matching. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Attempts to match the entire input region against the pattern.
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Returns the pattern that is interpreted by this matcher.
|
virtual |
Set the subject text string upon which the regular expression is looking for matches without changing any other aspect of the matching state.
The new and previous text strings must have the same content.
This function is intended for use in environments where ICU is operating on strings that may move around in memory. It provides a mechanism for notifying ICU that the string has been relocated, and providing a new UText to access the string in its new position.
Note that the regular expression implementation never copies the underlying text of a string being matched, but always operates directly on the original text provided by the user. Refreshing simply drops the references to the old text and replaces them with references to the new.
Caution: this function is normally used only by very specialized, system-level code. One example use case is with garbage collection that moves the text in memory.
input | The new (moved) text string. |
status | Receives errors detected by this function. |
|
virtual |
Identical to region(start, limit, status) but also allows a start position without resetting the region state.
regionStart | The region start |
regionLimit | the limit of the region |
startIndex | The (native) index within the region bounds at which to begin searches. |
status | A reference to a UErrorCode to receive any errors. If startIndex is not within the specified region bounds, U_INDEX_OUTOFBOUNDS_ERROR is returned. |
|
virtual |
Sets the limits of this matcher's region.
The region is the part of the input string that will be searched to find a match. Invoking this method resets the matcher, and then sets the region to start at the index specified by the start parameter and end at the index specified by the limit parameter.
Depending on the transparency and anchoring being used (see useTransparentBounds and useAnchoringBounds), certain constructs such as anchors may behave differently at or around the boundaries of the region.
The function will fail if start is greater than limit, or if either index is less than zero or greater than the length of the string being matched.
start | The (native) index to begin searches at. |
limit | The index to end searches at (exclusive). |
status | A reference to a UErrorCode to receive any errors. |
Reports the end (limit) index (exclusive) of this matcher's region.
The searches this matcher conducts are limited to finding matches within regionStart (inclusive) and regionEnd (exclusive).
Reports the end (limit) index (exclusive) of this matcher's region.
The searches this matcher conducts are limited to finding matches within regionStart (inclusive) and regionEnd (exclusive).
Reports the start index of this matcher's region.
The searches this matcher conducts are limited to finding matches within regionStart (inclusive) and regionEnd (exclusive).
Reports the start index of this matcher's region.
The searches this matcher conducts are limited to finding matches within regionStart (inclusive) and regionEnd (exclusive).
|
virtual |
Replaces every substring of the input that matches the pattern with the given replacement string.
This is a convenience function that provides a complete find-and-replace-all operation.
This method first resets this matcher. It then scans the input string looking for matches of the pattern. Input that is not part of any match is left unchanged; each match is replaced in the result by the replacement string. The replacement string may contain references to capture groups.
replacement | a string containing the replacement text. |
status | a reference to a UErrorCode to receive any errors. |
|
virtual |
Replaces every substring of the input that matches the pattern with the given replacement string.
This is a convenience function that provides a complete find-and-replace-all operation.
This method first resets this matcher. It then scans the input string looking for matches of the pattern. Input that is not part of any match is left unchanged; each match is replaced in the result by the replacement string. The replacement string may contain references to capture groups.
replacement | a string containing the replacement text. |
dest | a mutable UText in which the results are placed. If nullptr, a new UText will be created (which may not be mutable). |
status | a reference to a UErrorCode to receive any errors. |
|
virtual |
Replaces the first substring of the input that matches the pattern with the replacement string.
This is a convenience function that provides a complete find-and-replace operation.
This function first resets this RegexMatcher. It then scans the input string looking for a match of the pattern. Input that is not part of the match is appended directly to the result string; the match is replaced in the result by the replacement string. The replacement string may contain references to captured groups.
The state of the matcher (the position at which a subsequent find() would begin) after completing a replaceFirst() is not specified. The RegexMatcher should be reset before doing additional find() operations.
replacement | a string containing the replacement text. |
status | a reference to a UErrorCode to receive any errors. |
|
virtual |
Replaces the first substring of the input that matches the pattern with the replacement string.
This is a convenience function that provides a complete find-and-replace operation.
This function first resets this RegexMatcher. It then scans the input string looking for a match of the pattern. Input that is not part of the match is appended directly to the result string; the match is replaced in the result by the replacement string. The replacement string may contain references to captured groups.
The state of the matcher (the position at which a subsequent find() would begin) after completing a replaceFirst() is not specified. The RegexMatcher should be reset before doing additional find() operations.
replacement | a string containing the replacement text. |
dest | a mutable UText in which the results are placed. If nullptr, a new UText will be created (which may not be mutable). |
status | a reference to a UErrorCode to receive any errors. |
Return true if the most recent match succeeded and additional input could cause it to fail.
If this method returns false and a match was found, then more input might change the match but the match won't be lost. If a match was not found, then requireEnd has no meaning.
|
virtual |
Resets this matcher.
The effect is to remove any memory of previous matches, and to cause subsequent find() operations to begin at the beginning of the input string.
|
virtual |
Resets this matcher with a new input string.
This allows instances of RegexMatcher to be reused, which is more efficient than creating a new RegexMatcher for each input string to be processed.
input | The new string on which subsequent pattern matches will operate. The matcher retains a reference to the caller's string, and operates directly on that. Ownership of the string remains with the caller. Because no copy of the string is made, it is essential that the caller not delete the string until after regexp operations on it are done. Note that while a reset on the matcher with an input string that is then modified across/during matcher operations may be supported currently for UnicodeString, this was not originally intended behavior, and support for this is not guaranteed in upcoming versions of ICU. |
|
virtual |
Resets this matcher, and sets the current input position.
The effect is to remove any memory of previous matches, and to cause subsequent find() operations to begin at the specified (native) position in the input string.
The matcher's region is reset to its default, which is the entire input string.
An alternative to this function is to set a match region beginning at the desired index.
|
virtual |
Resets this matcher with a new input string.
This allows instances of RegexMatcher to be reused, which is more efficient than creating a new RegexMatcher for each input string to be processed.
input | The new string on which subsequent pattern matches will operate. The matcher makes a shallow clone of the given text; ownership of the original string remains with the caller. Because no deep copy of the text is made, it is essential that the caller not modify the string until after regexp operations on it are done. |
void icu::RegexMatcher::resetPreserveRegion | ( | ) |
|
virtual |
Set a progress callback function for use with find operations on this Matcher.
During find operations, the callback will be invoked after each return from a match attempt, giving the application the opportunity to terminate a long-running find operation.
callback | A pointer to the user-supplied callback function. |
context | User context pointer. The value supplied at the time the callback function is set will be saved and passed to the callback each time that it is called. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Set a callback function for use with this Matcher.
During matching operations the function will be called periodically, giving the application the opportunity to terminate a long-running match.
callback | A pointer to the user-supplied callback function. |
context | User context pointer. The value supplied at the time the callback function is set will be saved and passed to the callback each time that it is called. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Set the amount of heap storage available for use by the match backtracking stack.
The matcher is also reset, discarding any results from previous matches.
ICU uses a backtracking regular expression engine, with the backtrack stack maintained on the heap. This function sets the limit to the amount of memory that can be used for this purpose. A backtracking stack overflow will result in an error from the match operation that caused it.
A limit is desirable because a malicious or poorly designed pattern can use excessive memory, potentially crashing the process. A limit is enabled by default.
limit | The maximum size, in bytes, of the matching backtrack stack. A value of zero means no limit. The limit must be greater than or equal to zero. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Set a processing time limit for match operations with this Matcher.
Some patterns, when matching certain strings, can run in exponential time. For practical purposes, the match operation may appear to be in an infinite loop. When a limit is set a match operation will fail with an error if the limit is exceeded.
The units of the limit are steps of the match engine. Correspondence with actual processor time will depend on the speed of the processor and the details of the specific pattern, but will typically be on the order of milliseconds.
By default, the matching time is not limited.
limit | The limit value, or 0 for no limit. |
status | A reference to a UErrorCode to receive any errors. |
setTrace Debug function, enable/disable tracing of the matching engine.
For internal ICU development use only. DO NOT USE!!!!
|
virtual |
Split a string into fields.
Somewhat like split() from Perl. The pattern matches identify delimiters that separate the input into fields. The input data between the matches becomes the fields themselves.
input | The string to be split into fields. The field delimiters match the pattern (in the "this" object). This matcher will be reset to this input string. |
dest | An array of UnicodeStrings to receive the results of the split. This is an array of actual UnicodeString objects, not an array of pointers to strings. Local (stack based) arrays can work well here. |
destCapacity | The number of elements in the destination array. If the number of fields found is less than destCapacity, the extra strings in the destination array are not altered. If the number of destination strings is less than the number of fields, the trailing part of the input string, including any field delimiters, is placed in the last destination string. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Split a string into fields.
Somewhat like split() from Perl. The pattern matches identify delimiters that separate the input into fields. The input data between the matches becomes the fields themselves.
input | The string to be split into fields. The field delimiters match the pattern (in the "this" object). This matcher will be reset to this input string. |
dest | An array of mutable UText structs to receive the results of the split. If a field is nullptr, a new UText is allocated to contain the results for that field. This new UText is not guaranteed to be mutable. |
destCapacity | The number of elements in the destination array. If the number of fields found is less than destCapacity, the extra strings in the destination array are not altered. If the number of destination strings is less than the number of fields, the trailing part of the input string, including any field delimiters, is placed in the last destination string. |
status | A reference to a UErrorCode to receive any errors. |
|
virtual |
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation.
Return -1 if the capture group exists in the pattern, but was not part of the last match.
group | the capture group number |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number |
|
virtual |
Returns the index in the input string of the start of the text matched during the previous match operation.
status | a reference to a UErrorCode to receive any errors. |
|
virtual |
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation.
Return -1 if the capture group exists in the pattern, but was not part of the last match.
group | the capture group number. |
status | A reference to a UErrorCode to receive any errors. Possible errors are U_REGEX_INVALID_STATE if no match has been attempted or the last match failed, and U_INDEX_OUTOFBOUNDS_ERROR for a bad capture group number. |
|
virtual |
Returns the index in the input string of the start of the text matched during the previous match operation.
status | a reference to a UErrorCode to receive any errors. |
|
virtual |
Set whether this matcher is using Anchoring Bounds for its region.
With anchoring bounds, pattern anchors such as ^ and $ will match at the start and end of the region. Without Anchoring Bounds, anchors will only match at the positions they would in the complete text.
Anchoring Bounds are the default for regions.
b | true to enable anchoring bounds; false to disable them. |
|
virtual |
Sets the transparency of region bounds for this matcher.
Invoking this function with an argument of true will set this matcher to use transparent bounds. If the boolean argument is false, then opaque bounds will be used.
Using transparent bounds, the boundaries of this matcher's region are transparent to lookahead, lookbehind, and boundary matching constructs. Those constructs can see text beyond the boundaries of the region while checking for a match.
With opaque bounds, no text outside of the matcher's region is visible to lookahead, lookbehind, and boundary matching constructs.
By default, a matcher uses opaque bounds.
b | true for transparent bounds; false for opaque bounds |
|
friend |