ICU 77.1  77.1
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
regex.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2002-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * file name: regex.h
9 * encoding: UTF-8
10 * indentation:4
11 *
12 * created on: 2002oct22
13 * created by: Andy Heninger
14 *
15 * ICU Regular Expressions, API for C++
16 */
17 
18 #ifndef REGEX_H
19 #define REGEX_H
20 
21 //#define REGEX_DEBUG
22 
45 #include "unicode/utypes.h"
46 
47 #if U_SHOW_CPLUSPLUS_API
48 
49 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
50 
51 #include "unicode/uobject.h"
52 #include "unicode/unistr.h"
53 #include "unicode/utext.h"
54 #include "unicode/parseerr.h"
55 
56 #include "unicode/uregex.h"
57 
58 // Forward Declarations
59 
60 struct UHashtable;
61 
62 U_NAMESPACE_BEGIN
63 
64 struct Regex8BitSet;
65 class RegexCImpl;
66 class RegexMatcher;
67 class RegexPattern;
68 struct REStackFrame;
69 class BreakIterator;
70 class UnicodeSet;
71 class UVector;
72 class UVector32;
73 class UVector64;
74 
75 
87 class U_I18N_API RegexPattern final : public UObject {
88 public:
89 
98 
105  RegexPattern(const RegexPattern &source);
106 
112  virtual ~RegexPattern();
113 
122  bool operator==(const RegexPattern& that) const;
123 
132  inline bool operator!=(const RegexPattern& that) const {return ! operator ==(that);}
133 
139  RegexPattern &operator =(const RegexPattern &source);
140 
148  virtual RegexPattern *clone() const;
149 
150 
175  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
176  UParseError &pe,
177  UErrorCode &status);
178 
205  static RegexPattern * U_EXPORT2 compile( UText *regex,
206  UParseError &pe,
207  UErrorCode &status);
208 
233  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
234  uint32_t flags,
235  UParseError &pe,
236  UErrorCode &status);
237 
264  static RegexPattern * U_EXPORT2 compile( UText *regex,
265  uint32_t flags,
266  UParseError &pe,
267  UErrorCode &status);
268 
291  static RegexPattern * U_EXPORT2 compile( const UnicodeString &regex,
292  uint32_t flags,
293  UErrorCode &status);
294 
319  static RegexPattern * U_EXPORT2 compile( UText *regex,
320  uint32_t flags,
321  UErrorCode &status);
322 
328  virtual uint32_t flags() const;
329 
347  virtual RegexMatcher *matcher(const UnicodeString &input,
348  UErrorCode &status) const;
349 
350 private:
363  RegexMatcher *matcher(const char16_t *input,
364  UErrorCode &status) const = delete;
365 public:
366 
367 
379  virtual RegexMatcher *matcher(UErrorCode &status) const;
380 
381 
396  static UBool U_EXPORT2 matches(const UnicodeString &regex,
397  const UnicodeString &input,
398  UParseError &pe,
399  UErrorCode &status);
400 
415  static UBool U_EXPORT2 matches(UText *regex,
416  UText *input,
417  UParseError &pe,
418  UErrorCode &status);
419 
428  virtual UnicodeString pattern() const;
429 
430 
441  virtual UText *patternText(UErrorCode &status) const;
442 
443 
457  virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const;
458 
459 
476  virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const;
477 
478 
517  virtual int32_t split(const UnicodeString &input,
518  UnicodeString dest[],
519  int32_t destCapacity,
520  UErrorCode &status) const;
521 
522 
561  virtual int32_t split(UText *input,
562  UText *dest[],
563  int32_t destCapacity,
564  UErrorCode &status) const;
565 
566 
572  virtual UClassID getDynamicClassID() const override;
573 
579  static UClassID U_EXPORT2 getStaticClassID();
580 
581 private:
582  //
583  // Implementation Data
584  //
585  UText *fPattern; // The original pattern string.
586  UnicodeString *fPatternString; // The original pattern UncodeString if relevant
587  uint32_t fFlags; // The flags used when compiling the pattern.
588  //
589  UVector64 *fCompiledPat; // The compiled pattern p-code.
590  UnicodeString fLiteralText; // Any literal string data from the pattern,
591  // after un-escaping, for use during the match.
592 
593  UVector *fSets; // Any UnicodeSets referenced from the pattern.
594  Regex8BitSet *fSets8; // (and fast sets for latin-1 range.)
595 
596 
597  UErrorCode fDeferredStatus; // status if some prior error has left this
598  // RegexPattern in an unusable state.
599 
600  int32_t fMinMatchLen; // Minimum Match Length. All matches will have length
601  // >= this value. For some patterns, this calculated
602  // value may be less than the true shortest
603  // possible match.
604 
605  int32_t fFrameSize; // Size of a state stack frame in the
606  // execution engine.
607 
608  int32_t fDataSize; // The size of the data needed by the pattern that
609  // does not go on the state stack, but has just
610  // a single copy per matcher.
611 
612  UVector32 *fGroupMap; // Map from capture group number to position of
613  // the group's variables in the matcher stack frame.
614 
615  int32_t fStartType; // Info on how a match must start.
616  int32_t fInitialStringIdx; //
617  int32_t fInitialStringLen;
618  UnicodeSet *fInitialChars;
619  UChar32 fInitialChar;
620  Regex8BitSet *fInitialChars8;
621  UBool fNeedsAltInput;
622 
623  UHashtable *fNamedCaptureMap; // Map from capture group names to numbers.
624 
625  friend class RegexCompile;
626  friend class RegexMatcher;
627  friend class RegexCImpl;
628 
629  //
630  // Implementation Methods
631  //
632  void init(); // Common initialization, for use by constructors.
633  bool initNamedCaptureMap(); // Lazy init for fNamedCaptureMap.
634  void zap(); // Common cleanup
635 
636  void dumpOp(int32_t index) const;
637 
638  public:
639 #ifndef U_HIDE_INTERNAL_API
644  void dumpPattern() const;
645 #endif /* U_HIDE_INTERNAL_API */
646 };
647 
648 
649 
659 class U_I18N_API RegexMatcher final : public UObject {
660 public:
661 
675  RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status);
676 
691  RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status);
692 
713  RegexMatcher(const UnicodeString &regexp, const UnicodeString &input,
714  uint32_t flags, UErrorCode &status);
715 
736  RegexMatcher(UText *regexp, UText *input,
737  uint32_t flags, UErrorCode &status);
738 
739 private:
751  RegexMatcher(const UnicodeString &regexp, const char16_t *input,
752  uint32_t flags, UErrorCode &status) = delete;
753 public:
754 
755 
761  virtual ~RegexMatcher();
762 
763 
770  virtual UBool matches(UErrorCode &status);
771 
772 
783  virtual UBool matches(int64_t startIndex, UErrorCode &status);
784 
785 
799  virtual UBool lookingAt(UErrorCode &status);
800 
801 
815  virtual UBool lookingAt(int64_t startIndex, UErrorCode &status);
816 
817 
830  virtual UBool find();
831 
832 
847  virtual UBool find(UErrorCode &status);
848 
858  virtual UBool find(int64_t start, UErrorCode &status);
859 
860 
870  virtual UnicodeString group(UErrorCode &status) const;
871 
872 
890  virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const;
891 
897  virtual int32_t groupCount() const;
898 
899 
914  virtual UText *group(UText *dest, int64_t &group_len, UErrorCode &status) const;
915 
936  virtual UText *group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const;
937 
945  virtual int32_t start(UErrorCode &status) const;
946 
954  virtual int64_t start64(UErrorCode &status) const;
955 
956 
970  virtual int32_t start(int32_t group, UErrorCode &status) const;
971 
985  virtual int64_t start64(int32_t group, UErrorCode &status) const;
986 
1000  virtual int32_t end(UErrorCode &status) const;
1001 
1015  virtual int64_t end64(UErrorCode &status) const;
1016 
1017 
1035  virtual int32_t end(int32_t group, UErrorCode &status) const;
1036 
1054  virtual int64_t end64(int32_t group, UErrorCode &status) const;
1055 
1064  virtual RegexMatcher &reset();
1065 
1066 
1082  virtual RegexMatcher &reset(int64_t index, UErrorCode &status);
1083 
1084 
1102  virtual RegexMatcher &reset(const UnicodeString &input);
1103 
1104 
1118  virtual RegexMatcher &reset(UText *input);
1119 
1120 
1145  virtual RegexMatcher &refreshInputText(UText *input, UErrorCode &status);
1146 
1147 private:
1160  RegexMatcher &reset(const char16_t *input) = delete;
1161 public:
1162 
1170  virtual const UnicodeString &input() const;
1171 
1180  virtual UText *inputText() const;
1181 
1192  virtual UText *getInput(UText *dest, UErrorCode &status) const;
1193 
1194 
1213  virtual RegexMatcher &region(int64_t start, int64_t limit, UErrorCode &status);
1214 
1226  virtual RegexMatcher &region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status);
1227 
1236  virtual int32_t regionStart() const;
1237 
1246  virtual int64_t regionStart64() const;
1247 
1248 
1257  virtual int32_t regionEnd() const;
1258 
1267  virtual int64_t regionEnd64() const;
1268 
1277  virtual UBool hasTransparentBounds() const;
1278 
1298 
1299 
1307  virtual UBool hasAnchoringBounds() const;
1308 
1309 
1323 
1324 
1337  virtual UBool hitEnd() const;
1338 
1348  virtual UBool requireEnd() const;
1349 
1350 
1356  virtual const RegexPattern &pattern() const;
1357 
1358 
1375  virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status);
1376 
1377 
1398  virtual UText *replaceAll(UText *replacement, UText *dest, UErrorCode &status);
1399 
1400 
1421  virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status);
1422 
1423 
1448  virtual UText *replaceFirst(UText *replacement, UText *dest, UErrorCode &status);
1449 
1450 
1479  const UnicodeString &replacement, UErrorCode &status);
1480 
1481 
1510  UText *replacement, UErrorCode &status);
1511 
1512 
1524 
1525 
1539  virtual UText *appendTail(UText *dest, UErrorCode &status);
1540 
1541 
1565  virtual int32_t split(const UnicodeString &input,
1566  UnicodeString dest[],
1567  int32_t destCapacity,
1568  UErrorCode &status);
1569 
1570 
1594  virtual int32_t split(UText *input,
1595  UText *dest[],
1596  int32_t destCapacity,
1597  UErrorCode &status);
1598 
1620  virtual void setTimeLimit(int32_t limit, UErrorCode &status);
1621 
1628  virtual int32_t getTimeLimit() const;
1629 
1651  virtual void setStackLimit(int32_t limit, UErrorCode &status);
1652 
1660  virtual int32_t getStackLimit() const;
1661 
1662 
1676  virtual void setMatchCallback(URegexMatchCallback *callback,
1677  const void *context,
1678  UErrorCode &status);
1679 
1680 
1691  virtual void getMatchCallback(URegexMatchCallback *&callback,
1692  const void *&context,
1693  UErrorCode &status);
1694 
1695 
1710  const void *context,
1711  UErrorCode &status);
1712 
1713 
1725  const void *&context,
1726  UErrorCode &status);
1727 
1728 #ifndef U_HIDE_INTERNAL_API
1734  void setTrace(UBool state);
1735 #endif /* U_HIDE_INTERNAL_API */
1736 
1742  static UClassID U_EXPORT2 getStaticClassID();
1743 
1749  virtual UClassID getDynamicClassID() const override;
1750 
1751 private:
1752  // Constructors and other object boilerplate are private.
1753  // Instances of RegexMatcher can not be assigned, copied, cloned, etc.
1754  RegexMatcher() = delete; // default constructor not implemented
1755  RegexMatcher(const RegexPattern *pat);
1756  RegexMatcher(const RegexMatcher &other) = delete;
1757  RegexMatcher &operator =(const RegexMatcher &rhs) = delete;
1758  void init(UErrorCode &status); // Common initialization
1759  void init2(UText *t, UErrorCode &e); // Common initialization, part 2.
1760 
1761  friend class RegexPattern;
1762  friend class RegexCImpl;
1763 public:
1764 #ifndef U_HIDE_INTERNAL_API
1766  void resetPreserveRegion(); // Reset matcher state, but preserve any region.
1767 #endif /* U_HIDE_INTERNAL_API */
1768 private:
1769 
1770  //
1771  // MatchAt This is the internal interface to the match engine itself.
1772  // Match status comes back in matcher member variables.
1773  //
1774  void MatchAt(int64_t startIdx, UBool toEnd, UErrorCode &status);
1775  inline void backTrack(int64_t &inputIdx, int32_t &patIdx);
1776  UBool isWordBoundary(int64_t pos); // perform Perl-like \b test
1777  UBool isUWordBoundary(int64_t pos, UErrorCode &status); // perform RBBI based \b test
1778  // Find a grapheme cluster boundary using a break iterator. For handling \X in regexes.
1779  int64_t followingGCBoundary(int64_t pos, UErrorCode &status);
1780  REStackFrame *resetStack();
1781  inline REStackFrame *StateSave(REStackFrame *fp, int64_t savePatIdx, UErrorCode &status);
1782  void IncrementTime(UErrorCode &status);
1783 
1784  // Call user find callback function, if set. Return true if operation should be interrupted.
1785  inline UBool findProgressInterrupt(int64_t matchIndex, UErrorCode &status);
1786 
1787  int64_t appendGroup(int32_t groupNum, UText *dest, UErrorCode &status) const;
1788 
1789  UBool findUsingChunk(UErrorCode &status);
1790  void MatchChunkAt(int32_t startIdx, UBool toEnd, UErrorCode &status);
1791  UBool isChunkWordBoundary(int32_t pos);
1792 
1793  const RegexPattern *fPattern;
1794  RegexPattern *fPatternOwned; // Non-nullptr if this matcher owns the pattern, and
1795  // should delete it when through.
1796 
1797  const UnicodeString *fInput; // The string being matched. Only used for input()
1798  UText *fInputText; // The text being matched. Is never nullptr.
1799  UText *fAltInputText; // A shallow copy of the text being matched.
1800  // Only created if the pattern contains backreferences.
1801  int64_t fInputLength; // Full length of the input text.
1802  int32_t fFrameSize; // The size of a frame in the backtrack stack.
1803 
1804  int64_t fRegionStart; // Start of the input region, default = 0.
1805  int64_t fRegionLimit; // End of input region, default to input.length.
1806 
1807  int64_t fAnchorStart; // Region bounds for anchoring operations (^ or $).
1808  int64_t fAnchorLimit; // See useAnchoringBounds
1809 
1810  int64_t fLookStart; // Region bounds for look-ahead/behind and
1811  int64_t fLookLimit; // and other boundary tests. See
1812  // useTransparentBounds
1813 
1814  int64_t fActiveStart; // Currently active bounds for matching.
1815  int64_t fActiveLimit; // Usually is the same as region, but
1816  // is changed to fLookStart/Limit when
1817  // entering look around regions.
1818 
1819  UBool fTransparentBounds; // True if using transparent bounds.
1820  UBool fAnchoringBounds; // True if using anchoring bounds.
1821 
1822  UBool fMatch; // True if the last attempted match was successful.
1823  int64_t fMatchStart; // Position of the start of the most recent match
1824  int64_t fMatchEnd; // First position after the end of the most recent match
1825  // Zero if no previous match, even when a region
1826  // is active.
1827  int64_t fLastMatchEnd; // First position after the end of the previous match,
1828  // or -1 if there was no previous match.
1829  int64_t fAppendPosition; // First position after the end of the previous
1830  // appendReplacement(). As described by the
1831  // JavaDoc for Java Matcher, where it is called
1832  // "append position"
1833  UBool fHitEnd; // True if the last match touched the end of input.
1834  UBool fRequireEnd; // True if the last match required end-of-input
1835  // (matched $ or Z)
1836 
1837  UVector64 *fStack;
1838  REStackFrame *fFrame; // After finding a match, the last active stack frame,
1839  // which will contain the capture group results.
1840  // NOT valid while match engine is running.
1841 
1842  int64_t *fData; // Data area for use by the compiled pattern.
1843  int64_t fSmallData[8]; // Use this for data if it's enough.
1844 
1845  int32_t fTimeLimit; // Max time (in arbitrary steps) to let the
1846  // match engine run. Zero for unlimited.
1847 
1848  int32_t fTime; // Match time, accumulates while matching.
1849  int32_t fTickCounter; // Low bits counter for time. Counts down StateSaves.
1850  // Kept separately from fTime to keep as much
1851  // code as possible out of the inline
1852  // StateSave function.
1853 
1854  int32_t fStackLimit; // Maximum memory size to use for the backtrack
1855  // stack, in bytes. Zero for unlimited.
1856 
1857  URegexMatchCallback *fCallbackFn; // Pointer to match progress callback funct.
1858  // nullptr if there is no callback.
1859  const void *fCallbackContext; // User Context ptr for callback function.
1860 
1861  URegexFindProgressCallback *fFindProgressCallbackFn; // Pointer to match progress callback funct.
1862  // nullptr if there is no callback.
1863  const void *fFindProgressCallbackContext; // User Context ptr for callback function.
1864 
1865 
1866  UBool fInputUniStrMaybeMutable; // Set when fInputText wraps a UnicodeString that may be mutable - compatibility.
1867 
1868  UBool fTraceDebug; // Set true for debug tracing of match engine.
1869 
1870  UErrorCode fDeferredStatus; // Save error state that cannot be immediately
1871  // reported, or that permanently disables this matcher.
1872 
1873  BreakIterator *fWordBreakItr;
1874  BreakIterator *fGCBreakItr;
1875 };
1876 
1877 U_NAMESPACE_END
1878 #endif // UCONFIG_NO_REGULAR_EXPRESSIONS
1879 
1880 #endif /* U_SHOW_CPLUSPLUS_API */
1881 
1882 #endif
The BreakIterator class implements methods for finding the location of boundaries in text.
Definition: brkiter.h:108
class RegexMatcher bundles together a regular expression pattern and input text to which the expressi...
Definition: regex.h:659
virtual int64_t end64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UBool hasTransparentBounds() const
Queries the transparency of region bounds for this matcher.
void resetPreserveRegion()
virtual UBool hasAnchoringBounds() const
Return true if this matcher is using anchoring bounds.
virtual int32_t end(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual RegexMatcher & reset(const UnicodeString &input)
Resets this matcher with a new input string.
virtual UnicodeString replaceFirst(const UnicodeString &replacement, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual void getMatchCallback(URegexMatchCallback *&callback, const void *&context, UErrorCode &status)
Get the callback function for this URegularExpression.
virtual int32_t groupCount() const
Returns the number of capturing groups in this matcher's pattern.
virtual UnicodeString group(UErrorCode &status) const
Returns a string containing the text matched by the previous match.
virtual UnicodeString replaceAll(const UnicodeString &replacement, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual void setFindProgressCallback(URegexFindProgressCallback *callback, const void *context, UErrorCode &status)
Set a progress callback function for use with find operations on this Matcher.
virtual RegexMatcher & reset()
Resets this matcher.
virtual UText * getInput(UText *dest, UErrorCode &status) const
Returns the input string being matched, either by copying it into the provided UText parameter or by ...
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
virtual UBool lookingAt(int64_t startIndex, UErrorCode &status)
Attempts to match the input string, starting from the specified index, against the pattern.
virtual UBool matches(int64_t startIndex, UErrorCode &status)
Resets the matcher, then attempts to match the input beginning at the specified startIndex,...
virtual int32_t start(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual UBool find()
Find the next pattern match in the input string.
RegexMatcher(UText *regexp, UText *input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual const RegexPattern & pattern() const
Returns the pattern that is interpreted by this matcher.
virtual RegexMatcher & region(int64_t start, int64_t limit, UErrorCode &status)
Sets the limits of this matcher's region.
virtual RegexMatcher & refreshInputText(UText *input, UErrorCode &status)
Set the subject text string upon which the regular expression is looking for matches without changing...
RegexMatcher(const UnicodeString &regexp, const UnicodeString &input, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual int32_t getTimeLimit() const
Get the time limit, if any, for match operations made with this Matcher.
virtual UText * group(int32_t groupNum, UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual void setTimeLimit(int32_t limit, UErrorCode &status)
Set a processing time limit for match operations with this Matcher.
virtual void setMatchCallback(URegexMatchCallback *callback, const void *context, UErrorCode &status)
Set a callback function for use with this Matcher.
virtual int64_t regionStart64() const
Reports the start index of this matcher's region.
virtual int32_t start(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual UnicodeString & appendTail(UnicodeString &dest)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual RegexMatcher & reset(UText *input)
Resets this matcher with a new input string.
virtual ~RegexMatcher()
Destructor.
virtual UBool lookingAt(UErrorCode &status)
Attempts to match the input string, starting from the beginning of the region, against the pattern.
virtual const UnicodeString & input() const
Returns the input string being matched.
virtual UText * replaceAll(UText *replacement, UText *dest, UErrorCode &status)
Replaces every substring of the input that matches the pattern with the given replacement string.
virtual RegexMatcher & region(int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode &status)
Identical to region(start, limit, status) but also allows a start position without resetting the regi...
virtual RegexMatcher & reset(int64_t index, UErrorCode &status)
Resets this matcher, and set the current input position.
virtual int64_t regionEnd64() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual UBool find(UErrorCode &status)
Find the next pattern match in the input string.
RegexMatcher(UText *regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual RegexMatcher & useAnchoringBounds(UBool b)
Set whether this matcher is using Anchoring Bounds for its region.
virtual UBool matches(UErrorCode &status)
Attempts to match the entire input region against the pattern.
virtual RegexMatcher & appendReplacement(UnicodeString &dest, const UnicodeString &replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
void setTrace(UBool state)
Debug function: enable/disable tracing of the matching engine.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status)
Split a string into fields.
RegexMatcher(const UnicodeString &regexp, uint32_t flags, UErrorCode &status)
Construct a RegexMatcher for a regular expression.
virtual RegexMatcher & useTransparentBounds(UBool b)
Sets the transparency of region bounds for this matcher.
virtual int32_t getStackLimit() const
Get the size of the heap storage available for use by the back tracking stack.
virtual int32_t end(int32_t group, UErrorCode &status) const
Returns the index in the input string of the character following the text matched by the specified ca...
virtual UText * group(UText *dest, int64_t &group_len, UErrorCode &status) const
Returns a shallow clone of the entire live input string with the UText current native index set to th...
virtual int64_t start64(UErrorCode &status) const
Returns the index in the input string of the start of the text matched during the previous match oper...
virtual int64_t end64(UErrorCode &status) const
Returns the index in the input string of the first character following the text matched during the pr...
virtual UText * replaceFirst(UText *replacement, UText *dest, UErrorCode &status)
Replaces the first substring of the input that matches the pattern with the replacement string.
virtual void setStackLimit(int32_t limit, UErrorCode &status)
Set the amount of heap storage available for use by the match backtracking stack.
virtual int32_t regionEnd() const
Reports the end (limit) index (exclusive) of this matcher's region.
virtual UText * inputText() const
Returns the input string being matched.
virtual RegexMatcher & appendReplacement(UText *dest, UText *replacement, UErrorCode &status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
virtual int64_t start64(int32_t group, UErrorCode &status) const
Returns the index in the input string of the start of the text matched by the specified capture group...
virtual void getFindProgressCallback(URegexFindProgressCallback *&callback, const void *&context, UErrorCode &status)
Get the find progress callback function for this URegularExpression.
virtual UBool hitEnd() const
Return true if the most recent matching operation attempted to access additional input beyond the ava...
virtual UText * appendTail(UText *dest, UErrorCode &status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
virtual UBool requireEnd() const
Return true if the most recent match succeeded and additional input could cause it to fail.
virtual UBool find(int64_t start, UErrorCode &status)
Resets this RegexMatcher and then attempts to find the next substring of the input string that matche...
virtual int32_t regionStart() const
Reports the start index of this matcher's region.
virtual UnicodeString group(int32_t groupNum, UErrorCode &status) const
Returns a string containing the text captured by the given group during the previous match operation.
Class RegexPattern represents a compiled regular expression.
Definition: regex.h:87
static RegexPattern * compile(UText *regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
virtual ~RegexPattern()
Destructor.
virtual uint32_t flags() const
Get the URegexpFlag match mode flags that were used when compiling this pattern.
void dumpPattern() const
Dump a compiled pattern.
virtual RegexPattern * clone() const
Create an exact copy of this RegexPattern object.
static UBool matches(UText *regex, UText *input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
static RegexPattern * compile(const UnicodeString &regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
bool operator!=(const RegexPattern &that) const
Comparison operator.
Definition: regex.h:132
static RegexPattern * compile(UText *regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
virtual UText * patternText(UErrorCode &status) const
Returns the regular expression from which this pattern was compiled.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual int32_t split(const UnicodeString &input, UnicodeString dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
virtual int32_t split(UText *input, UText *dest[], int32_t destCapacity, UErrorCode &status) const
Split a string into fields.
virtual RegexMatcher * matcher(const UnicodeString &input, UErrorCode &status) const
Creates a RegexMatcher that will match the given input against this pattern.
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
static RegexPattern * compile(const UnicodeString &regex, uint32_t flags, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object using the specified URegexp...
static UBool matches(const UnicodeString &regex, const UnicodeString &input, UParseError &pe, UErrorCode &status)
Test whether a string matches a regular expression.
virtual int32_t groupNumberFromName(const UnicodeString &groupName, UErrorCode &status) const
Get the group number corresponding to a named capture group.
RegexPattern(const RegexPattern &source)
Copy Constructor.
static RegexPattern * compile(UText *regex, UParseError &pe, UErrorCode &status)
Compiles the regular expression in string form into a RegexPattern object.
bool operator==(const RegexPattern &that) const
Comparison operator.
RegexPattern()
default constructor.
virtual UnicodeString pattern() const
Returns the regular expression from which this pattern was compiled.
virtual RegexMatcher * matcher(UErrorCode &status) const
Creates a RegexMatcher that will match against this pattern.
virtual int32_t groupNumberFromName(const char *groupName, int32_t nameLength, UErrorCode &status) const
Get the group number corresponding to a named capture group.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
struct UHashtable UHashtable
Definition: msgfmt.h:43
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1328
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
C++ API: Unicode String.
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
C API: Regular Expressions.
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1576
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1502
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:316