ICU 70.1  70.1
uregex.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 **********************************************************************
5 * Copyright (C) 2004-2016, International Business Machines
6 * Corporation and others. All Rights Reserved.
7 **********************************************************************
8 * file name: uregex.h
9 * encoding: UTF-8
10 * indentation:4
11 *
12 * created on: 2004mar09
13 * created by: Andy Heninger
14 *
15 * ICU Regular Expressions, API for C
16 */
17 
25 #ifndef UREGEX_H
26 #define UREGEX_H
27 
28 #include "unicode/utext.h"
29 #include "unicode/utypes.h"
30 
31 #if !UCONFIG_NO_REGULAR_EXPRESSIONS
32 
33 #include "unicode/parseerr.h"
34 
35 #if U_SHOW_CPLUSPLUS_API
36 #include "unicode/localpointer.h"
37 #endif // U_SHOW_CPLUSPLUS_API
38 
39 struct URegularExpression;
45 typedef struct URegularExpression URegularExpression;
46 
47 
52 typedef enum URegexpFlag{
53 
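54 #ifndef U_HIDE_DRAFT_API
58 UREGEX_CANON_EQ = 128,
59 #endif /* U_HIDE_DRAFT_API */
61 UREGEX_CASE_INSENSITIVE = 2,
62 
64 UREGEX_COMMENTS = 4,
65 
68 UREGEX_DOTALL = 32,
69 
80 UREGEX_LITERAL = 16,
81 
86 UREGEX_MULTILINE = 8,
87 
93 UREGEX_UNIX_LINES = 1,
94 
102 UREGEX_UWORD = 256,
103 
111 UREGEX_ERROR_ON_UNKNOWN_ESCAPES = 512
112 
113 } URegexpFlag;
114 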
137 U_CAPI URegularExpression * U_EXPORT2
138 uregex_open( const UChar *pattern,
139  int32_t patternLength,
140  uint32_t flags,
141  UParseError *pe,
142  UErrorCode *status);
143 
167 U_CAPI URegularExpression * U_EXPORT2
168 uregex_openUText(UText *pattern,
169  uint32_t flags,
170  UParseError *pe,
171  UErrorCode *status);
172 
173 #if !UCONFIG_NO_CONVERSION
197 U_CAPI URegularExpression * U_EXPORT2
198 uregex_openC( const char *pattern,
199  uint32_t flags,
200  UParseError *pe,
201  UErrorCode *status);
202 #endif
203 
204 
205 
213 U_CAPI void U_EXPORT2
214 uregex_close(URegularExpression *regexp);
215 
216 #if U_SHOW_CPLUSPLUS_API
217 
218 U_NAMESPACE_BEGIN
219 
230 
231 U_NAMESPACE_END
232 
233 #endif
234 
253 U_CAPI URegularExpression * U_EXPORT2
254 uregex_clone(const URegularExpression *regexp, UErrorCode *status);
255 
273 U_CAPI const UChar * U_EXPORT2
274 uregex_pattern(const URegularExpression *regexp,
275  int32_t *patLength,
276  UErrorCode *status);
277 
289 U_CAPI UText * U_EXPORT2
290 uregex_patternUText(const URegularExpression *regexp,
291  UErrorCode *status);
292 
301 U_CAPI int32_t U_EXPORT2
302 uregex_flags(const URegularExpression *regexp,
303  UErrorCode *status);
304 
305 
326 U_CAPI void U_EXPORT2
327 uregex_setText(URegularExpression *regexp,
328  const UChar *text,
329  int32_t textLength,
330  UErrorCode *status);
331 
332 
349 U_CAPI void U_EXPORT2
350 uregex_setUText(URegularExpression *regexp,
351  UText *text,
352  UErrorCode *status);
353 
374 U_CAPI const UChar * U_EXPORT2
375 uregex_getText(URegularExpression *regexp,
376  int32_t *textLength,
377  UErrorCode *status);
378 
395 U_CAPI UText * U_EXPORT2
396 uregex_getUText(URegularExpression *regexp,
397  UText *dest,
398  UErrorCode *status);
399 
425 U_CAPI void U_EXPORT2
426 uregex_refreshUText(URegularExpression *regexp,
427  UText *text,
428  UErrorCode *status);
429 
450 U_CAPI UBool U_EXPORT2
451 uregex_matches(URegularExpression *regexp,
452  int32_t startIndex,
453  UErrorCode *status);
454 
476 U_CAPI UBool U_EXPORT2
477 uregex_matches64(URegularExpression *regexp,
478  int64_t startIndex,
479  UErrorCode *status);
480 
504 U_CAPI UBool U_EXPORT2
505 uregex_lookingAt(URegularExpression *regexp,
506  int32_t startIndex,
507  UErrorCode *status);
508 
533 U_CAPI UBool U_EXPORT2
534 uregex_lookingAt64(URegularExpression *regexp,
535  int64_t startIndex,
536  UErrorCode *status);
537 
557 U_CAPI UBool U_EXPORT2
558 uregex_find(URegularExpression *regexp,
559  int32_t startIndex,
560  UErrorCode *status);
561 
582 U_CAPI UBool U_EXPORT2
583 uregex_find64(URegularExpression *regexp,
584  int64_t startIndex,
585  UErrorCode *status);
586 
600 U_CAPI UBool U_EXPORT2
601 uregex_findNext(URegularExpression *regexp,
602  UErrorCode *status);
603 
611 U_CAPI int32_t U_EXPORT2
612 uregex_groupCount(URegularExpression *regexp,
613  UErrorCode *status);
614 
631 U_CAPI int32_t U_EXPORT2
632 uregex_groupNumberFromName(URegularExpression *regexp,
633  const UChar *groupName,
634  int32_t nameLength,
635  UErrorCode *status);
636 
637 
655 U_CAPI int32_t U_EXPORT2
656 uregex_groupNumberFromCName(URegularExpression *regexp,
657  const char *groupName,
658  int32_t nameLength,
659  UErrorCode *status);
660 
677 U_CAPI int32_t U_EXPORT2
678 uregex_group(URegularExpression *regexp,
679  int32_t groupNum,
680  UChar *dest,
681  int32_t destCapacity,
682  UErrorCode *status);
683 
706 U_CAPI UText * U_EXPORT2
707 uregex_groupUText(URegularExpression *regexp,
708  int32_t groupNum,
709  UText *dest,
710  int64_t *groupLength,
711  UErrorCode *status);
712 
727 U_CAPI int32_t U_EXPORT2
728 uregex_start(URegularExpression *regexp,
729  int32_t groupNum,
730  UErrorCode *status);
731 
747 U_CAPI int64_t U_EXPORT2
748 uregex_start64(URegularExpression *regexp,
749  int32_t groupNum,
750  UErrorCode *status);
751 
765 U_CAPI int32_t U_EXPORT2
766 uregex_end(URegularExpression *regexp,
767  int32_t groupNum,
768  UErrorCode *status);
769 
784 U_CAPI int64_t U_EXPORT2
785 uregex_end64(URegularExpression *regexp,
786  int32_t groupNum,
787  UErrorCode *status);
788 
802 U_CAPI void U_EXPORT2
803 uregex_reset(URegularExpression *regexp,
804  int32_t index,
805  UErrorCode *status);
806 
821 U_CAPI void U_EXPORT2
822 uregex_reset64(URegularExpression *regexp,
823  int64_t index,
824  UErrorCode *status);
825 
846 U_CAPI void U_EXPORT2
847 uregex_setRegion(URegularExpression *regexp,
848  int32_t regionStart,
849  int32_t regionLimit,
850  UErrorCode *status);
851 
873 U_CAPI void U_EXPORT2
874 uregex_setRegion64(URegularExpression *regexp,
875  int64_t regionStart,
876  int64_t regionLimit,
877  UErrorCode *status);
878 
893 U_CAPI void U_EXPORT2
894 uregex_setRegionAndStart(URegularExpression *regexp,
895  int64_t regionStart,
896  int64_t regionLimit,
897  int64_t startIndex,
898  UErrorCode *status);
899 
909 U_CAPI int32_t U_EXPORT2
910 uregex_regionStart(const URegularExpression *regexp,
911  UErrorCode *status);
912 
923 U_CAPI int64_t U_EXPORT2
924 uregex_regionStart64(const URegularExpression *regexp,
925  UErrorCode *status);
926 
937 U_CAPI int32_t U_EXPORT2
938 uregex_regionEnd(const URegularExpression *regexp,
939  UErrorCode *status);
940 
952 U_CAPI int64_t U_EXPORT2
953 uregex_regionEnd64(const URegularExpression *regexp,
954  UErrorCode *status);
955 
966 U_CAPI UBool U_EXPORT2
967 uregex_hasTransparentBounds(const URegularExpression *regexp,
968  UErrorCode *status);
969 
970 
990 U_CAPI void U_EXPORT2
991 uregex_useTransparentBounds(URegularExpression *regexp,
992  UBool b,
993  UErrorCode *status);
994 
995 
1005 U_CAPI UBool U_EXPORT2
1006 uregex_hasAnchoringBounds(const URegularExpression *regexp,
1007  UErrorCode *status);
1008 
1009 
1023 U_CAPI void U_EXPORT2
1024 uregex_useAnchoringBounds(URegularExpression *regexp,
1025  UBool b,
1026  UErrorCode *status);
1027 
1038 U_CAPI UBool U_EXPORT2
1039 uregex_hitEnd(const URegularExpression *regexp,
1040  UErrorCode *status);
1041 
1053 U_CAPI UBool U_EXPORT2
1054 uregex_requireEnd(const URegularExpression *regexp,
1055  UErrorCode *status);
1056 
1057 
1058 
1059 
1060 
1085 U_CAPI int32_t U_EXPORT2
1086 uregex_replaceAll(URegularExpression *regexp,
1087  const UChar *replacementText,
1088  int32_t replacementLength,
1089  UChar *destBuf,
1090  int32_t destCapacity,
1091  UErrorCode *status);
1092 
1114 U_CAPI UText * U_EXPORT2
1115 uregex_replaceAllUText(URegularExpression *regexp,
1116  UText *replacement,
1117  UText *dest,
1118  UErrorCode *status);
1119 
1144 U_CAPI int32_t U_EXPORT2
1145 uregex_replaceFirst(URegularExpression *regexp,
1146  const UChar *replacementText,
1147  int32_t replacementLength,
1148  UChar *destBuf,
1149  int32_t destCapacity,
1150  UErrorCode *status);
1151 
1173 U_CAPI UText * U_EXPORT2
1174 uregex_replaceFirstUText(URegularExpression *regexp,
1175  UText *replacement,
1176  UText *dest,
1177  UErrorCode *status);
1178 
1225 U_CAPI int32_t U_EXPORT2
1226 uregex_appendReplacement(URegularExpression *regexp,
1227  const UChar *replacementText,
1228  int32_t replacementLength,
1229  UChar **destBuf,
1230  int32_t *destCapacity,
1231  UErrorCode *status);
1232 
1255 U_CAPI void U_EXPORT2
1256 uregex_appendReplacementUText(URegularExpression *regexp,
1257  UText *replacementText,
1258  UText *dest,
1259  UErrorCode *status);
1260 
1285 U_CAPI int32_t U_EXPORT2
1286 uregex_appendTail(URegularExpression *regexp,
1287  UChar **destBuf,
1288  int32_t *destCapacity,
1289  UErrorCode *status);
1290 
1309 U_CAPI UText * U_EXPORT2
1310 uregex_appendTailUText(URegularExpression *regexp,
1311  UText *dest,
1312  UErrorCode *status);
1313 
1365 U_CAPI int32_t U_EXPORT2
1366 uregex_split(URegularExpression *regexp,
1367  UChar *destBuf,
1368  int32_t destCapacity,
1369  int32_t *requiredCapacity,
1370  UChar *destFields[],
1371  int32_t destFieldsCapacity,
1372  UErrorCode *status);
1373 
1400 U_CAPI int32_t U_EXPORT2
1401 uregex_splitUText(URegularExpression *regexp,
1402  UText *destFields[],
1403  int32_t destFieldsCapacity,
1404  UErrorCode *status);
1405 
1428 U_CAPI void U_EXPORT2
1429 uregex_setTimeLimit(URegularExpression *regexp,
1430  int32_t limit,
1431  UErrorCode *status);
1432 
1442 U_CAPI int32_t U_EXPORT2
1443 uregex_getTimeLimit(const URegularExpression *regexp,
1444  UErrorCode *status);
1445 
1466 U_CAPI void U_EXPORT2
1467 uregex_setStackLimit(URegularExpression *regexp,
1468  int32_t limit,
1469  UErrorCode *status);
1470 
1478 U_CAPI int32_t U_EXPORT2
1479 uregex_getStackLimit(const URegularExpression *regexp,
1480  UErrorCode *status);
1481 
1482 
1501 U_CDECL_BEGIN
1502 typedef UBool U_CALLCONV URegexMatchCallback (
1503  const void *context,
1504  int32_t steps);
1505 U_CDECL_END
1506 
1521 U_CAPI void U_EXPORT2
1522 uregex_setMatchCallback(URegularExpression *regexp,
1523  URegexMatchCallback *callback,
1524  const void *context,
1525  UErrorCode *status);
1526 
1527 
1539 U_CAPI void U_EXPORT2
1540 uregex_getMatchCallback(const URegularExpression *regexp,
1541  URegexMatchCallback **callback,
1542  const void **context,
1543  UErrorCode *status);
1544 
1575 U_CDECL_BEGIN
1576 typedef UBool U_CALLCONV URegexFindProgressCallback (
1577  const void *context,
1578  int64_t matchIndex);
1579 U_CDECL_END
1580 
1581 
1593 U_CAPI void U_EXPORT2
1594 uregex_setFindProgressCallback(URegularExpression *regexp,
1595  URegexFindProgressCallback *callback,
1596  const void *context,
1597  UErrorCode *status);
1598 
1610 U_CAPI void U_EXPORT2
1611 uregex_getFindProgressCallback(const URegularExpression *regexp,
1612  URegexFindProgressCallback **callback,
1613  const void **context,
1614  UErrorCode *status);
1615 
1616 #endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1617 #endif /* UREGEX_H */
"Smart pointer" class, closes a URegularExpression via uregex_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
C API: Parse Error Information.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to make sure that the calling convention is compatible.
Definition: platform.h:870
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1328
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:86
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:418
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:85
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1576
U_CAPI UBool uregex_find64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_find.
U_CAPI UText * uregex_patternUText(const URegularExpression *regexp, UErrorCode *status)
Returns the source text of the pattern for this regular expression.
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1502
U_CAPI const UChar * uregex_pattern(const URegularExpression *regexp, int32_t *patLength, UErrorCode *status)
Returns a pointer to the source form of the pattern for this regular expression.
U_CAPI int32_t uregex_regionEnd(const URegularExpression *regexp, UErrorCode *status)
Reports the end index (exclusive) of the matching region for this URegularExpression.
U_CAPI void uregex_appendReplacementUText(URegularExpression *regexp, UText *replacementText, UText *dest, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
U_CAPI UText * uregex_replaceAllUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
U_CAPI URegularExpression * uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
U_CAPI void uregex_reset64(URegularExpression *regexp, int64_t index, UErrorCode *status)
64bit version of uregex_reset.
U_CAPI int64_t uregex_start64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_start.
U_CAPI void uregex_setRegionAndStart(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status)
Set the matching region and the starting index for subsequent matches in a single operation.
U_CAPI int32_t uregex_appendReplacement(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
U_CAPI int32_t uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status)
Get the time limit for matches with this URegularExpression.
U_CAPI void uregex_useTransparentBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Sets the transparency of region bounds for this URegularExpression.
U_CAPI int32_t uregex_groupNumberFromCName(URegularExpression *regexp, const char *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
U_CAPI int32_t uregex_split(URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
U_CAPI UBool uregex_matches64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_matches.
U_CAPI void uregex_reset(URegularExpression *regexp, int32_t index, UErrorCode *status)
Reset any saved state from the previous match.
U_CAPI int32_t uregex_flags(const URegularExpression *regexp, UErrorCode *status)
Get the match mode flags that were specified when compiling this regular expression.
U_CAPI UText * uregex_groupUText(URegularExpression *regexp, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status)
Returns a shallow immutable clone of the entire input string with the current index set to the beginning of the requested capture group.
U_CAPI int64_t uregex_regionStart64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionStart.
U_CAPI void uregex_setUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
U_CAPI void uregex_setRegion64(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, UErrorCode *status)
64bit version of uregex_setRegion.
U_CAPI int64_t uregex_end64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_end.
struct URegularExpression URegularExpression
Structure representing a compiled regular expression, plus the results of a match operation.
Definition: uregex.h:45
U_CAPI UBool uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Find the first matching substring of the input string that matches the pattern.
U_CAPI const UChar * uregex_getText(URegularExpression *regexp, int32_t *textLength, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
U_CAPI void uregex_useAnchoringBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Set whether this URegularExpression is using Anchoring Bounds for its region.
U_CAPI URegularExpression * uregex_open(const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
U_CAPI int32_t uregex_appendTail(URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last match, to the destination string.
U_CAPI void uregex_close(URegularExpression *regexp)
Close the regular expression, recovering all resources (memory) it was holding.
U_CAPI URegularExpression * uregex_openC(const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
U_CAPI UBool uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string, starting from the specified index, against the pattern.
URegexpFlag
Constants for Regular Expression Match Modes.
Definition: uregex.h:52
@ UREGEX_DOTALL
If set, '.' matches line terminators; otherwise '.' matching stops at line end.
Definition: uregex.h:68
@ UREGEX_COMMENTS
Allow white space and comments within patterns.
Definition: uregex.h:64
@ UREGEX_MULTILINE
Control behavior of "$" and "^". If set, recognize line terminators within the string; otherwise, match only at the start and end of the input string.
Definition: uregex.h:86
@ UREGEX_LITERAL
If set, treat the entire pattern as a literal string.
Definition: uregex.h:80
@ UREGEX_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uregex.h:61
@ UREGEX_CANON_EQ
Forces normalization of pattern and strings.
Definition: uregex.h:58
@ UREGEX_ERROR_ON_UNKNOWN_ESCAPES
Error on Unrecognized backslash escapes.
Definition: uregex.h:111
@ UREGEX_UWORD
Unicode word boundaries.
Definition: uregex.h:102
@ UREGEX_UNIX_LINES
Unix-only line endings.
Definition: uregex.h:93
U_CAPI UBool uregex_findNext(URegularExpression *regexp, UErrorCode *status)
Find the next pattern match in the input string.
U_CAPI UText * uregex_getUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
U_CAPI UBool uregex_requireEnd(const URegularExpression *regexp, UErrorCode *status)
Return true if the most recent match succeeded and additional input could cause it to fail.
U_CAPI int32_t uregex_groupCount(URegularExpression *regexp, UErrorCode *status)
Get the number of capturing groups in this regular expression's pattern.
U_CAPI void uregex_refreshUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression is looking for matches without changing any other aspect of the matching state.
U_CAPI int32_t uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status)
Get the size of the heap storage available for use by the back tracking stack.
U_CAPI int32_t uregex_replaceFirst(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
U_CAPI int32_t uregex_regionStart(const URegularExpression *regexp, UErrorCode *status)
Reports the start index of the matching region.
U_CAPI int32_t uregex_start(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the start of the text matched by the specified capture group during the previous match operation.
U_CAPI int64_t uregex_regionEnd64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionEnd.
U_CAPI UText * uregex_appendTailUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string, starting at the position following the last match, to the destination string.
U_CAPI URegularExpression * uregex_clone(const URegularExpression *regexp, UErrorCode *status)
Make a copy of a compiled regular expression.
U_CAPI void uregex_setText(URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
U_CAPI int32_t uregex_replaceAll(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
U_CAPI void uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set the amount of heap storage available for use by the match backtracking stack.
U_CAPI void uregex_getMatchCallback(const URegularExpression *regexp, URegexMatchCallback **callback, const void **context, UErrorCode *status)
Get the callback function for this URegularExpression.
U_CAPI void uregex_setRegion(URegularExpression *regexp, int32_t regionStart, int32_t regionLimit, UErrorCode *status)
Sets the limits of the matching region for this URegularExpression.
U_CAPI int32_t uregex_group(URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
U_CAPI UBool uregex_hasAnchoringBounds(const URegularExpression *regexp, UErrorCode *status)
Return true if this URegularExpression is using anchoring bounds.
U_CAPI void uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set a processing time limit for match operations with this URegularExpression.
U_CAPI UBool uregex_lookingAt64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_lookingAt.
U_CAPI void uregex_setFindProgressCallback(URegularExpression *regexp, URegexFindProgressCallback *callback, const void *context, UErrorCode *status)
Set the find progress callback function for this URegularExpression.
U_CAPI void uregex_setMatchCallback(URegularExpression *regexp, URegexMatchCallback *callback, const void *context, UErrorCode *status)
Set a callback function for this URegularExpression.
U_CAPI UText * uregex_replaceFirstUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
U_CAPI int32_t uregex_end(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the position following the end of the text matched by the specified capture group.
U_CAPI void uregex_getFindProgressCallback(const URegularExpression *regexp, URegexFindProgressCallback **callback, const void **context, UErrorCode *status)
Get the find progress callback function for this URegularExpression.
U_CAPI UBool uregex_hasTransparentBounds(const URegularExpression *regexp, UErrorCode *status)
Queries the transparency of region bounds for this URegularExpression.
U_CAPI UBool uregex_hitEnd(const URegularExpression *regexp, UErrorCode *status)
Return true if the most recent matching operation touched the end of the text being processed.
U_CAPI UBool uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string against the pattern.
U_CAPI int32_t uregex_groupNumberFromName(URegularExpression *regexp, const UChar *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
U_CAPI int32_t uregex_splitUText(URegularExpression *regexp, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415