ICU 73.1 73.1
uregex.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4**********************************************************************
5* Copyright (C) 2004-2016, International Business Machines
6* Corporation and others. All Rights Reserved.
7**********************************************************************
8* file name: uregex.h
9* encoding: UTF-8
10* indentation:4
11*
12* created on: 2004mar09
13* created by: Andy Heninger
14*
15* ICU Regular Expressions, API for C
16*/
17
25#ifndef UREGEX_H
26#define UREGEX_H
27
28#include "unicode/utext.h"
29#include "unicode/utypes.h"
30
31#if !UCONFIG_NO_REGULAR_EXPRESSIONS
32
33#include "unicode/parseerr.h"
34
35#if U_SHOW_CPLUSPLUS_API
37#endif // U_SHOW_CPLUSPLUS_API
38
46
47
52typedef enum URegexpFlag{
53
54#ifndef U_HIDE_DRAFT_API
59#endif /* U_HIDE_DRAFT_API */
62
65
69
81
87
94
103
112
114
/**
 * Open (compile) an ICU regular expression from a UChar (UTF-16) pattern.
 *
 * @param pattern        The pattern source text, as UChar code units.
 * @param patternLength  Length of the pattern.
 *                       NOTE(review): presumably counted in UChars, with -1
 *                       accepted for NUL-terminated input; confirm against
 *                       the ICU API reference.
 * @param flags          Match mode flags (see the URegexpFlag constants).
 * @param pe             UParseError used to return detailed information
 *                       about a pattern parsing error.
 *                       NOTE(review): whether NULL is accepted is not
 *                       visible here; confirm.
 * @param status         ICU4C error code (UErrorCode) for the operation.
 * @return               The compiled URegularExpression.
 *                       NOTE(review): presumably released by the caller
 *                       with uregex_close(); confirm.
 */
137U_CAPI URegularExpression * U_EXPORT2
138uregex_open( const UChar *pattern,
139 int32_t patternLength,
140 uint32_t flags,
141 UParseError *pe,
142 UErrorCode *status);
143
167U_CAPI URegularExpression * U_EXPORT2
169 uint32_t flags,
170 UParseError *pe,
171 UErrorCode *status);
172
173#if !UCONFIG_NO_CONVERSION
/**
 * Open (compile) an ICU regular expression from a char * pattern.
 * Only declared when UCONFIG_NO_CONVERSION is not set.
 *
 * @param pattern  The pattern source text as a C string.
 *                 NOTE(review): presumably NUL-terminated (no length
 *                 parameter is taken) and in the default codepage; confirm.
 * @param flags    Match mode flags (see the URegexpFlag constants).
 * @param pe       UParseError used to return detailed information about a
 *                 pattern parsing error.
 * @param status   ICU4C error code (UErrorCode) for the operation.
 * @return         The compiled URegularExpression.
 *                 NOTE(review): presumably released by the caller with
 *                 uregex_close(); confirm.
 */
197U_CAPI URegularExpression * U_EXPORT2
198uregex_openC( const char *pattern,
199 uint32_t flags,
200 UParseError *pe,
201 UErrorCode *status);
202#endif
203
204
205
213U_CAPI void U_EXPORT2
215
216#if U_SHOW_CPLUSPLUS_API
217
218U_NAMESPACE_BEGIN
219
230
231U_NAMESPACE_END
232
233#endif
234
253U_CAPI URegularExpression * U_EXPORT2
255
273U_CAPI const UChar * U_EXPORT2
275 int32_t *patLength,
276 UErrorCode *status);
277
289U_CAPI UText * U_EXPORT2
291 UErrorCode *status);
292
301U_CAPI int32_t U_EXPORT2
303 UErrorCode *status);
304
305
326U_CAPI void U_EXPORT2
328 const UChar *text,
329 int32_t textLength,
330 UErrorCode *status);
331
332
349U_CAPI void U_EXPORT2
351 UText *text,
352 UErrorCode *status);
353
374U_CAPI const UChar * U_EXPORT2
376 int32_t *textLength,
377 UErrorCode *status);
378
395U_CAPI UText * U_EXPORT2
397 UText *dest,
398 UErrorCode *status);
399
425U_CAPI void U_EXPORT2
427 UText *text,
428 UErrorCode *status);
429
450U_CAPI UBool U_EXPORT2
452 int32_t startIndex,
453 UErrorCode *status);
454
476U_CAPI UBool U_EXPORT2
478 int64_t startIndex,
479 UErrorCode *status);
480
504U_CAPI UBool U_EXPORT2
506 int32_t startIndex,
507 UErrorCode *status);
508
533U_CAPI UBool U_EXPORT2
535 int64_t startIndex,
536 UErrorCode *status);
537
557U_CAPI UBool U_EXPORT2
559 int32_t startIndex,
560 UErrorCode *status);
561
582U_CAPI UBool U_EXPORT2
584 int64_t startIndex,
585 UErrorCode *status);
586
600U_CAPI UBool U_EXPORT2
602 UErrorCode *status);
603
611U_CAPI int32_t U_EXPORT2
613 UErrorCode *status);
614
631U_CAPI int32_t U_EXPORT2
633 const UChar *groupName,
634 int32_t nameLength,
635 UErrorCode *status);
636
637
655U_CAPI int32_t U_EXPORT2
657 const char *groupName,
658 int32_t nameLength,
659 UErrorCode *status);
660
677U_CAPI int32_t U_EXPORT2
679 int32_t groupNum,
680 UChar *dest,
681 int32_t destCapacity,
682 UErrorCode *status);
683
706U_CAPI UText * U_EXPORT2
708 int32_t groupNum,
709 UText *dest,
710 int64_t *groupLength,
711 UErrorCode *status);
712
727U_CAPI int32_t U_EXPORT2
729 int32_t groupNum,
730 UErrorCode *status);
731
747U_CAPI int64_t U_EXPORT2
749 int32_t groupNum,
750 UErrorCode *status);
751
765U_CAPI int32_t U_EXPORT2
767 int32_t groupNum,
768 UErrorCode *status);
769
784U_CAPI int64_t U_EXPORT2
786 int32_t groupNum,
787 UErrorCode *status);
788
802U_CAPI void U_EXPORT2
804 int32_t index,
805 UErrorCode *status);
806
821U_CAPI void U_EXPORT2
823 int64_t index,
824 UErrorCode *status);
825
846U_CAPI void U_EXPORT2
848 int32_t regionStart,
849 int32_t regionLimit,
850 UErrorCode *status);
851
873U_CAPI void U_EXPORT2
875 int64_t regionStart,
876 int64_t regionLimit,
877 UErrorCode *status);
878
893U_CAPI void U_EXPORT2
895 int64_t regionStart,
896 int64_t regionLimit,
897 int64_t startIndex,
898 UErrorCode *status);
899
909U_CAPI int32_t U_EXPORT2
911 UErrorCode *status);
912
923U_CAPI int64_t U_EXPORT2
925 UErrorCode *status);
926
937U_CAPI int32_t U_EXPORT2
939 UErrorCode *status);
940
952U_CAPI int64_t U_EXPORT2
954 UErrorCode *status);
955
966U_CAPI UBool U_EXPORT2
968 UErrorCode *status);
969
970
990U_CAPI void U_EXPORT2
992 UBool b,
993 UErrorCode *status);
994
995
1005U_CAPI UBool U_EXPORT2
1007 UErrorCode *status);
1008
1009
1023U_CAPI void U_EXPORT2
1025 UBool b,
1026 UErrorCode *status);
1027
1038U_CAPI UBool U_EXPORT2
1040 UErrorCode *status);
1041
1053U_CAPI UBool U_EXPORT2
1055 UErrorCode *status);
1056
1057
1058
1059
1060
1085U_CAPI int32_t U_EXPORT2
1087 const UChar *replacementText,
1088 int32_t replacementLength,
1089 UChar *destBuf,
1090 int32_t destCapacity,
1091 UErrorCode *status);
1092
1114U_CAPI UText * U_EXPORT2
1116 UText *replacement,
1117 UText *dest,
1118 UErrorCode *status);
1119
1144U_CAPI int32_t U_EXPORT2
1146 const UChar *replacementText,
1147 int32_t replacementLength,
1148 UChar *destBuf,
1149 int32_t destCapacity,
1150 UErrorCode *status);
1151
1173U_CAPI UText * U_EXPORT2
1175 UText *replacement,
1176 UText *dest,
1177 UErrorCode *status);
1178
1225U_CAPI int32_t U_EXPORT2
1227 const UChar *replacementText,
1228 int32_t replacementLength,
1229 UChar **destBuf,
1230 int32_t *destCapacity,
1231 UErrorCode *status);
1232
1255U_CAPI void U_EXPORT2
1257 UText *replacementText,
1258 UText *dest,
1259 UErrorCode *status);
1260
1285U_CAPI int32_t U_EXPORT2
1287 UChar **destBuf,
1288 int32_t *destCapacity,
1289 UErrorCode *status);
1290
1309U_CAPI UText * U_EXPORT2
1311 UText *dest,
1312 UErrorCode *status);
1313
1365U_CAPI int32_t U_EXPORT2
1367 UChar *destBuf,
1368 int32_t destCapacity,
1369 int32_t *requiredCapacity,
1370 UChar *destFields[],
1371 int32_t destFieldsCapacity,
1372 UErrorCode *status);
1373
1400U_CAPI int32_t U_EXPORT2
1402 UText *destFields[],
1403 int32_t destFieldsCapacity,
1404 UErrorCode *status);
1405
1428U_CAPI void U_EXPORT2
1430 int32_t limit,
1431 UErrorCode *status);
1432
1442U_CAPI int32_t U_EXPORT2
1444 UErrorCode *status);
1445
1466U_CAPI void U_EXPORT2
1468 int32_t limit,
1469 UErrorCode *status);
1470
1478U_CAPI int32_t U_EXPORT2
1480 UErrorCode *status);
1481
1482
1503 const void *context,
1504 int32_t steps);
1506
1521U_CAPI void U_EXPORT2
1523 URegexMatchCallback *callback,
1524 const void *context,
1525 UErrorCode *status);
1526
1527
1539U_CAPI void U_EXPORT2
1541 URegexMatchCallback **callback,
1542 const void **context,
1543 UErrorCode *status);
1544
1577 const void *context,
1578 int64_t matchIndex);
1580
1581
1593U_CAPI void U_EXPORT2
1596 const void *context,
1597 UErrorCode *status);
1598
1610U_CAPI void U_EXPORT2
1612 URegexFindProgressCallback **callback,
1613 const void **context,
1614 UErrorCode *status);
1615
1616#endif /* !UCONFIG_NO_REGULAR_EXPRESSIONS */
1617#endif /* UREGEX_H */
"Smart pointer" class, closes a URegularExpression via uregex_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
C API: Parse Error Information.
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:866
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
UText struct.
Definition: utext.h:1328
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:86
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:386
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:85
UBool URegexFindProgressCallback(const void *context, int64_t matchIndex)
Function pointer for a regular expression find callback function.
Definition: uregex.h:1576
U_CAPI UBool uregex_find64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_find.
UBool URegexMatchCallback(const void *context, int32_t steps)
Function pointer for a regular expression matching callback function.
Definition: uregex.h:1502
U_CAPI int32_t uregex_regionEnd(const URegularExpression *regexp, UErrorCode *status)
Reports the end index (exclusive) of the matching region for this URegularExpression.
U_CAPI void uregex_appendReplacementUText(URegularExpression *regexp, UText *replacementText, UText *dest, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
U_CAPI void uregex_reset64(URegularExpression *regexp, int64_t index, UErrorCode *status)
64bit version of uregex_reset.
U_CAPI int64_t uregex_start64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_start.
U_CAPI void uregex_setRegionAndStart(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, int64_t startIndex, UErrorCode *status)
Set the matching region and the starting index for subsequent matches in a single operation.
U_CAPI int32_t uregex_appendReplacement(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
Implements a replace operation intended to be used as part of an incremental find-and-replace.
U_CAPI int32_t uregex_getTimeLimit(const URegularExpression *regexp, UErrorCode *status)
Get the time limit for matches with this URegularExpression.
U_CAPI void uregex_useTransparentBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Sets the transparency of region bounds for this URegularExpression.
U_CAPI int32_t uregex_groupNumberFromCName(URegularExpression *regexp, const char *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
U_CAPI int32_t uregex_split(URegularExpression *regexp, UChar *destBuf, int32_t destCapacity, int32_t *requiredCapacity, UChar *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
U_CAPI UBool uregex_matches64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_matches.
U_CAPI void uregex_reset(URegularExpression *regexp, int32_t index, UErrorCode *status)
Reset any saved state from the previous match.
U_CAPI int32_t uregex_flags(const URegularExpression *regexp, UErrorCode *status)
Get the match mode flags that were specified when compiling this regular expression.
U_CAPI int64_t uregex_regionStart64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionStart.
U_CAPI void uregex_setUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
U_CAPI void uregex_setRegion64(URegularExpression *regexp, int64_t regionStart, int64_t regionLimit, UErrorCode *status)
64bit version of uregex_setRegion.
U_CAPI int64_t uregex_end64(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
64bit version of uregex_end.
struct URegularExpression URegularExpression
Structure representing a compiled regular expression, plus the results of a match operation.
Definition: uregex.h:45
U_CAPI UBool uregex_find(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Find the first matching substring of the input string that matches the pattern.
U_CAPI UText * uregex_replaceFirstUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
U_CAPI UText * uregex_appendTailUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
U_CAPI void uregex_useAnchoringBounds(URegularExpression *regexp, UBool b, UErrorCode *status)
Set whether this URegularExpression is using Anchoring Bounds for its region.
U_CAPI UText * uregex_getUText(URegularExpression *regexp, UText *dest, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
U_CAPI int32_t uregex_appendTail(URegularExpression *regexp, UChar **destBuf, int32_t *destCapacity, UErrorCode *status)
As the final step in a find-and-replace operation, append the remainder of the input string,...
U_CAPI void uregex_close(URegularExpression *regexp)
Close the regular expression, recovering all resources (memory) it was holding.
U_CAPI URegularExpression * uregex_openUText(UText *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
U_CAPI UBool uregex_lookingAt(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string, starting from the specified index, against the pattern.
URegexpFlag
Constants for Regular Expression Match Modes.
Definition: uregex.h:52
@ UREGEX_DOTALL
If set, '.' matches line terminators, otherwise '.' matching stops at line end.
Definition: uregex.h:68
@ UREGEX_COMMENTS
Allow white space and comments within patterns.
Definition: uregex.h:64
@ UREGEX_MULTILINE
Control behavior of "$" and "^" If set, recognize line terminators within string, otherwise,...
Definition: uregex.h:86
@ UREGEX_LITERAL
If set, treat the entire pattern as a literal string.
Definition: uregex.h:80
@ UREGEX_CASE_INSENSITIVE
Enable case insensitive matching.
Definition: uregex.h:61
@ UREGEX_CANON_EQ
Forces normalization of pattern and strings.
Definition: uregex.h:58
@ UREGEX_ERROR_ON_UNKNOWN_ESCAPES
Error on Unrecognized backslash escapes.
Definition: uregex.h:111
@ UREGEX_UWORD
Unicode word boundaries.
Definition: uregex.h:102
@ UREGEX_UNIX_LINES
Unix-only line endings.
Definition: uregex.h:93
U_CAPI UText * uregex_patternUText(const URegularExpression *regexp, UErrorCode *status)
Returns the source text of the pattern for this regular expression.
U_CAPI const UChar * uregex_getText(URegularExpression *regexp, int32_t *textLength, UErrorCode *status)
Get the subject text that is currently associated with this regular expression object.
U_CAPI const UChar * uregex_pattern(const URegularExpression *regexp, int32_t *patLength, UErrorCode *status)
Returns a pointer to the source form of the pattern for this regular expression.
U_CAPI UBool uregex_findNext(URegularExpression *regexp, UErrorCode *status)
Find the next pattern match in the input string.
U_CAPI URegularExpression * uregex_openC(const char *pattern, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
U_CAPI UBool uregex_requireEnd(const URegularExpression *regexp, UErrorCode *status)
Return true if the most recent match succeeded and additional input could cause it to fail.
U_CAPI URegularExpression * uregex_open(const UChar *pattern, int32_t patternLength, uint32_t flags, UParseError *pe, UErrorCode *status)
Open (compile) an ICU regular expression.
U_CAPI int32_t uregex_groupCount(URegularExpression *regexp, UErrorCode *status)
Get the number of capturing groups in this regular expression's pattern.
U_CAPI void uregex_refreshUText(URegularExpression *regexp, UText *text, UErrorCode *status)
Set the subject text string upon which the regular expression is looking for matches without changing...
U_CAPI int32_t uregex_getStackLimit(const URegularExpression *regexp, UErrorCode *status)
Get the size of the heap storage available for use by the back tracking stack.
U_CAPI int32_t uregex_replaceFirst(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces the first substring of the input that matches the pattern with the given replacement string.
U_CAPI int32_t uregex_regionStart(const URegularExpression *regexp, UErrorCode *status)
Reports the start index of the matching region.
U_CAPI UText * uregex_groupUText(URegularExpression *regexp, int32_t groupNum, UText *dest, int64_t *groupLength, UErrorCode *status)
Returns a shallow immutable clone of the entire input string with the current index set to the beginn...
U_CAPI int32_t uregex_start(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the start of the text matched by the specified capture group...
U_CAPI int64_t uregex_regionEnd64(const URegularExpression *regexp, UErrorCode *status)
64bit version of uregex_regionEnd.
U_CAPI void uregex_setText(URegularExpression *regexp, const UChar *text, int32_t textLength, UErrorCode *status)
Set the subject text string upon which the regular expression will look for matches.
U_CAPI int32_t uregex_replaceAll(URegularExpression *regexp, const UChar *replacementText, int32_t replacementLength, UChar *destBuf, int32_t destCapacity, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
U_CAPI void uregex_setStackLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set the amount of heap storage available for use by the match backtracking stack.
U_CAPI void uregex_getMatchCallback(const URegularExpression *regexp, URegexMatchCallback **callback, const void **context, UErrorCode *status)
Get the callback function for this URegularExpression.
U_CAPI void uregex_setRegion(URegularExpression *regexp, int32_t regionStart, int32_t regionLimit, UErrorCode *status)
Sets the limits of the matching region for this URegularExpression.
U_CAPI int32_t uregex_group(URegularExpression *regexp, int32_t groupNum, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract the string for the specified matching expression or subexpression.
U_CAPI UBool uregex_hasAnchoringBounds(const URegularExpression *regexp, UErrorCode *status)
Return true if this URegularExpression is using anchoring bounds.
U_CAPI void uregex_setTimeLimit(URegularExpression *regexp, int32_t limit, UErrorCode *status)
Set a processing time limit for match operations with this URegularExpression.
U_CAPI UBool uregex_lookingAt64(URegularExpression *regexp, int64_t startIndex, UErrorCode *status)
64bit version of uregex_lookingAt.
U_CAPI UText * uregex_replaceAllUText(URegularExpression *regexp, UText *replacement, UText *dest, UErrorCode *status)
Replaces every substring of the input that matches the pattern with the given replacement string.
U_CAPI void uregex_setFindProgressCallback(URegularExpression *regexp, URegexFindProgressCallback *callback, const void *context, UErrorCode *status)
Set the find progress callback function for this URegularExpression.
U_CAPI URegularExpression * uregex_clone(const URegularExpression *regexp, UErrorCode *status)
Make a copy of a compiled regular expression.
U_CAPI void uregex_setMatchCallback(URegularExpression *regexp, URegexMatchCallback *callback, const void *context, UErrorCode *status)
Set a callback function for this URegularExpression.
U_CAPI int32_t uregex_end(URegularExpression *regexp, int32_t groupNum, UErrorCode *status)
Returns the index in the input string of the position following the end of the text matched by the sp...
U_CAPI void uregex_getFindProgressCallback(const URegularExpression *regexp, URegexFindProgressCallback **callback, const void **context, UErrorCode *status)
Get the find progress callback function for this URegularExpression.
U_CAPI UBool uregex_hasTransparentBounds(const URegularExpression *regexp, UErrorCode *status)
Queries the transparency of region bounds for this URegularExpression.
U_CAPI UBool uregex_hitEnd(const URegularExpression *regexp, UErrorCode *status)
Return true if the most recent matching operation touched the end of the text being processed.
U_CAPI UBool uregex_matches(URegularExpression *regexp, int32_t startIndex, UErrorCode *status)
Attempts to match the input string against the pattern.
U_CAPI int32_t uregex_groupNumberFromName(URegularExpression *regexp, const UChar *groupName, int32_t nameLength, UErrorCode *status)
Get the group number corresponding to a named capture group.
U_CAPI int32_t uregex_splitUText(URegularExpression *regexp, UText *destFields[], int32_t destFieldsCapacity, UErrorCode *status)
Split a string into fields.
C API: Abstract Unicode Text API.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415