ICU 70.1  70.1
utext.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 *******************************************************************************
5 *
6 * Copyright (C) 2004-2012, International Business Machines
7 * Corporation and others. All Rights Reserved.
8 *
9 *******************************************************************************
10 * file name: utext.h
11 * encoding: UTF-8
12 * tab size: 8 (not used)
13 * indentation:4
14 *
15 * created on: 2004oct06
16 * created by: Markus W. Scherer
17 */
18 
19 #ifndef __UTEXT_H__
20 #define __UTEXT_H__
21 
140 #include "unicode/utypes.h"
141 #include "unicode/uchar.h"
142 #if U_SHOW_CPLUSPLUS_API
143 #include "unicode/localpointer.h"
144 #include "unicode/rep.h"
145 #include "unicode/unistr.h"
146 #include "unicode/chariter.h"
147 #endif
148 
149 
151 
/* Forward declaration of struct UText and its typedef. The full struct is
 * defined further down in this header; declaring it here lets the function
 * prototypes that follow take UText pointers. */
152 struct UText;
153 typedef struct UText UText;
156 /***************************************************************************************
157  *
158  * C Functions for creating UText wrappers around various kinds of text strings.
159  *
160  ****************************************************************************************/
161 
162 
183 U_CAPI UText * U_EXPORT2
185 
/* Open a read-only UText implementation for the UTF-8 string s.
 * Returns a UText pointer; errors are reported through status. */
207 U_CAPI UText * U_EXPORT2
208 utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status);
209 
210 
/* Open a read-only UText for the UChar (UTF-16) string s.
 * Returns a UText pointer; errors are reported through status. */
225 U_CAPI UText * U_EXPORT2
226 utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status);
227 
228 
229 #if U_SHOW_CPLUSPLUS_API
242 U_CAPI UText * U_EXPORT2
244 
245 
258 U_CAPI UText * U_EXPORT2
260 
261 
274 U_CAPI UText * U_EXPORT2
276 
289 U_CAPI UText * U_EXPORT2
291 
292 #endif
293 
294 
/* Clone the UText src and return the clone; errors are reported through status.
 * NOTE(review): the exact meaning of dest, deep and readOnly is not visible in
 * this listing - consult the full API documentation before relying on them. */
352 U_CAPI UText * U_EXPORT2
353 utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status);
354 
355 
/* Compare two UText objects for equality. Neither argument is modified
 * (both are const). */
367 U_CAPI UBool U_EXPORT2
368 utext_equals(const UText *a, const UText *b);
369 
370 
371 /*****************************************************************************
372  *
373  * Functions to work with the text represented by a UText wrapper
374  *
375  *****************************************************************************/
376 
388 U_CAPI int64_t U_EXPORT2
390 
404 U_CAPI UBool U_EXPORT2
406 
/* Return the code point at the requested native index, or U_SENTINEL (-1)
 * if the index is out of bounds. */
432 U_CAPI UChar32 U_EXPORT2
433 utext_char32At(UText *ut, int64_t nativeIndex);
434 
435 
446 U_CAPI UChar32 U_EXPORT2
448 
449 
468 U_CAPI UChar32 U_EXPORT2
470 
471 
489 U_CAPI UChar32 U_EXPORT2
491 
492 
/* Set the iteration index to nativeIndex and return the code point at that
 * index. */
511 U_CAPI UChar32 U_EXPORT2
512 utext_next32From(UText *ut, int64_t nativeIndex);
513 
514 
515 
/* Set the iteration index to nativeIndex and return the code point that
 * precedes the one at that initial index. */
531 U_CAPI UChar32 U_EXPORT2
532 utext_previous32From(UText *ut, int64_t nativeIndex);
533 
546 U_CAPI int64_t U_EXPORT2
548 
/* Set the current iteration position to the nearest code point boundary at
 * or preceding nativeIndex. This is also the out-of-line fallback used by
 * the UTEXT_SETNATIVEINDEX macro. */
572 U_CAPI void U_EXPORT2
573 utext_setNativeIndex(UText *ut, int64_t nativeIndex);
574 
/* Move the iterator position by delta code points.
 * NOTE(review): the meaning of the UBool result is not visible in this
 * listing - presumably it reports whether the full move succeeded; confirm. */
591 U_CAPI UBool U_EXPORT2
592 utext_moveIndex32(UText *ut, int32_t delta);
593 
616 U_CAPI int64_t U_EXPORT2
618 
619 
654 U_CAPI int32_t U_EXPORT2
656  int64_t nativeStart, int64_t nativeLimit,
657  UChar *dest, int32_t destCapacity,
658  UErrorCode *status);
659 
660 
661 
662 /************************************************************************************
663  *
664  * #define inline versions of selected performance-critical text access functions
665  * Caution: do not use auto increment++ or decrement-- expressions
666  * as parameters to these macros.
667  *
668  * For most use, where there is no extreme performance constraint, the
669  * normal, non-inline functions are a better choice. The resulting code
670  * will be smaller, and, if the need ever arises, easier to debug.
671  *
672  * These are implemented as #defines rather than real functions
673  * because there is no fully portable way to do inline functions in plain C.
674  *
675  ************************************************************************************/
676 
677 #ifndef U_HIDE_INTERNAL_API
/* Inline fast path for utext_current32(): when the current chunk offset is
 * inside the chunk and the UTF-16 code unit there is below 0xd800 (so it
 * cannot be a surrogate), yield that code unit directly without moving the
 * position; in every other case defer to utext_current32(ut).
 * The argument ut is evaluated several times - pass no expression with
 * side effects. */
687 #define UTEXT_CURRENT32(ut) \
688  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
689  ((ut)->chunkContents)[((ut)->chunkOffset)] : utext_current32(ut))
690 #endif /* U_HIDE_INTERNAL_API */
691 
/* Inline fast path for utext_next32(): when the current chunk offset is
 * inside the chunk and the UTF-16 code unit there is below 0xd800 (so it
 * cannot be a surrogate), yield that code unit and post-increment
 * chunkOffset; in every other case defer to utext_next32(ut).
 * The argument ut is evaluated several times - pass no expression with
 * side effects. */
703 #define UTEXT_NEXT32(ut) \
704  ((ut)->chunkOffset < (ut)->chunkLength && ((ut)->chunkContents)[(ut)->chunkOffset]<0xd800 ? \
705  ((ut)->chunkContents)[((ut)->chunkOffset)++] : utext_next32(ut))
706 
/* Inline fast path for utext_previous32(): when chunkOffset is greater than
 * zero and the UTF-16 code unit just before it is below 0xd800 (so it cannot
 * be a surrogate), pre-decrement chunkOffset and yield that code unit; in
 * every other case defer to utext_previous32(ut).
 * The argument ut is evaluated several times - pass no expression with
 * side effects. */
717 #define UTEXT_PREVIOUS32(ut) \
718  ((ut)->chunkOffset > 0 && \
719  (ut)->chunkContents[(ut)->chunkOffset-1] < 0xd800 ? \
720  (ut)->chunkContents[--((ut)->chunkOffset)] : utext_previous32(ut))
721 
/* Inline computation of the current native index. While chunkOffset is at or
 * below nativeIndexingLimit (the highest chunk offset where native indexing
 * and UTF-16 chunk indexing correspond), the result is simply
 * chunkNativeStart + chunkOffset; beyond that limit the provider's
 * mapOffsetToNative function from the pFuncs table is called.
 * The argument ut is evaluated several times - pass no expression with
 * side effects. */
734 #define UTEXT_GETNATIVEINDEX(ut) \
735  ((ut)->chunkOffset <= (ut)->nativeIndexingLimit? \
736  (ut)->chunkNativeStart+(ut)->chunkOffset : \
737  (ut)->pFuncs->mapOffsetToNative(ut))
738 
/* Inline fast path for utext_setNativeIndex(). Converts the native index ix
 * into an offset relative to chunkNativeStart; if that offset is >= 0, is
 * below nativeIndexingLimit, and the UTF-16 code unit stored there is below
 * 0xdc00, chunkOffset is set to it directly. Otherwise the out-of-line
 * utext_setNativeIndex(ut, ix) is called.
 * Both ut and ix are evaluated more than once - pass no expression with
 * side effects. UPRV_BLOCK_MACRO_BEGIN/END are defined outside this listing;
 * presumably they wrap the body so the macro acts as one statement. */
750 #define UTEXT_SETNATIVEINDEX(ut, ix) UPRV_BLOCK_MACRO_BEGIN { \
751  int64_t __offset = (ix) - (ut)->chunkNativeStart; \
752  if (__offset>=0 && __offset<(int64_t)(ut)->nativeIndexingLimit && (ut)->chunkContents[__offset]<0xdc00) { \
753  (ut)->chunkOffset=(int32_t)__offset; \
754  } else { \
755  utext_setNativeIndex((ut), (ix)); \
756  } \
757 } UPRV_BLOCK_MACRO_END
758 
759 
760 
761 /************************************************************************************
762  *
763  * Functions related to writing or modifying the text.
764  * These will work only with modifiable UTexts. Attempting to
765  * modify a read-only UText will return an error status.
766  *
767  ************************************************************************************/
768 
769 
788 U_CAPI UBool U_EXPORT2
790 
791 
800 U_CAPI UBool U_EXPORT2
802 
803 
831 U_CAPI int32_t U_EXPORT2
833  int64_t nativeStart, int64_t nativeLimit,
834  const UChar *replacementText, int32_t replacementLength,
835  UErrorCode *status);
836 
837 
838 
871 U_CAPI void U_EXPORT2
873  int64_t nativeStart, int64_t nativeLimit,
874  int64_t destIndex,
875  UBool move,
876  UErrorCode *status);
877 
878 
900 U_CAPI void U_EXPORT2
902 
903 
910 enum {
944 };
945 
/* Function type for the clone slot of the UTextFuncs dispatch table
 * (UText.clone()). Unlike utext_clone(), this provider-level signature has
 * no readOnly parameter. */
983 typedef UText * U_CALLCONV
984 UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status);
985 
986 
995 typedef int64_t U_CALLCONV
997 
/* Function type for the access slot of the UTextFuncs dispatch table
 * (UText.access()).
 * NOTE(review): the semantics of forward and of the UBool result are not
 * visible in this listing - consult the full API documentation. */
1023 typedef UBool U_CALLCONV
1024 UTextAccess(UText *ut, int64_t nativeIndex, UBool forward);
1025 
1053 typedef int32_t U_CALLCONV
1055  int64_t nativeStart, int64_t nativeLimit,
1056  UChar *dest, int32_t destCapacity,
1057  UErrorCode *status);
1058 
1088 typedef int32_t U_CALLCONV
1090  int64_t nativeStart, int64_t nativeLimit,
1091  const UChar *replacementText, int32_t replacmentLength,
1092  UErrorCode *status);
1093 
1122 typedef void U_CALLCONV
1124  int64_t nativeStart, int64_t nativeLimit,
1125  int64_t nativeDest,
1126  UBool move,
1127  UErrorCode *status);
1128 
1142 typedef int64_t U_CALLCONV
1144 
/* Function type for the mapNativeIndexToUTF16 slot of the UTextFuncs
 * dispatch table. Takes a native index and returns an int32_t; judging by
 * the name, the result is the corresponding UTF-16 offset - confirm against
 * the full API documentation. */
1160 typedef int32_t U_CALLCONV
1161 UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex);
1162 
1163 
1181 typedef void U_CALLCONV
1183 
1184 
1194 struct UTextFuncs {
1209  int32_t tableSize;
1210 
1217 
1218 
1226 
1235 
1243 
1251 
1259 
1267 
1275 
1283 
1291 
1297 
1303 
1309 
1310 };
1315 typedef struct UTextFuncs UTextFuncs;
1316 
1328 struct UText {
1341  uint32_t magic;
1342 
1343 
1349  int32_t flags;
1350 
1351 
1358 
1365  int32_t sizeOfStruct;
1366 
1367  /* ------ 16 byte alignment boundary ----------- */
1368 
1369 
1376 
1381  int32_t extraSize;
1382 
1391 
1392  /* ---- 16 byte alignment boundary------ */
1393 
1399 
1405  int32_t chunkOffset;
1406 
1411  int32_t chunkLength;
1412 
1413  /* ---- 16 byte alignment boundary-- */
1414 
1415 
1423 
1429 
1435  void *pExtra;
1436 
1443  const void *context;
1444 
1445  /* --- 16 byte alignment boundary--- */
1446 
1452  const void *p;
1458  const void *q;
1464  const void *r;
1465 
1471  void *privP;
1472 
1473 
1474  /* --- 16 byte alignment boundary--- */
1475 
1476 
1482  int64_t a;
1483 
1489  int32_t b;
1490 
1496  int32_t c;
1497 
1498  /* ---- 16 byte alignment boundary---- */
1499 
1500 
1506  int64_t privA;
1512  int32_t privB;
1518  int32_t privC;
1519 };
1520 
1521 
/* Common function for use by text provider implementations to allocate
 * and/or initialize a new UText. extraSpace is a size request for additional
 * provider storage (presumably in bytes, matching the extraSize/pExtra
 * fields - confirm); errors are reported through status. */
1538 U_CAPI UText * U_EXPORT2
1539 utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status);
1540 
1541 // do not use #ifndef U_HIDE_INTERNAL_API around the following!
/* Constant that UTEXT_INITIALIZER stores in the magic field of a UText. */
1547 enum {
1548  UTEXT_MAGIC = 0x345ad82c
1549 };
1550 
/* Aggregate initializer for a UText variable: sets magic to UTEXT_MAGIC and
 * sizeOfStruct to sizeof(UText), and every other field to 0 or NULL.
 * The entries are positional, so they must stay in the same order as the
 * fields of struct UText; the trailing comment on each entry names the
 * field it initializes. */
1558 #define UTEXT_INITIALIZER { \
1559  UTEXT_MAGIC, /* magic */ \
1560  0, /* flags */ \
1561  0, /* providerProps */ \
1562  sizeof(UText), /* sizeOfStruct */ \
1563  0, /* chunkNativeLimit */ \
1564  0, /* extraSize */ \
1565  0, /* nativeIndexingLimit */ \
1566  0, /* chunkNativeStart */ \
1567  0, /* chunkOffset */ \
1568  0, /* chunkLength */ \
1569  NULL, /* chunkContents */ \
1570  NULL, /* pFuncs */ \
1571  NULL, /* pExtra */ \
1572  NULL, /* context */ \
1573  NULL, NULL, NULL, /* p, q, r */ \
1574  NULL, /* privP */ \
1575  0, 0, 0, /* a, b, c */ \
1576  0, 0, 0 /* privA,B,C, */ \
1577  }
1578 
1579 
1581 
1582 
1583 #if U_SHOW_CPLUSPLUS_API
1584 
1585 U_NAMESPACE_BEGIN
1586 
1597 
1598 U_NAMESPACE_END
1599 
1600 #endif
1601 
1602 
1603 #endif
C++ API: Character Iterator.
"Smart pointer" class, closes a UText via utext_close().
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
Replaceable is an abstract base class representing a string of characters that supports the replaceme...
Definition: rep.h:77
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
#define U_CALLCONV
Similar to U_CDECL_BEGIN/U_CDECL_END, this qualifier is necessary in callback function typedefs to ma...
Definition: platform.h:870
C++ API: Replaceable String.
(public) Function dispatch table for UText.
Definition: utext.h:1194
int32_t reserved1
(private) Alignment padding.
Definition: utext.h:1216
int32_t reserved2
Definition: utext.h:1216
UTextMapOffsetToNative * mapOffsetToNative
(public) Function pointer for UTextMapOffsetToNative.
Definition: utext.h:1274
UTextAccess * access
(public) Function pointer for UTextAccess.
Definition: utext.h:1242
UTextExtract * extract
(public) Function pointer for UTextExtract.
Definition: utext.h:1250
int32_t reserved3
Definition: utext.h:1216
UTextClose * spare3
(private) Spare function pointer
Definition: utext.h:1308
int32_t tableSize
(public) Function table size, sizeof(UTextFuncs) Intended for use should the table grow to accommodat...
Definition: utext.h:1209
UTextNativeLength * nativeLength
(public) Function pointer for UTextNativeLength. May be expensive to compute!
Definition: utext.h:1234
UTextReplace * replace
(public) Function pointer for UTextReplace.
Definition: utext.h:1258
UTextClose * spare2
(private) Spare function pointer
Definition: utext.h:1302
UTextClose * spare1
(private) Spare function pointer
Definition: utext.h:1296
UTextCopy * copy
(public) Function pointer for UTextCopy.
Definition: utext.h:1266
UTextMapNativeIndexToUTF16 * mapNativeIndexToUTF16
(public) Function pointer for UTextMapNativeIndexToUTF16.
Definition: utext.h:1282
UTextClose * close
(public) Function pointer for UTextClose.
Definition: utext.h:1290
UTextClone * clone
(public) Function pointer for UTextClone.
Definition: utext.h:1225
UText struct.
Definition: utext.h:1328
int32_t b
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1489
int32_t chunkOffset
(protected) Current iteration position within the text chunk (UTF-16 buffer).
Definition: utext.h:1405
const void * p
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1452
int32_t extraSize
(protected) Size in bytes of the extra space (pExtra).
Definition: utext.h:1381
int64_t privA
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1506
int32_t nativeIndexingLimit
(protected) The highest chunk offset where native indexing and chunk (UTF-16) indexing correspond.
Definition: utext.h:1390
int32_t flags
Definition: utext.h:1349
int32_t sizeOfStruct
(public) sizeOfStruct=sizeof(UText) Allows possible backward compatible extension.
Definition: utext.h:1365
void * pExtra
(protected) Pointer to additional space requested by the text provider during the utext_open operatio...
Definition: utext.h:1435
int64_t chunkNativeLimit
(protected) Native index of the first character position following the current chunk.
Definition: utext.h:1375
const void * r
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1464
int32_t privB
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1512
uint32_t magic
Definition: utext.h:1341
const UTextFuncs * pFuncs
(public) Pointer to Dispatch table for accessing functions for this UText.
Definition: utext.h:1428
int32_t chunkLength
(protected) Length of the text chunk (UTF-16 buffer), in UChars.
Definition: utext.h:1411
int32_t providerProperties
Text provider properties.
Definition: utext.h:1357
const UChar * chunkContents
(protected) pointer to a chunk of text in UTF-16 format.
Definition: utext.h:1422
int32_t privC
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1518
const void * q
(protected) Pointer fields available for use by the text provider.
Definition: utext.h:1458
void * privP
Private field reserved for future use by the UText framework itself.
Definition: utext.h:1471
int64_t chunkNativeStart
(protected) Native index of the first character in the text chunk.
Definition: utext.h:1398
const void * context
(protected) Pointer to string or text-containing object or similar.
Definition: utext.h:1443
int32_t c
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1496
int64_t a
(protected) Integer field reserved for use by the text provider.
Definition: utext.h:1482
C API: Unicode Properties.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:467
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:86
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:269
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:418
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:85
C++ API: Unicode String.
int32_t UTextReplace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacmentLength, UErrorCode *status)
Function type declaration for UText.replace().
Definition: utext.h:1089
U_CAPI UText * utext_openUnicodeString(UText *ut, icu::UnicodeString *s, UErrorCode *status)
Open a writable UText for a non-const UnicodeString.
U_CAPI UChar32 utext_previous32From(UText *ut, int64_t nativeIndex)
Set the iteration index, and return the code point preceding the one specified by the initial index.
U_CAPI UChar32 utext_previous32(UText *ut)
Move the iterator position to the character (code point) whose index precedes the current position,...
U_CAPI UBool utext_moveIndex32(UText *ut, int32_t delta)
Move the iterator position by delta code points.
U_CAPI UText * utext_openUChars(UText *ut, const UChar *s, int64_t length, UErrorCode *status)
Open a read-only UText for UChar * string.
int64_t UTextNativeLength(UText *ut)
Function type declaration for UText.nativeLength().
Definition: utext.h:996
U_CAPI UBool utext_hasMetaData(const UText *ut)
Test whether there is meta data associated with the text.
int32_t UTextExtract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Function type declaration for UText.extract().
Definition: utext.h:1054
void UTextClose(UText *ut)
Function type declaration for UText.close().
Definition: utext.h:1182
U_CAPI int32_t utext_extract(UText *ut, int64_t nativeStart, int64_t nativeLimit, UChar *dest, int32_t destCapacity, UErrorCode *status)
Extract text from a UText into a UChar buffer.
U_CAPI UText * utext_openUTF8(UText *ut, const char *s, int64_t length, UErrorCode *status)
Open a read-only UText implementation for UTF-8 strings.
U_CAPI UText * utext_close(UText *ut)
Close function for UText instances.
@ UTEXT_PROVIDER_HAS_META_DATA
There is meta data associated with the text.
Definition: utext.h:935
@ UTEXT_PROVIDER_STABLE_CHUNKS
Text chunks remain valid and usable until the text object is modified or deleted, not just until the ...
Definition: utext.h:922
@ UTEXT_PROVIDER_LENGTH_IS_EXPENSIVE
It is potentially time consuming for the provider to determine the length of the text.
Definition: utext.h:915
@ UTEXT_PROVIDER_OWNS_TEXT
Text provider owns the text storage.
Definition: utext.h:943
@ UTEXT_PROVIDER_WRITABLE
The provider supports modifying the text via the replace() and copy() functions.
Definition: utext.h:929
UBool UTextAccess(UText *ut, int64_t nativeIndex, UBool forward)
Function type declaration for UText.access().
Definition: utext.h:1024
U_CAPI UText * utext_clone(UText *dest, const UText *src, UBool deep, UBool readOnly, UErrorCode *status)
Clone a UText.
int64_t UTextMapOffsetToNative(const UText *ut)
Function type declaration for UText.mapOffsetToNative().
Definition: utext.h:1143
U_CAPI void utext_copy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t destIndex, UBool move, UErrorCode *status)
Copy or move a substring from one position to another within the text, while retaining any metadata a...
int32_t UTextMapNativeIndexToUTF16(const UText *ut, int64_t nativeIndex)
Function type declaration for UText.mapNativeIndexToUTF16().
Definition: utext.h:1161
U_CAPI UText * utext_openCharacterIterator(UText *ut, icu::CharacterIterator *ci, UErrorCode *status)
Open a UText implementation over an ICU CharacterIterator.
U_CAPI UText * utext_setup(UText *ut, int32_t extraSpace, UErrorCode *status)
Common function for use by Text Provider implementations to allocate and/or initialize a new UText st...
U_CAPI UChar32 utext_char32At(UText *ut, int64_t nativeIndex)
Returns the code point at the requested index, or U_SENTINEL (-1) if it is out of bounds.
U_CAPI UBool utext_equals(const UText *a, const UText *b)
Compare two UText objects for equality.
U_CAPI int64_t utext_nativeLength(UText *ut)
Get the length of the text.
U_CAPI UBool utext_isLengthExpensive(const UText *ut)
Return true if calculating the length of the text could be expensive.
void UTextCopy(UText *ut, int64_t nativeStart, int64_t nativeLimit, int64_t nativeDest, UBool move, UErrorCode *status)
Function type declaration for UText.copy().
Definition: utext.h:1123
U_CAPI void utext_freeze(UText *ut)
U_CAPI int64_t utext_getPreviousNativeIndex(UText *ut)
Get the native index of the character preceding the current position.
U_CAPI UBool utext_isWritable(const UText *ut)
Return true if the text can be written (modified) with utext_replace() or utext_copy().
U_CAPI int32_t utext_replace(UText *ut, int64_t nativeStart, int64_t nativeLimit, const UChar *replacementText, int32_t replacementLength, UErrorCode *status)
Replace a range of the original text with a replacement text.
U_CAPI UText * utext_openReplaceable(UText *ut, icu::Replaceable *rep, UErrorCode *status)
Open a writable UText implementation for an ICU Replaceable object.
U_CAPI void utext_setNativeIndex(UText *ut, int64_t nativeIndex)
Set the current iteration position to the nearest code point boundary at or preceding the specified i...
U_CAPI UChar32 utext_next32From(UText *ut, int64_t nativeIndex)
Set the iteration index and return the code point at that index.
UText * UTextClone(UText *dest, const UText *src, UBool deep, UErrorCode *status)
Function type declaration for UText.clone().
Definition: utext.h:984
U_CAPI int64_t utext_getNativeIndex(const UText *ut)
Get the current iterator position, which can range from 0 to the length of the text.
U_CAPI UText * utext_openConstUnicodeString(UText *ut, const icu::UnicodeString *s, UErrorCode *status)
Open a UText for a const UnicodeString.
U_CAPI UChar32 utext_next32(UText *ut)
Get the code point at the current iteration position of the UText, and advance the position to the fi...
U_CAPI UChar32 utext_current32(UText *ut)
Get the code point at the current iteration position, or U_SENTINEL (-1) if the iteration has reached...
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415