ICU 74.1
unorm2.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4*******************************************************************************
5*
6* Copyright (C) 2009-2015, International Business Machines
7* Corporation and others. All Rights Reserved.
8*
9*******************************************************************************
10* file name: unorm2.h
11* encoding: UTF-8
12* tab size: 8 (not used)
13* indentation:4
14*
15* created on: 2009dec15
16* created by: Markus W. Scherer
17*/
18
19#ifndef __UNORM2_H__
20#define __UNORM2_H__
21
33#include "unicode/utypes.h"
35#include "unicode/uset.h"
36
37#if U_SHOW_CPLUSPLUS_API
39#endif // U_SHOW_CPLUSPLUS_API
40
48typedef enum {
91
117
122struct UNormalizer2;
125#if !UCONFIG_NO_NORMALIZATION
126
138U_CAPI const UNormalizer2 * U_EXPORT2
140
152U_CAPI const UNormalizer2 * U_EXPORT2
154
166U_CAPI const UNormalizer2 * U_EXPORT2
168
180U_CAPI const UNormalizer2 * U_EXPORT2
182
197U_CAPI const UNormalizer2 * U_EXPORT2
199
200#ifndef U_HIDE_DRAFT_API
215U_CAPI const UNormalizer2 * U_EXPORT2
217#endif // U_HIDE_DRAFT_API
218
240U_CAPI const UNormalizer2 * U_EXPORT2
241unorm2_getInstance(const char *packageName,
242 const char *name,
244 UErrorCode *pErrorCode);
245
261U_CAPI UNormalizer2 * U_EXPORT2
262unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode);
263
270U_CAPI void U_EXPORT2
272
273#if U_SHOW_CPLUSPLUS_API
274
275U_NAMESPACE_BEGIN
276
287
288U_NAMESPACE_END
289
290#endif
291
308U_CAPI int32_t U_EXPORT2
310 const UChar *src, int32_t length,
311 UChar *dest, int32_t capacity,
312 UErrorCode *pErrorCode);
331U_CAPI int32_t U_EXPORT2
333 UChar *first, int32_t firstLength, int32_t firstCapacity,
334 const UChar *second, int32_t secondLength,
335 UErrorCode *pErrorCode);
354U_CAPI int32_t U_EXPORT2
356 UChar *first, int32_t firstLength, int32_t firstCapacity,
357 const UChar *second, int32_t secondLength,
358 UErrorCode *pErrorCode);
359
379U_CAPI int32_t U_EXPORT2
381 UChar32 c, UChar *decomposition, int32_t capacity,
382 UErrorCode *pErrorCode);
383
413U_CAPI int32_t U_EXPORT2
415 UChar32 c, UChar *decomposition, int32_t capacity,
416 UErrorCode *pErrorCode);
417
433U_CAPI UChar32 U_EXPORT2
435
445U_CAPI uint8_t U_EXPORT2
447
464U_CAPI UBool U_EXPORT2
466 const UChar *s, int32_t length,
467 UErrorCode *pErrorCode);
468
488 const UChar *s, int32_t length,
489 UErrorCode *pErrorCode);
490
515U_CAPI int32_t U_EXPORT2
517 const UChar *s, int32_t length,
518 UErrorCode *pErrorCode);
519
529U_CAPI UBool U_EXPORT2
531
541U_CAPI UBool U_EXPORT2
543
552U_CAPI UBool U_EXPORT2
554
621U_CAPI int32_t U_EXPORT2
622unorm_compare(const UChar *s1, int32_t length1,
623 const UChar *s2, int32_t length2,
624 uint32_t options,
625 UErrorCode *pErrorCode);
626
627#endif /* !UCONFIG_NO_NORMALIZATION */
628#endif /* __UNORM2_H__ */
"Smart pointer" class, closes a UNormalizer2 via unorm2_close().
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
C API: Bit set option bit constants for various string and character processing functions.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:435
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:386
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:97
@ UNORM_YES
The input string is in the normalization form.
Definition: unorm2.h:107
@ UNORM_MAYBE
The input string may or may not be in the normalization form.
Definition: unorm2.h:115
@ UNORM_NO
The input string is not in the normalization form.
Definition: unorm2.h:102
U_CAPI int32_t unorm2_getRawDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the raw decomposition mapping of c.
U_CAPI uint8_t unorm2_getCombiningClass(const UNormalizer2 *norm2, UChar32 c)
Gets the combining class of c.
U_CAPI UBool unorm2_isNormalized(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI int32_t unorm2_normalize(const UNormalizer2 *norm2, const UChar *src, int32_t length, UChar *dest, int32_t capacity, UErrorCode *pErrorCode)
Writes the normalized form of the source string to the destination string (replacing its contents) and returns the length of the destination string.
U_CAPI int32_t unorm2_append(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the second string to the first string (merging them at the boundary) and returns the length of the first string.
U_CAPI const UNormalizer2 * unorm2_getNFKCCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode toNFKC_Casefold() normalization which is equivalent to applying the NFKC_Casefold mappings and then NFC.
U_CAPI UBool unorm2_hasBoundaryAfter(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary after it, regardless of context.
U_CAPI void unorm2_close(UNormalizer2 *norm2)
Closes a UNormalizer2 instance from unorm2_openFiltered().
U_CAPI const UNormalizer2 * unorm2_getNFDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFD normalization.
UNormalization2Mode
Constants for normalization modes.
Definition: unorm2.h:48
@ UNORM2_COMPOSE
Decomposition followed by composition.
Definition: unorm2.h:57
@ UNORM2_FCD
"Fast C or D" form.
Definition: unorm2.h:79
@ UNORM2_COMPOSE_CONTIGUOUS
Compose only contiguously.
Definition: unorm2.h:89
@ UNORM2_DECOMPOSE
Map, and reorder canonically.
Definition: unorm2.h:66
U_CAPI const UNormalizer2 * unorm2_getNFCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFC normalization.
U_CAPI const UNormalizer2 * unorm2_getNFKCInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKC normalization.
U_CAPI int32_t unorm2_spanQuickCheckYes(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Returns the end of the normalized substring of the input string.
U_CAPI UChar32 unorm2_composePair(const UNormalizer2 *norm2, UChar32 a, UChar32 b)
Performs pairwise composition of a & b and returns the composite if there is one.
U_CAPI const UNormalizer2 * unorm2_getNFKCSimpleCasefoldInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for a variant of Unicode toNFKC_Casefold() normalization which is equivalent to applying the NFKC_Simple_Casefold mappings and then NFC.
U_CAPI UBool unorm2_hasBoundaryBefore(const UNormalizer2 *norm2, UChar32 c)
Tests if the character always has a normalization boundary before it, regardless of context.
U_CAPI const UNormalizer2 * unorm2_getNFKDInstance(UErrorCode *pErrorCode)
Returns a UNormalizer2 instance for Unicode NFKD normalization.
U_CAPI UBool unorm2_isInert(const UNormalizer2 *norm2, UChar32 c)
Tests if the character is normalization-inert.
U_CAPI int32_t unorm2_getDecomposition(const UNormalizer2 *norm2, UChar32 c, UChar *decomposition, int32_t capacity, UErrorCode *pErrorCode)
Gets the decomposition mapping of c.
U_CAPI int32_t unorm2_normalizeSecondAndAppend(const UNormalizer2 *norm2, UChar *first, int32_t firstLength, int32_t firstCapacity, const UChar *second, int32_t secondLength, UErrorCode *pErrorCode)
Appends the normalized form of the second string to the first string (merging them at the boundary) and returns the length of the first string.
U_CAPI UNormalizationCheckResult unorm2_quickCheck(const UNormalizer2 *norm2, const UChar *s, int32_t length, UErrorCode *pErrorCode)
Tests if the string is normalized.
U_CAPI const UNormalizer2 * unorm2_getInstance(const char *packageName, const char *name, UNormalization2Mode mode, UErrorCode *pErrorCode)
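Returns a UNormalizer2 instance which uses the specified data file (packageName/name similar to ucnv_openPackage() and ures_open()/ResourceBundle) and which composes or decomposes text according to the specified mode.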
U_CAPI int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
struct UNormalizer2 UNormalizer2
C typedef for struct UNormalizer2.
Definition: unorm2.h:123
U_CAPI UNormalizer2 * unorm2_openFiltered(const UNormalizer2 *norm2, const USet *filterSet, UErrorCode *pErrorCode)
Constructs a filtered normalizer wrapping any UNormalizer2 instance and a filter set.
C API: Unicode Set.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:50
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:415