33#if !defined(USET_DEFINED) && !defined(U_IN_DOXYGEN)
64#define U_UNICODE_VERSION "15.1"
158#define UCHAR_MIN_VALUE 0
168#define UCHAR_MAX_VALUE 0x10ffff
174#define U_MASK(x) ((uint32_t)1<<(x))
536#ifndef U_HIDE_DRAFT_API
557#ifndef U_HIDE_DEPRECATED_API
672#ifndef U_HIDE_DRAFT_API
681#ifndef U_HIDE_DEPRECATED_API
700#ifndef U_HIDE_DEPRECATED_API
713#ifndef U_HIDE_DEPRECATED_API
732#ifndef U_HIDE_DEPRECATED_API
758#ifndef U_HIDE_DEPRECATED_API
771#ifndef U_HIDE_DEPRECATED_API
787#ifndef U_HIDE_DRAFT_API
803#ifndef U_HIDE_DEPRECATED_API
915#define U_GC_CN_MASK U_MASK(U_GENERAL_OTHER_TYPES)
918#define U_GC_LU_MASK U_MASK(U_UPPERCASE_LETTER)
920#define U_GC_LL_MASK U_MASK(U_LOWERCASE_LETTER)
922#define U_GC_LT_MASK U_MASK(U_TITLECASE_LETTER)
924#define U_GC_LM_MASK U_MASK(U_MODIFIER_LETTER)
926#define U_GC_LO_MASK U_MASK(U_OTHER_LETTER)
929#define U_GC_MN_MASK U_MASK(U_NON_SPACING_MARK)
931#define U_GC_ME_MASK U_MASK(U_ENCLOSING_MARK)
933#define U_GC_MC_MASK U_MASK(U_COMBINING_SPACING_MARK)
936#define U_GC_ND_MASK U_MASK(U_DECIMAL_DIGIT_NUMBER)
938#define U_GC_NL_MASK U_MASK(U_LETTER_NUMBER)
940#define U_GC_NO_MASK U_MASK(U_OTHER_NUMBER)
943#define U_GC_ZS_MASK U_MASK(U_SPACE_SEPARATOR)
945#define U_GC_ZL_MASK U_MASK(U_LINE_SEPARATOR)
947#define U_GC_ZP_MASK U_MASK(U_PARAGRAPH_SEPARATOR)
950#define U_GC_CC_MASK U_MASK(U_CONTROL_CHAR)
952#define U_GC_CF_MASK U_MASK(U_FORMAT_CHAR)
954#define U_GC_CO_MASK U_MASK(U_PRIVATE_USE_CHAR)
956#define U_GC_CS_MASK U_MASK(U_SURROGATE)
959#define U_GC_PD_MASK U_MASK(U_DASH_PUNCTUATION)
961#define U_GC_PS_MASK U_MASK(U_START_PUNCTUATION)
963#define U_GC_PE_MASK U_MASK(U_END_PUNCTUATION)
965#define U_GC_PC_MASK U_MASK(U_CONNECTOR_PUNCTUATION)
967#define U_GC_PO_MASK U_MASK(U_OTHER_PUNCTUATION)
970#define U_GC_SM_MASK U_MASK(U_MATH_SYMBOL)
972#define U_GC_SC_MASK U_MASK(U_CURRENCY_SYMBOL)
974#define U_GC_SK_MASK U_MASK(U_MODIFIER_SYMBOL)
976#define U_GC_SO_MASK U_MASK(U_OTHER_SYMBOL)
979#define U_GC_PI_MASK U_MASK(U_INITIAL_PUNCTUATION)
981#define U_GC_PF_MASK U_MASK(U_FINAL_PUNCTUATION)
986 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK|U_GC_LM_MASK|U_GC_LO_MASK)
989#define U_GC_LC_MASK \
990 (U_GC_LU_MASK|U_GC_LL_MASK|U_GC_LT_MASK)
993#define U_GC_M_MASK (U_GC_MN_MASK|U_GC_ME_MASK|U_GC_MC_MASK)
996#define U_GC_N_MASK (U_GC_ND_MASK|U_GC_NL_MASK|U_GC_NO_MASK)
999#define U_GC_Z_MASK (U_GC_ZS_MASK|U_GC_ZL_MASK|U_GC_ZP_MASK)
1002#define U_GC_C_MASK \
1003 (U_GC_CN_MASK|U_GC_CC_MASK|U_GC_CF_MASK|U_GC_CO_MASK|U_GC_CS_MASK)
1006#define U_GC_P_MASK \
1007 (U_GC_PD_MASK|U_GC_PS_MASK|U_GC_PE_MASK|U_GC_PC_MASK|U_GC_PO_MASK| \
1008 U_GC_PI_MASK|U_GC_PF_MASK)
1011#define U_GC_S_MASK (U_GC_SM_MASK|U_GC_SC_MASK|U_GC_SK_MASK|U_GC_SO_MASK)
1071#ifndef U_HIDE_DEPRECATED_API
1101#ifndef U_HIDE_DEPRECATED_API
1955#ifndef U_HIDE_DEPRECATED_API
1992#ifndef U_HIDE_DEPRECATED_API
2017#ifndef U_HIDE_DEPRECATED_API
2029#ifndef U_HIDE_DEPRECATED_API
2052 U_SHORT_PROPERTY_NAME,
2053 U_LONG_PROPERTY_NAME,
2054#ifndef U_HIDE_DEPRECATED_API
2094#ifndef U_HIDE_DEPRECATED_API
2124#ifndef U_HIDE_DEPRECATED_API
2148 U_JG_NO_JOINING_GROUP,
2257#ifndef U_HIDE_DEPRECATED_API
2308#ifndef U_HIDE_DEPRECATED_API
2340 U_WB_EXTENDNUMLET = 7,
2372#ifndef U_HIDE_DEPRECATED_API
2410 U_SB_SCONTINUE = 14,
2411#ifndef U_HIDE_DEPRECATED_API
2437 U_LB_ALPHABETIC = 2,
2438 U_LB_BREAK_BOTH = 3,
2439 U_LB_BREAK_AFTER = 4,
2440 U_LB_BREAK_BEFORE = 5,
2441 U_LB_MANDATORY_BREAK = 6,
2442 U_LB_CONTINGENT_BREAK = 7,
2443 U_LB_CLOSE_PUNCTUATION = 8,
2444 U_LB_COMBINING_MARK = 9,
2445 U_LB_CARRIAGE_RETURN = 10,
2446 U_LB_EXCLAMATION = 11,
2449 U_LB_IDEOGRAPHIC = 14,
2453 U_LB_INFIX_NUMERIC = 16,
2454 U_LB_LINE_FEED = 17,
2455 U_LB_NONSTARTER = 18,
2457 U_LB_OPEN_PUNCTUATION = 20,
2458 U_LB_POSTFIX_NUMERIC = 21,
2459 U_LB_PREFIX_NUMERIC = 22,
2460 U_LB_QUOTATION = 23,
2461 U_LB_COMPLEX_CONTEXT = 24,
2462 U_LB_SURROGATE = 25,
2464 U_LB_BREAK_SYMBOLS = 27,
2504#ifndef U_HIDE_DEPRECATED_API
2532#ifndef U_HIDE_DEPRECATED_API
2556 U_HST_NOT_APPLICABLE,
2559 U_HST_TRAILING_JAMO,
2562#ifndef U_HIDE_DEPRECATED_API
2730#ifndef U_HIDE_DRAFT_API
3074#define U_NO_NUMERIC_VALUE ((double)-123456789.)
3584#define U_GET_GC_MASK(c) U_MASK(u_charType(c))
3628#if !UCONFIG_NO_NORMALIZATION
3714 char *buffer, int32_t bufferLength,
3717#ifndef U_HIDE_DEPRECATED_API
3738 char *dest, int32_t destCapacity,
3849U_CAPI const char* U_EXPORT2
3922U_CAPI const char* U_EXPORT2
3996#ifndef U_HIDE_DRAFT_API
4315#if !UCONFIG_NO_NORMALIZATION
C API: Bit set option bit constants for various string and character processing functions.
U_CAPI UBool u_isupper(UChar32 c)
Determines whether the specified code point has the general category "Lu" (uppercase letter).
U_CAPI UBool u_isUAlphabetic(UChar32 c)
Check if a code point has the Alphabetic Unicode property.
UBlockCode
Constants for Unicode blocks, see the Unicode Data file Blocks.txt.
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED
@ UBLOCK_COUNT
One more than the highest normal UBlockCode value.
@ UBLOCK_OPTICAL_CHARACTER_RECOGNITION
@ UBLOCK_LATIN_EXTENDED_G
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_B
@ UBLOCK_INSCRIPTIONAL_PAHLAVI
@ UBLOCK_YIJING_HEXAGRAM_SYMBOLS
@ UBLOCK_SUPPLEMENTAL_SYMBOLS_AND_PICTOGRAPHS
@ UBLOCK_ARABIC_EXTENDED_B
@ UBLOCK_HALFWIDTH_AND_FULLWIDTH_FORMS
@ UBLOCK_CJK_SYMBOLS_AND_PUNCTUATION
@ UBLOCK_CURRENCY_SYMBOLS
@ UBLOCK_MISCELLANEOUS_SYMBOLS
@ UBLOCK_ANCIENT_GREEK_NUMBERS
@ UBLOCK_SUTTON_SIGNWRITING
@ UBLOCK_ANATOLIAN_HIEROGLYPHS
@ UBLOCK_SYMBOLS_FOR_LEGACY_COMPUTING
@ UBLOCK_SUPPLEMENTAL_PUNCTUATION
@ UBLOCK_MEROITIC_HIEROGLYPHS
@ UBLOCK_INDIC_SIYAQ_NUMBERS
@ UBLOCK_KAKTOVIK_NUMERALS
@ UBLOCK_COPTIC_EPACT_NUMBERS
@ UBLOCK_SUPERSCRIPTS_AND_SUBSCRIPTS
@ UBLOCK_TAMIL_SUPPLEMENT
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_C
@ UBLOCK_NYIAKENG_PUACHUE_HMONG
@ UBLOCK_PHONETIC_EXTENSIONS_SUPPLEMENT
@ UBLOCK_ETHIOPIC_EXTENDED_B
@ UBLOCK_TRANSPORT_AND_MAP_SYMBOLS
@ UBLOCK_IDEOGRAPHIC_DESCRIPTION_CHARACTERS
@ UBLOCK_SYMBOLS_AND_PICTOGRAPHS_EXTENDED_A
@ UBLOCK_PRIVATE_USE_AREA
Same as UBLOCK_PRIVATE_USE.
@ UBLOCK_SINHALA_ARCHAIC_NUMBERS
@ UBLOCK_MATHEMATICAL_ALPHANUMERIC_SYMBOLS
@ UBLOCK_KHITAN_SMALL_SCRIPT
@ UBLOCK_ENCLOSED_ALPHANUMERIC_SUPPLEMENT
@ UBLOCK_CUNEIFORM_NUMBERS_AND_PUNCTUATION
@ UBLOCK_COMBINING_DIACRITICAL_MARKS_SUPPLEMENT
@ UBLOCK_VARIATION_SELECTORS_SUPPLEMENT
@ UBLOCK_COMBINING_DIACRITICAL_MARKS_EXTENDED
@ UBLOCK_EGYPTIAN_HIEROGLYPHS
@ UBLOCK_TANGUT_COMPONENTS
@ UBLOCK_CHEROKEE_SUPPLEMENT
@ UBLOCK_LATIN_EXTENDED_A
@ UBLOCK_SUNDANESE_SUPPLEMENT
@ UBLOCK_CYRILLIC_EXTENDED_A
@ UBLOCK_HANGUL_JAMO_EXTENDED_B
@ UBLOCK_SUPPLEMENTAL_ARROWS_A
@ UBLOCK_DEVANAGARI_EXTENDED_A
@ UBLOCK_INSCRIPTIONAL_PARTHIAN
@ UBLOCK_ARABIC_EXTENDED_A
@ UBLOCK_CYRILLIC_EXTENDED_C
@ UBLOCK_OTTOMAN_SIYAQ_NUMBERS
@ UBLOCK_LINEAR_B_SYLLABARY
@ UBLOCK_ENCLOSED_CJK_LETTERS_AND_MONTHS
@ UBLOCK_SPACING_MODIFIER_LETTERS
@ UBLOCK_MEROITIC_CURSIVE
@ UBLOCK_CYRILLIC_EXTENDED_B
@ UBLOCK_GENERAL_PUNCTUATION
@ UBLOCK_MONGOLIAN_SUPPLEMENT
@ UBLOCK_MISCELLANEOUS_TECHNICAL
@ UBLOCK_TAI_XUAN_JING_SYMBOLS
@ UBLOCK_CONTROL_PICTURES
@ UBLOCK_GREEK
Unicode 3.2 renames this block to "Greek and Coptic".
@ UBLOCK_COUNTING_ROD_NUMERALS
@ UBLOCK_LATIN_EXTENDED_E
@ UBLOCK_LINEAR_B_IDEOGRAMS
@ UBLOCK_RUMI_NUMERAL_SYMBOLS
@ UBLOCK_HIGH_PRIVATE_USE_SURROGATES
@ UBLOCK_MEETEI_MAYEK_EXTENSIONS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_H
@ UBLOCK_COMMON_INDIC_NUMBER_FORMS
@ UBLOCK_ZANABAZAR_SQUARE
@ UBLOCK_ARABIC_EXTENDED_C
@ UBLOCK_ENCLOSED_ALPHANUMERICS
@ UBLOCK_COMBINING_HALF_MARKS
@ UBLOCK_GLAGOLITIC_SUPPLEMENT
@ UBLOCK_IMPERIAL_ARAMAIC
@ UBLOCK_BRAILLE_PATTERNS
@ UBLOCK_MATHEMATICAL_OPERATORS
@ UBLOCK_NO_BLOCK
New No_Block value in Unicode 4.
@ UBLOCK_TANGUT_SUPPLEMENT
@ UBLOCK_SMALL_FORM_VARIANTS
@ UBLOCK_GEORGIAN_EXTENDED
@ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_B
@ UBLOCK_LATIN_EXTENDED_D
@ UBLOCK_LATIN_EXTENDED_ADDITIONAL
@ UBLOCK_EGYPTIAN_HIEROGLYPH_FORMAT_CONTROLS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_D
@ UBLOCK_SHORTHAND_FORMAT_CONTROLS
@ UBLOCK_COMBINING_MARKS_FOR_SYMBOLS
Unicode 3.2 renames this block to "Combining Diacritical Marks for Symbols".
@ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_PICTOGRAPHS
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS
@ UBLOCK_KATAKANA_PHONETIC_EXTENSIONS
@ UBLOCK_SUPPLEMENTAL_MATHEMATICAL_OPERATORS
@ UBLOCK_ETHIOPIC_EXTENDED
@ UBLOCK_PRIVATE_USE
Same as UBLOCK_PRIVATE_USE_AREA.
@ UBLOCK_GEORGIAN_SUPPLEMENT
@ UBLOCK_HANGUL_COMPATIBILITY_JAMO
@ UBLOCK_ARABIC_SUPPLEMENT
@ UBLOCK_HANGUL_SYLLABLES
@ UBLOCK_MISCELLANEOUS_SYMBOLS_AND_ARROWS
@ UBLOCK_CJK_COMPATIBILITY
@ UBLOCK_CJK_RADICALS_SUPPLEMENT
@ UBLOCK_ARABIC_PRESENTATION_FORMS_B
@ UBLOCK_ARABIC_PRESENTATION_FORMS_A
@ UBLOCK_ENCLOSED_IDEOGRAPHIC_SUPPLEMENT
@ UBLOCK_BOPOMOFO_EXTENDED
@ UBLOCK_IDEOGRAPHIC_SYMBOLS_AND_PUNCTUATION
@ UBLOCK_LATIN_EXTENDED_F
@ UBLOCK_CYPRIOT_SYLLABARY
@ UBLOCK_ETHIOPIC_SUPPLEMENT
@ UBLOCK_OLD_SOUTH_ARABIAN
@ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_B
@ UBLOCK_BAMUM_SUPPLEMENT
@ UBLOCK_CYRILLIC_SUPPLEMENT
@ UBLOCK_MYANMAR_EXTENDED_B
@ UBLOCK_ANCIENT_GREEK_MUSICAL_NOTATION
@ UBLOCK_SYRIAC_SUPPLEMENT
@ UBLOCK_ORNAMENTAL_DINGBATS
@ UBLOCK_OLD_NORTH_ARABIAN
@ UBLOCK_DEVANAGARI_EXTENDED
@ UBLOCK_SUPPLEMENTARY_PRIVATE_USE_AREA_A
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_I
@ UBLOCK_ARABIC_MATHEMATICAL_ALPHABETIC_SYMBOLS
@ UBLOCK_SUPPLEMENTAL_ARROWS_C
@ UBLOCK_LATIN_EXTENDED_B
@ UBLOCK_MODIFIER_TONE_LETTERS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS
@ UBLOCK_CJK_COMPATIBILITY_FORMS
@ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS
@ UBLOCK_VEDIC_EXTENSIONS
@ UBLOCK_CYRILLIC_EXTENDED_D
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
@ UBLOCK_LATIN_1_SUPPLEMENT
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_G
@ UBLOCK_MISCELLANEOUS_MATHEMATICAL_SYMBOLS_A
@ UBLOCK_CAUCASIAN_ALBANIAN
@ UBLOCK_ALCHEMICAL_SYMBOLS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_F
@ UBLOCK_MYANMAR_EXTENDED_A
@ UBLOCK_EARLY_DYNASTIC_CUNEIFORM
@ UBLOCK_PHONETIC_EXTENSIONS
@ UBLOCK_GEOMETRIC_SHAPES
@ UBLOCK_LATIN_EXTENDED_C
@ UBLOCK_SUPPLEMENTAL_ARROWS_B
@ UBLOCK_ETHIOPIC_EXTENDED_A
@ UBLOCK_GEOMETRIC_SHAPES_EXTENDED
@ UBLOCK_UNIFIED_CANADIAN_ABORIGINAL_SYLLABICS_EXTENDED_A
@ UBLOCK_ALPHABETIC_PRESENTATION_FORMS
@ UBLOCK_LETTERLIKE_SYMBOLS
@ UBLOCK_ZNAMENNY_MUSICAL_NOTATION
@ UBLOCK_CYRILLIC_SUPPLEMENTARY
Unicode 4.0.1 renames the "Cyrillic Supplementary" block to "Cyrillic Supplement".
@ UBLOCK_HANGUL_JAMO_EXTENDED_A
@ UBLOCK_BYZANTINE_MUSICAL_SYMBOLS
@ UBLOCK_SMALL_KANA_EXTENSION
@ UBLOCK_CJK_COMPATIBILITY_IDEOGRAPHS_SUPPLEMENT
@ UBLOCK_COMBINING_DIACRITICAL_MARKS
@ UBLOCK_CJK_UNIFIED_IDEOGRAPHS_EXTENSION_E
@ UBLOCK_VARIATION_SELECTORS
U_CAPI UBool u_isblank(UChar32 c)
Determines whether the specified code point is a "blank" or "horizontal space", a character that visi...
U_CAPI UChar32 u_charFromName(UCharNameChoice nameChoice, const char *name, UErrorCode *pErrorCode)
Find a Unicode character by its name and return its code point value.
UCharNameChoice
Selector constants for u_charName().
@ U_CHAR_NAME_CHOICE_COUNT
One more than the highest normal UCharNameChoice value.
@ U_UNICODE_10_CHAR_NAME
The Unicode_1_Name property value which is of little practical value.
@ U_CHAR_NAME_ALIAS
Corrected name from NameAliases.txt.
@ U_EXTENDED_CHAR_NAME
Standard or synthetic character name.
@ U_UNICODE_CHAR_NAME
Unicode character name (Name property).
U_CAPI UBool u_isUWhiteSpace(UChar32 c)
Check if a code point has the White_Space Unicode property.
UIdentifierType
Identifier Type constants.
@ U_ID_TYPE_DEFAULT_IGNORABLE
@ U_ID_TYPE_NOT_CHARACTER
U_CAPI int32_t u_charName(UChar32 code, UCharNameChoice nameChoice, char *buffer, int32_t bufferLength, UErrorCode *pErrorCode)
Retrieve the name of a Unicode character.
U_CAPI int32_t u_getIntPropertyValue(UChar32 c, UProperty which)
Get the property value for an enumerated or integer Unicode property for a code point.
U_CAPI bool u_hasIDType(UChar32 c, UIdentifierType type)
Does the set of Identifier_Type values code point c contain the given type?
U_CAPI UBlockCode ublock_getCode(UChar32 c)
Returns the Unicode allocation block that contains the character.
U_CAPI UBool u_hasBinaryProperty(UChar32 c, UProperty which)
Check a binary Unicode property for a code point.
U_CAPI double u_getNumericValue(UChar32 c)
Get the numeric value for a Unicode code point as defined in the Unicode Character Database.
U_CAPI UChar32 u_toupper(UChar32 c)
The given character is mapped to its uppercase equivalent according to UnicodeData....
U_CAPI UProperty u_getPropertyEnum(const char *alias)
Return the UProperty enum for a given property name, as specified in the Unicode database file Proper...
UJoiningType
Joining Type constants.
@ U_JT_COUNT
One more than the highest normal UJoiningType value.
U_CAPI UBool u_isbase(UChar32 c)
Non-standard: Determines whether the specified code point is a base character.
UIdentifierStatus
Identifier Status constants.
U_CAPI UBool u_isdigit(UChar32 c)
Determines whether the specified code point is a digit character according to Java.
U_CAPI UBool u_isMirrored(UChar32 c)
Determines whether the code point has the Bidi_Mirrored property.
U_CAPI void u_charAge(UChar32 c, UVersionInfo versionArray)
Get the "age" of the code point.
U_CAPI UBool u_isspace(UChar32 c)
Determines if the specified character is a space character or not.
U_CAPI int32_t u_getIntPropertyMaxValue(UProperty which)
Get the maximum value for an enumerated/integer/binary Unicode property.
UIndicPositionalCategory
Indic Positional Category constants.
@ U_INPC_VISUAL_ORDER_LEFT
@ U_INPC_TOP_AND_BOTTOM_AND_RIGHT
@ U_INPC_TOP_AND_BOTTOM_AND_LEFT
@ U_INPC_TOP_AND_LEFT_AND_RIGHT
@ U_INPC_BOTTOM_AND_RIGHT
U_CAPI void u_getUnicodeVersion(UVersionInfo versionArray)
Gets the Unicode version information.
UCharDirection
This specifies the language directional property of a character set.
@ U_EUROPEAN_NUMBER_TERMINATOR
ET.
@ U_RIGHT_TO_LEFT_ARABIC
AL.
@ U_POP_DIRECTIONAL_ISOLATE
PDI.
@ U_COMMON_NUMBER_SEPARATOR
CS.
@ U_DIR_NON_SPACING_MARK
NSM.
@ U_FIRST_STRONG_ISOLATE
FSI.
@ U_POP_DIRECTIONAL_FORMAT
PDF.
@ U_CHAR_DIRECTION_COUNT
One more than the highest UCharDirection value.
@ U_WHITE_SPACE_NEUTRAL
WS.
@ U_RIGHT_TO_LEFT_OVERRIDE
RLO.
@ U_RIGHT_TO_LEFT_EMBEDDING
RLE.
@ U_EUROPEAN_NUMBER_SEPARATOR
ES.
@ U_LEFT_TO_RIGHT_ISOLATE
LRI.
@ U_LEFT_TO_RIGHT_OVERRIDE
LRO.
@ U_LEFT_TO_RIGHT_EMBEDDING
LRE.
@ U_RIGHT_TO_LEFT_ISOLATE
RLI.
UPropertyNameChoice
Selector constants for u_getPropertyName() and u_getPropertyValueName().
@ U_PROPERTY_NAME_CHOICE_COUNT
One more than the highest normal UPropertyNameChoice value.
UBool UEnumCharNamesFn(void *context, UChar32 code, UCharNameChoice nameChoice, const char *name, int32_t length)
Type of a callback function for u_enumCharNames() that gets called for each Unicode character with th...
U_CAPI UBool u_isUUppercase(UChar32 c)
Check if a code point has the Uppercase Unicode property.
U_CAPI int8_t u_charType(UChar32 c)
Returns the general category value for the code point.
U_CAPI UChar32 u_forDigit(int32_t digit, int8_t radix)
Determines the character representation for a specific digit in the specified radix.
U_CAPI UBool u_isprint(UChar32 c)
Determines whether the specified code point is a printable character.
ULineBreak
Line Break constants.
@ U_LB_CONDITIONAL_JAPANESE_STARTER
@ U_LB_INSEPARABLE
Renamed from the misspelled "inseperable" in Unicode 4.0.1/ICU 3.0.
@ U_LB_COUNT
One more than the highest normal ULineBreak value.
@ U_LB_REGIONAL_INDICATOR
int32_t u_getISOComment(UChar32 c, char *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Returns an empty string.
U_CAPI UBool u_islower(UChar32 c)
Determines whether the specified code point has the general category "Ll" (lowercase letter).
U_CAPI UBool u_isISOControl(UChar32 c)
Determines whether the specified code point is an ISO control code.
UCharCategory
Data for enumerated Unicode general category types.
@ U_GENERAL_OTHER_TYPES
Cn "Other, Not Assigned (no characters in [UnicodeData.txt] have this property)" (same as U_UNASSIGNE...
@ U_PARAGRAPH_SEPARATOR
Zp.
@ U_INITIAL_PUNCTUATION
Pi.
@ U_UNASSIGNED
Non-category for unassigned and non-character code points.
@ U_COMBINING_SPACING_MARK
Mc.
@ U_CONNECTOR_PUNCTUATION
Pc.
@ U_CHAR_CATEGORY_COUNT
One higher than the last enum UCharCategory constant.
@ U_DECIMAL_DIGIT_NUMBER
Nd.
U_CAPI UBool u_isWhitespace(UChar32 c)
Determines if the specified code point is a whitespace character according to Java/ICU.
UVerticalOrientation
Vertical Orientation constants.
@ U_VO_TRANSFORMED_UPRIGHT
@ U_VO_TRANSFORMED_ROTATED
U_CAPI UBool u_isdefined(UChar32 c)
Determines whether the specified code point is "defined", which usually means that it is assigned a c...
UJoiningGroup
Joining Group constants.
@ U_JG_BURUSHASKI_YEH_BARREE
@ U_JG_HANIFI_ROHINGYA_PA
@ U_JG_MANICHAEAN_THAMEDH
@ U_JG_COUNT
One more than the highest normal UJoiningGroup value.
@ U_JG_MANICHAEAN_DHAMEDH
@ U_JG_MANICHAEAN_HUNDRED
@ U_JG_HANIFI_ROHINGYA_KINNA_YA
UHangulSyllableType
Hangul Syllable Type constants.
@ U_HST_COUNT
One more than the highest normal UHangulSyllableType value.
U_CAPI UChar32 u_getBidiPairedBracket(UChar32 c)
Maps the specified character to its paired bracket character.
U_CAPI UBool u_isIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in an identifier accordin...
USentenceBreak
Sentence Break constants.
@ U_SB_COUNT
One more than the highest normal USentenceBreak value.
U_CAPI int32_t u_getPropertyValueEnum(UProperty property, const char *alias)
Return the property value integer for a given value name, as specified in the Unicode database file P...
UBool UCharEnumTypeRange(const void *context, UChar32 start, UChar32 limit, UCharCategory type)
Callback from u_enumCharTypes(), is called for each contiguous range of code points c (where start<=c...
UEastAsianWidth
East Asian Width constants.
@ U_EA_COUNT
One more than the highest normal UEastAsianWidth value.
U_CAPI UChar32 u_totitle(UChar32 c)
The given character is mapped to its titlecase equivalent according to UnicodeData....
U_CAPI UBool u_isULowercase(UChar32 c)
Check if a code point has the Lowercase Unicode property.
U_CAPI const UCPMap * u_getIntPropertyMap(UProperty property, UErrorCode *pErrorCode)
Returns an immutable UCPMap for an enumerated/catalog/int-valued property.
U_CAPI UBool u_isJavaIDPart(UChar32 c)
Determines if the specified character is permissible in a Java identifier.
U_CAPI UCharDirection u_charDirection(UChar32 c)
Returns the bidirectional category value for the code point, which is used in the Unicode bidirection...
U_CAPI UBool u_istitle(UChar32 c)
Determines whether the specified code point is a titlecase letter.
U_CAPI UBool u_iscntrl(UChar32 c)
Determines whether the specified code point is a control character (as defined by this function).
U_CAPI const char * u_getPropertyValueName(UProperty property, int32_t value, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property value, as given in the Unicode database file PropertyVal...
UGraphemeClusterBreak
Grapheme Cluster Break constants.
@ U_GCB_REGIONAL_INDICATOR
@ U_GCB_COUNT
One more than the highest normal UGraphemeClusterBreak value.
U_CAPI const USet * u_getBinaryPropertySet(UProperty property, UErrorCode *pErrorCode)
Returns a frozen USet for a binary property.
U_CAPI uint8_t u_getCombiningClass(UChar32 c)
Returns the combining class of the code point as specified in UnicodeData.txt.
U_CAPI void u_enumCharTypes(UCharEnumTypeRange *enumRange, const void *context)
Enumerate efficiently all code points with their Unicode general categories.
U_CAPI UBool u_isxdigit(UChar32 c)
Determines whether the specified code point is a hexadecimal digit.
U_CAPI void u_enumCharNames(UChar32 start, UChar32 limit, UEnumCharNamesFn *fn, void *context, UCharNameChoice nameChoice, UErrorCode *pErrorCode)
Enumerate all assigned Unicode characters between the start and limit code points (start inclusive,...
U_CAPI UChar32 u_charMirror(UChar32 c)
Maps the specified character to a "mirror-image" character.
U_CAPI UBool u_isJavaSpaceChar(UChar32 c)
Determine if the specified code point is a space character according to Java.
U_CAPI UBool u_isIDPart(UChar32 c)
Determines if the specified character is permissible as a non-initial character of an identifier acco...
U_CAPI UChar32 u_foldCase(UChar32 c, uint32_t options)
The given character is mapped to its case folding equivalent according to UnicodeData....
U_CAPI UBool u_isgraph(UChar32 c)
Determines whether the specified code point is a "graphic" character (printable, excluding spaces).
U_CAPI int32_t u_charDigitValue(UChar32 c)
Returns the decimal digit value of a decimal digit character.
U_CAPI UBool u_isIDIgnorable(UChar32 c)
Determines if the specified character should be regarded as an ignorable character in an identifier,...
U_CAPI int32_t u_getIDTypes(UChar32 c, UIdentifierType *types, int32_t capacity, UErrorCode *pErrorCode)
Writes code point c's Identifier_Type as a list of UIdentifierType values to the output types array a...
U_CAPI UBool u_ispunct(UChar32 c)
Determines whether the specified code point is a punctuation character.
UNumericType
Numeric Type constants.
@ U_NT_COUNT
One more than the highest normal UNumericType value.
UDecompositionType
Decomposition Type constants.
@ U_DT_COUNT
One more than the highest normal UDecompositionType value.
U_CAPI UBool u_stringHasBinaryProperty(const UChar *s, int32_t length, UProperty which)
Returns true if the property is true for the string.
U_CAPI int32_t u_digit(UChar32 ch, int8_t radix)
Returns the decimal digit value of the code point in the specified radix.
UProperty
Selection constants for Unicode properties.
@ UCHAR_GRAPHEME_CLUSTER_BREAK
Enumerated property Grapheme_Cluster_Break (new in Unicode 4.1).
@ UCHAR_BIDI_PAIRED_BRACKET
String property Bidi_Paired_Bracket (new in Unicode 6.3).
@ UCHAR_ALPHABETIC
Binary property Alphabetic.
@ UCHAR_POSIX_GRAPH
Binary property graph (a C/POSIX character class).
@ UCHAR_RGI_EMOJI_TAG_SEQUENCE
Binary property of strings RGI_Emoji_Tag_Sequence.
@ UCHAR_RGI_EMOJI
Binary property of strings RGI_Emoji.
@ UCHAR_BLOCK
Enumerated property Block.
@ UCHAR_INVALID_CODE
Represents a nonexistent or invalid property or property value.
@ UCHAR_SEGMENT_STARTER
Binary Property Segment_Starter.
@ UCHAR_PREPENDED_CONCATENATION_MARK
Binary property Prepended_Concatenation_Mark.
@ UCHAR_REGIONAL_INDICATOR
Binary property Regional_Indicator.
@ UCHAR_OTHER_PROPERTY_START
First constant for Unicode properties with unusual value types.
@ UCHAR_S_TERM
Binary property STerm (new in Unicode 4.0.1).
@ UCHAR_WHITE_SPACE
Binary property White_Space.
@ UCHAR_CANONICAL_COMBINING_CLASS
Enumerated property Canonical_Combining_Class.
@ UCHAR_SOFT_DOTTED
Binary property Soft_Dotted (new in Unicode 3.2).
@ UCHAR_GRAPHEME_LINK
Binary property Grapheme_Link (new in Unicode 3.2).
@ UCHAR_PATTERN_SYNTAX
Binary property Pattern_Syntax (new in Unicode 4.1).
@ UCHAR_GRAPHEME_EXTEND
Binary property Grapheme_Extend (new in Unicode 3.2).
@ UCHAR_CASED
Binary property Cased.
@ UCHAR_XID_START
Binary property XID_Start.
@ UCHAR_RGI_EMOJI_ZWJ_SEQUENCE
Binary property of strings RGI_Emoji_ZWJ_Sequence.
@ UCHAR_VERTICAL_ORIENTATION
Enumerated property Vertical_Orientation.
@ UCHAR_NFKC_INERT
Binary property NFKC_Inert.
@ UCHAR_INT_LIMIT
One more than the last constant for enumerated/integer Unicode properties.
@ UCHAR_RGI_EMOJI_FLAG_SEQUENCE
Binary property of strings RGI_Emoji_Flag_Sequence.
@ UCHAR_OTHER_PROPERTY_LIMIT
One more than the last constant for Unicode properties with unusual value types.
@ UCHAR_PATTERN_WHITE_SPACE
Binary property Pattern_White_Space (new in Unicode 4.1).
@ UCHAR_VARIATION_SELECTOR
Binary property Variation_Selector (new in Unicode 4.0.1).
@ UCHAR_NUMERIC_VALUE
Double property Numeric_Value.
@ UCHAR_DOUBLE_START
First constant for double Unicode properties.
@ UCHAR_HEX_DIGIT
Binary property Hex_Digit.
@ UCHAR_SIMPLE_TITLECASE_MAPPING
String property Simple_Titlecase_Mapping.
@ UCHAR_TRAIL_CANONICAL_COMBINING_CLASS
Enumerated property Trail_Canonical_Combining_Class.
@ UCHAR_DIACRITIC
Binary property Diacritic.
@ UCHAR_XID_CONTINUE
Binary property XID_Continue.
@ UCHAR_NFKC_QUICK_CHECK
Enumerated property NFKC_Quick_Check.
@ UCHAR_HYPHEN
Binary property Hyphen.
@ UCHAR_RADICAL
Binary property Radical (new in Unicode 3.2).
@ UCHAR_BIDI_PAIRED_BRACKET_TYPE
Enumerated property Bidi_Paired_Bracket_Type (new in Unicode 6.3).
@ UCHAR_ID_COMPAT_MATH_CONTINUE
Binary property ID_Compat_Math_Continue.
@ UCHAR_DASH
Binary property Dash.
@ UCHAR_ASCII_HEX_DIGIT
Binary property ASCII_Hex_Digit.
@ UCHAR_ID_CONTINUE
Binary property ID_Continue.
@ UCHAR_NFKD_INERT
Binary property NFKD_Inert.
@ UCHAR_BIDI_MIRRORING_GLYPH
String property Bidi_Mirroring_Glyph.
@ UCHAR_LEAD_CANONICAL_COMBINING_CLASS
Enumerated property Lead_Canonical_Combining_Class.
@ UCHAR_UNICODE_1_NAME
String property Unicode_1_Name.
@ UCHAR_BIDI_MIRRORED
Binary property Bidi_Mirrored.
@ UCHAR_CASE_IGNORABLE
Binary property Case_Ignorable.
@ UCHAR_EAST_ASIAN_WIDTH
Enumerated property East_Asian_Width.
@ UCHAR_STRING_LIMIT
One more than the last constant for string Unicode properties.
@ UCHAR_IDENTIFIER_TYPE
Miscellaneous property Identifier_Type.
@ UCHAR_EMOJI_PRESENTATION
Binary property Emoji_Presentation.
@ UCHAR_GRAPHEME_BASE
Binary property Grapheme_Base (new in Unicode 3.2).
@ UCHAR_NFKD_QUICK_CHECK
Enumerated property NFKD_Quick_Check.
@ UCHAR_NAME
String property Name.
@ UCHAR_UPPERCASE
Binary property Uppercase.
@ UCHAR_CASE_FOLDING
String property Case_Folding.
@ UCHAR_INDIC_SYLLABIC_CATEGORY
Enumerated property Indic_Syllabic_Category.
@ UCHAR_MATH
Binary property Math.
@ UCHAR_NUMERIC_TYPE
Enumerated property Numeric_Type.
@ UCHAR_RGI_EMOJI_MODIFIER_SEQUENCE
Binary property of strings RGI_Emoji_Modifier_Sequence.
@ UCHAR_ISO_COMMENT
Deprecated string property ISO_Comment.
@ UCHAR_SCRIPT
Enumerated property Script.
@ UCHAR_CHANGES_WHEN_TITLECASED
Binary property Changes_When_Titlecased.
@ UCHAR_EXTENDED_PICTOGRAPHIC
Binary property Extended_Pictographic.
@ UCHAR_DEPRECATED
Binary property Deprecated (new in Unicode 3.2).
@ UCHAR_CHANGES_WHEN_UPPERCASED
Binary property Changes_When_Uppercased.
@ UCHAR_CHANGES_WHEN_CASEFOLDED
Binary property Changes_When_Casefolded.
@ UCHAR_INDIC_POSITIONAL_CATEGORY
Enumerated property Indic_Positional_Category.
@ UCHAR_DEFAULT_IGNORABLE_CODE_POINT
Binary property Default_Ignorable_Code_Point (new in Unicode 3.2).
@ UCHAR_GENERAL_CATEGORY
Enumerated property General_Category.
@ UCHAR_INT_START
First constant for enumerated/integer Unicode properties.
@ UCHAR_EMOJI_MODIFIER
Binary property Emoji_Modifier.
@ UCHAR_NFD_QUICK_CHECK
Enumerated property NFD_Quick_Check.
@ UCHAR_IDS_BINARY_OPERATOR
Binary property IDS_Binary_Operator (new in Unicode 3.2).
@ UCHAR_BINARY_START
First constant for binary Unicode properties.
@ UCHAR_IDS_UNARY_OPERATOR
Binary property IDS_Unary_Operator.
@ UCHAR_TERMINAL_PUNCTUATION
Binary property Terminal_Punctuation.
@ UCHAR_GENERAL_CATEGORY_MASK
Bitmask property General_Category_Mask.
@ UCHAR_MASK_START
First constant for bit-mask Unicode properties.
@ UCHAR_DECOMPOSITION_TYPE
Enumerated property Decomposition_Type.
@ UCHAR_TITLECASE_MAPPING
String property Titlecase_Mapping.
@ UCHAR_HANGUL_SYLLABLE_TYPE
Enumerated property Hangul_Syllable_Type, new in Unicode 4.
@ UCHAR_LINE_BREAK
Enumerated property Line_Break.
@ UCHAR_SIMPLE_UPPERCASE_MAPPING
String property Simple_Uppercase_Mapping.
@ UCHAR_POSIX_ALNUM
Binary property alnum (a C/POSIX character class).
@ UCHAR_JOINING_TYPE
Enumerated property Joining_Type.
@ UCHAR_EMOJI_KEYCAP_SEQUENCE
Binary property of strings Emoji_Keycap_Sequence.
@ UCHAR_QUOTATION_MARK
Binary property Quotation_Mark.
@ UCHAR_NFC_INERT
Binary property NFC_Inert.
@ UCHAR_LOWERCASE_MAPPING
String property Lowercase_Mapping.
@ UCHAR_SIMPLE_CASE_FOLDING
String property Simple_Case_Folding.
@ UCHAR_JOIN_CONTROL
Binary property Join_Control.
@ UCHAR_NONCHARACTER_CODE_POINT
Binary property Noncharacter_Code_Point.
@ UCHAR_BIDI_CONTROL
Binary property Bidi_Control.
@ UCHAR_CHANGES_WHEN_LOWERCASED
Binary property Changes_When_Lowercased.
@ UCHAR_BINARY_LIMIT
One more than the last constant for binary Unicode properties.
@ UCHAR_IDS_TRINARY_OPERATOR
Binary property IDS_Trinary_Operator (new in Unicode 3.2).
@ UCHAR_ID_START
Binary property ID_Start.
@ UCHAR_AGE
String property Age.
@ UCHAR_WORD_BREAK
Enumerated property Word_Break (new in Unicode 4.1).
@ UCHAR_DOUBLE_LIMIT
One more than the last constant for double Unicode properties.
@ UCHAR_EMOJI_MODIFIER_BASE
Binary property Emoji_Modifier_Base.
@ UCHAR_EMOJI_COMPONENT
Binary property Emoji_Component.
@ UCHAR_POSIX_BLANK
Binary property blank (a C/POSIX character class).
@ UCHAR_SIMPLE_LOWERCASE_MAPPING
String property Simple_Lowercase_Mapping.
@ UCHAR_NFD_INERT
Binary property NFD_Inert.
@ UCHAR_POSIX_PRINT
Binary property print (a C/POSIX character class).
@ UCHAR_SENTENCE_BREAK
Enumerated property Sentence_Break (new in Unicode 4.1).
@ UCHAR_IDEOGRAPHIC
Binary property Ideographic.
@ UCHAR_ID_COMPAT_MATH_START
Binary property ID_Compat_Math_Start.
@ UCHAR_UNIFIED_IDEOGRAPH
Binary property Unified_Ideograph (new in Unicode 3.2).
@ UCHAR_CHANGES_WHEN_NFKC_CASEFOLDED
Binary property Changes_When_NFKC_Casefolded.
@ UCHAR_NFC_QUICK_CHECK
Enumerated property NFC_Quick_Check.
@ UCHAR_CASE_SENSITIVE
Binary property Case_Sensitive.
@ UCHAR_UPPERCASE_MAPPING
String property Uppercase_Mapping.
@ UCHAR_BIDI_CLASS
Enumerated property Bidi_Class.
@ UCHAR_BASIC_EMOJI
Binary property of strings Basic_Emoji.
@ UCHAR_MASK_LIMIT
One more than the last constant for bit-mask Unicode properties.
@ UCHAR_IDENTIFIER_STATUS
Enumerated property Identifier_Status.
@ UCHAR_JOINING_GROUP
Enumerated property Joining_Group.
@ UCHAR_LOGICAL_ORDER_EXCEPTION
Binary property Logical_Order_Exception (new in Unicode 3.2).
@ UCHAR_EXTENDER
Binary property Extender.
@ UCHAR_STRING_START
First constant for string Unicode properties.
@ UCHAR_SCRIPT_EXTENSIONS
Miscellaneous property Script_Extensions (new in Unicode 6.0).
@ UCHAR_FULL_COMPOSITION_EXCLUSION
Binary property Full_Composition_Exclusion.
@ UCHAR_EMOJI
Binary property Emoji.
@ UCHAR_LOWERCASE
Binary property Lowercase.
@ UCHAR_CHANGES_WHEN_CASEMAPPED
Binary property Changes_When_Casemapped.
@ UCHAR_POSIX_XDIGIT
Binary property xdigit (a C/POSIX character class).
U_CAPI int32_t u_getIntPropertyMinValue(UProperty which)
Get the minimum value for an enumerated/integer/binary Unicode property.
U_CAPI UBool u_isalnum(UChar32 c)
Determines whether the specified code point is an alphanumeric character (letter or digit) according ...
U_CAPI UBool u_isalpha(UChar32 c)
Determines whether the specified code point is a letter character.
U_CAPI UChar32 u_tolower(UChar32 c)
The given character is mapped to its lowercase equivalent according to UnicodeData....
U_CAPI int32_t u_getFC_NFKC_Closure(UChar32 c, UChar *dest, int32_t destCapacity, UErrorCode *pErrorCode)
Get the FC_NFKC_Closure property string for a character.
UIndicSyllabicCategory
Indic Syllabic Category constants.
@ U_INSC_INVISIBLE_STACKER
@ U_INSC_SYLLABLE_MODIFIER
@ U_INSC_MODIFYING_LETTER
@ U_INSC_REGISTER_SHIFTER
@ U_INSC_CONSONANT_KILLER
@ U_INSC_CONSONANT_HEAD_LETTER
@ U_INSC_BRAHMI_JOINING_NUMBER
@ U_INSC_CANTILLATION_MARK
@ U_INSC_VOWEL_INDEPENDENT
@ U_INSC_CONSONANT_MEDIAL
@ U_INSC_CONSONANT_WITH_STACKER
@ U_INSC_CONSONANT_PREFIXED
@ U_INSC_CONSONANT_SUCCEEDING_REPHA
@ U_INSC_CONSONANT_PLACEHOLDER
@ U_INSC_CONSONANT_PRECEDING_REPHA
@ U_INSC_CONSONANT_SUBJOINED
@ U_INSC_CONSONANT_INITIAL_POSTFIXED
UWordBreakValues
Word Break constants.
@ U_WB_REGIONAL_INDICATOR
@ U_WB_COUNT
One more than the highest normal UWordBreakValues value.
U_CAPI UBool u_isJavaIDStart(UChar32 c)
Determines if the specified character is permissible as the first character in a Java identifier.
UBidiPairedBracketType
Bidi Paired Bracket Type constants.
@ U_BPT_CLOSE
Close paired bracket.
@ U_BPT_COUNT
One more than the highest normal UBidiPairedBracketType value.
@ U_BPT_NONE
Not a paired bracket.
@ U_BPT_OPEN
Open paired bracket.
U_CAPI const char * u_getPropertyName(UProperty property, UPropertyNameChoice nameChoice)
Return the Unicode name for a given property, as given in the Unicode database file PropertyAliases....
C API: This file defines an abstract map from Unicode code points to integer values.
struct UCPMap UCPMap
Abstract map from Unicode code points (U+0000..U+10FFFF) to integer values.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
#define U_DEPRECATED
This is used to declare a function as a deprecated public ICU C API
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
int8_t UBool
The ICU boolean type, a signed-byte integer.
#define U_CAPI
This is used to declare a function as a public ICU C API.
char16_t UChar
The base type for UTF-16 code units and pointers.
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
uint8_t UVersionInfo[U_MAX_VERSION_LENGTH]
The binary form of a version on ICU APIs is an array of 4 uint8_t.