ICU 77.1
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
uspoof.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4 ***************************************************************************
5 * Copyright (C) 2008-2016, International Business Machines Corporation
6 * and others. All Rights Reserved.
7 ***************************************************************************
8 * file name: uspoof.h
9 * encoding: UTF-8
10 * tab size: 8 (not used)
11 * indentation:4
12 *
13 * created on: 2008Feb13
14 * created by: Andy Heninger
15 *
16 * Unicode Spoof Detection
17 */
18 
19 #ifndef USPOOF_H
20 #define USPOOF_H
21 
22 #include "unicode/ubidi.h"
23 #include "unicode/utypes.h"
24 #include "unicode/uset.h"
25 #include "unicode/parseerr.h"
26 
27 #if !UCONFIG_NO_NORMALIZATION
28 
29 
30 #if U_SHOW_CPLUSPLUS_API
31 #include "unicode/localpointer.h"
32 #include "unicode/unistr.h"
33 #include "unicode/uniset.h"
34 #endif
35 
36 
422 
423 struct USpoofChecker;
427 typedef struct USpoofChecker USpoofChecker;
429 struct USpoofCheckResult;
434 typedef struct USpoofCheckResult USpoofCheckResult;
435 
443 typedef enum USpoofChecks {
453 
463 
473 
484 
485 #ifndef U_HIDE_DEPRECATED_API
492 #endif /* U_HIDE_DEPRECATED_API */
493 
508 
509 #ifndef U_HIDE_DEPRECATED_API
516 #endif /* U_HIDE_DEPRECATED_API */
517 
525 
532 
540 
561 
568 
581  USPOOF_AUX_INFO = 0x40000000
582 
584 
585 
595  typedef enum URestrictionLevel {
602  USPOOF_ASCII = 0x10000000,
643  USPOOF_UNRESTRICTIVE = 0x60000000,
650 #ifndef U_HIDE_INTERNAL_API
656 #endif /* U_HIDE_INTERNAL_API */
658 
669 U_CAPI USpoofChecker * U_EXPORT2
671 
672 
694 U_CAPI USpoofChecker * U_EXPORT2
695 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength,
696  UErrorCode *pErrorCode);
697 
728 U_CAPI USpoofChecker * U_EXPORT2
729 uspoof_openFromSource(const char *confusables, int32_t confusablesLen,
730  const char *confusablesWholeScript, int32_t confusablesWholeScriptLen,
731  int32_t *errType, UParseError *pe, UErrorCode *status);
732 
733 
739 U_CAPI void U_EXPORT2
741 
751 U_CAPI USpoofChecker * U_EXPORT2
753 
754 
795 U_CAPI void U_EXPORT2
796 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status);
797 
809 U_CAPI int32_t U_EXPORT2
811 
823 U_CAPI void U_EXPORT2
825 
826 
834 U_CAPI URestrictionLevel U_EXPORT2
836 
879 U_CAPI void U_EXPORT2
880 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status);
881 
903 U_CAPI const char * U_EXPORT2
905 
906 
925 U_CAPI void U_EXPORT2
927 
928 
949 U_CAPI const USet * U_EXPORT2
951 
952 
985 U_CAPI int32_t U_EXPORT2
987  const UChar *id, int32_t length,
988  int32_t *position,
989  UErrorCode *status);
990 
991 
1024 U_CAPI int32_t U_EXPORT2
1026  const char *id, int32_t length,
1027  int32_t *position,
1028  UErrorCode *status);
1029 
1030 
1059 U_CAPI int32_t U_EXPORT2
1061  const UChar* id, int32_t length,
1062  USpoofCheckResult* checkResult,
1063  UErrorCode *status);
1064 
1096 U_CAPI int32_t U_EXPORT2
1098  const char *id, int32_t length,
1099  USpoofCheckResult* checkResult,
1100  UErrorCode *status);
1101 
1120 U_CAPI USpoofCheckResult* U_EXPORT2
1122 
1130 U_CAPI void U_EXPORT2
1132 
1147 U_CAPI int32_t U_EXPORT2
1149 
1160 U_CAPI URestrictionLevel U_EXPORT2
1162 
1174 U_CAPI const USet* U_EXPORT2
1176 
1177 
1221 U_CAPI int32_t U_EXPORT2
1223  const UChar *id1, int32_t length1,
1224  const UChar *id2, int32_t length2,
1225  UErrorCode *status);
1226 
1273 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction,
1274  const UChar *id1, int32_t length1,
1275  const UChar *id2, int32_t length2,
1276  UErrorCode *status);
1277 
1303 U_CAPI int32_t U_EXPORT2
1305  const char *id1, int32_t length1,
1306  const char *id2, int32_t length2,
1307  UErrorCode *status);
1308 
1336 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction,
1337  const char *id1, int32_t length1,
1338  const char *id2, int32_t length2,
1339  UErrorCode *status);
1340 
1372 U_CAPI int32_t U_EXPORT2
1374  uint32_t type,
1375  const UChar *id, int32_t length,
1376  UChar *dest, int32_t destCapacity,
1377  UErrorCode *status);
1378 
1411 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc,
1412  UBiDiDirection direction,
1413  const UChar *id, int32_t length,
1414  UChar *dest, int32_t destCapacity, UErrorCode *status);
1415 
1449 U_CAPI int32_t U_EXPORT2
1451  uint32_t type,
1452  const char *id, int32_t length,
1453  char *dest, int32_t destCapacity,
1454  UErrorCode *status);
1455 
1490 U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction,
1491  const char *id, int32_t length, char *dest,
1492  int32_t destCapacity, UErrorCode *status);
1493 
1506 U_CAPI const USet * U_EXPORT2
1508 
1521 U_CAPI const USet * U_EXPORT2
1523 
1546 U_CAPI int32_t U_EXPORT2
1548  void *data, int32_t capacity,
1549  UErrorCode *status);
1550 
1552 
1553 #if U_SHOW_CPLUSPLUS_API
1554 
1555 U_NAMESPACE_BEGIN
1556 
1592 U_NAMESPACE_END
1593 
1612 U_CAPI void U_EXPORT2
1614 
1615 
1636 U_CAPI const icu::UnicodeSet * U_EXPORT2
1638 
1667 U_CAPI int32_t U_EXPORT2
1669  const icu::UnicodeString &id,
1670  int32_t *position,
1671  UErrorCode *status);
1672 
1698 U_CAPI int32_t U_EXPORT2
1700  const icu::UnicodeString &id,
1701  USpoofCheckResult* checkResult,
1702  UErrorCode *status);
1703 
1725 U_CAPI int32_t U_EXPORT2
1727  const icu::UnicodeString &s1,
1728  const icu::UnicodeString &s2,
1729  UErrorCode *status);
1730 
1754 U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc,
1755  UBiDiDirection direction,
1756  const icu::UnicodeString &s1,
1757  const icu::UnicodeString &s2,
1758  UErrorCode *status);
1759 
1783 U_I18N_API icu::UnicodeString & U_EXPORT2
1785  uint32_t type,
1786  const icu::UnicodeString &id,
1787  icu::UnicodeString &dest,
1788  UErrorCode *status);
1789 
1815  const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id,
1816  icu::UnicodeString &dest, UErrorCode *status);
1817 
1830 U_CAPI const icu::UnicodeSet * U_EXPORT2
1832 
1845 U_CAPI const icu::UnicodeSet * U_EXPORT2
1847 
1848 #endif /* U_SHOW_CPLUSPLUS_API */
1849 
1850 #endif /* UCONFIG_NO_NORMALIZATION */
1851 
1852 #endif /* USPOOF_H */
"Smart pointer" class, closes a USpoofCheckResult via uspoof_closeCheckResult().
"Smart pointer" class, closes a USpoofChecker via uspoof_close().
A mutable set of Unicode characters and multicharacter strings.
Definition: uniset.h:285
UnicodeString is a string class that stores Unicode characters directly and provides similar function...
Definition: unistr.h:296
C++ API: "Smart pointers" for use with and in ICU4C C++ code.
#define U_DEFINE_LOCAL_OPEN_POINTER(LocalPointerClassName, Type, closeFunction)
"Smart pointer" definition macro, deletes objects via the closeFunction.
Definition: localpointer.h:550
C API: Parse Error Information.
A UParseError struct is used to return detailed information about parsing errors.
Definition: parseerr.h:58
C API: Bidi algorithm.
UBiDiDirection
UBiDiDirection values indicate the text direction.
Definition: ubidi.h:429
#define U_CDECL_END
This is used to end a declaration of a library private ICU C API.
Definition: umachine.h:86
#define U_CAPI
This is used to declare a function as a public ICU C API.
Definition: umachine.h:110
char16_t UChar
The base type for UTF-16 code units and pointers.
Definition: umachine.h:378
#define U_CDECL_BEGIN
This is used to begin a declaration of a library private ICU C API.
Definition: umachine.h:85
C++ API: Unicode Set.
C++ API: Unicode String.
C API: Unicode Set.
struct USet USet
USet is the C API type corresponding to C++ class UnicodeSet.
Definition: uset.h:54
U_CAPI void U_EXPORT2 uspoof_setRestrictionLevel(USpoofChecker *sc, URestrictionLevel restrictionLevel)
Set the loosest restriction level allowed for strings.
U_CAPI const icu::UnicodeSet *U_EXPORT2 uspoof_getInclusionUnicodeSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode....
USpoofChecks
Enum for the kinds of checks that USpoofChecker can perform.
Definition: uspoof.h:443
@ USPOOF_MIXED_NUMBERS
Check that an identifier does not mix numbers from different numbering systems.
Definition: uspoof.h:539
@ USPOOF_RESTRICTION_LEVEL
Check that an identifier is no looser than the specified RestrictionLevel.
Definition: uspoof.h:507
@ USPOOF_CHAR_LIMIT
Check that an identifier contains only characters from a specified set of acceptable characters.
Definition: uspoof.h:531
@ USPOOF_CONFUSABLE
Enable this flag in uspoof_setChecks to turn on all types of confusables.
Definition: uspoof.h:483
@ USPOOF_ALL_CHECKS
Enable all spoof checks.
Definition: uspoof.h:567
@ USPOOF_SINGLE_SCRIPT
Check that an identifier contains only characters from a single script (plus chars from the common an...
Definition: uspoof.h:515
@ USPOOF_HIDDEN_OVERLAY
Check that an identifier does not have a combining character following a character in which that comb...
Definition: uspoof.h:560
@ USPOOF_INVISIBLE
Check an identifier for the presence of invisible characters, such as zero-width spaces,...
Definition: uspoof.h:524
@ USPOOF_MIXED_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:462
@ USPOOF_AUX_INFO
Enable the return of auxiliary (non-error) information in the upper bits of the check results value.
Definition: uspoof.h:581
@ USPOOF_WHOLE_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:472
@ USPOOF_SINGLE_SCRIPT_CONFUSABLE
When performing the two-string uspoof_areConfusable test, this flag in the return value indicates tha...
Definition: uspoof.h:452
@ USPOOF_ANY_CASE
This flag is deprecated and no longer affects the behavior of SpoofChecker.
Definition: uspoof.h:491
U_CAPI int32_t U_EXPORT2 uspoof_getCheckResultChecks(const USpoofCheckResult *checkResult, UErrorCode *status)
Indicates which of the spoof check(s) have failed.
U_CAPI int32_t U_EXPORT2 uspoof_checkUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
U_CAPI int32_t U_EXPORT2 uspoof_getSkeleton(const USpoofChecker *sc, uint32_t type, const UChar *id, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
U_CAPI void U_EXPORT2 uspoof_setAllowedChars(USpoofChecker *sc, const USet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
U_CAPI const USet *U_EXPORT2 uspoof_getCheckResultNumerics(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the set of numerics found in the string, if the USPOOF_MIXED_NUMBERS check was enabled; otherwis...
U_CAPI void U_EXPORT2 uspoof_setAllowedUnicodeSet(USpoofChecker *sc, const icu::UnicodeSet *chars, UErrorCode *status)
Limit the acceptable characters to those specified by a Unicode Set.
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeletonUTF8(const USpoofChecker *sc, UBiDiDirection direction, const char *id, int32_t length, char *dest, int32_t destCapacity, UErrorCode *status)
Get the "bidiSkeleton" for an identifier and a direction.
U_CAPI const USet *U_EXPORT2 uspoof_getAllowedChars(const USpoofChecker *sc, UErrorCode *status)
Get a USet for the characters permitted in an identifier.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusable(const USpoofChecker *sc, UBiDiDirection direction, const UChar *id1, int32_t length1, const UChar *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable when displayed in a context with the give...
U_CAPI USpoofChecker *U_EXPORT2 uspoof_clone(const USpoofChecker *sc, UErrorCode *status)
Clone a Spoof Checker.
URestrictionLevel
Constants from UTS #39 for use in uspoof_setRestrictionLevel, and for returned identifier restriction...
Definition: uspoof.h:595
@ USPOOF_SINGLE_SCRIPT_RESTRICTIVE
The string classifies as ASCII-Only, or all characters in the string are in the identifier profile an...
Definition: uspoof.h:609
@ USPOOF_MODERATELY_RESTRICTIVE
The string classifies as Highly Restrictive, or all characters in the string are in the identifier pr...
Definition: uspoof.h:631
@ USPOOF_ASCII
All characters in the string are in the identifier profile and all characters in the string are in th...
Definition: uspoof.h:602
@ USPOOF_UNDEFINED_RESTRICTIVE
An undefined restriction level.
Definition: uspoof.h:655
@ USPOOF_RESTRICTION_LEVEL_MASK
Mask for selecting the Restriction Level bits from the return value of uspoof_check.
Definition: uspoof.h:649
@ USPOOF_MINIMALLY_RESTRICTIVE
All characters in the string are in the identifier profile.
Definition: uspoof.h:637
@ USPOOF_HIGHLY_RESTRICTIVE
The string classifies as Single Script, or all characters in the string are in the identifier profile...
Definition: uspoof.h:623
@ USPOOF_UNRESTRICTIVE
Any valid identifiers, including characters outside of the Identifier Profile.
Definition: uspoof.h:643
U_CAPI int32_t U_EXPORT2 uspoof_getSkeletonUTF8(const USpoofChecker *sc, uint32_t type, const char *id, int32_t length, char *dest, int32_t destCapacity, UErrorCode *status)
Get the "skeleton" for an identifier.
U_CAPI int32_t U_EXPORT2 uspoof_getBidiSkeleton(const USpoofChecker *sc, UBiDiDirection direction, const UChar *id, int32_t length, UChar *dest, int32_t destCapacity, UErrorCode *status)
Get the "bidiSkeleton" for an identifier and a direction.
U_CAPI URestrictionLevel U_EXPORT2 uspoof_getRestrictionLevel(const USpoofChecker *sc)
Get the Restriction Level that will be tested if the checks include USPOOF_RESTRICTION_LEVEL.
U_CAPI void U_EXPORT2 uspoof_setAllowedLocales(USpoofChecker *sc, const char *localesList, UErrorCode *status)
Limit characters that are acceptable in identifiers being checked to those normally used with the lan...
U_CAPI URestrictionLevel U_EXPORT2 uspoof_getCheckResultRestrictionLevel(const USpoofCheckResult *checkResult, UErrorCode *status)
Gets the restriction level that the text meets, if the USPOOF_RESTRICTION_LEVEL check was enabled; ot...
U_CAPI void U_EXPORT2 uspoof_setChecks(USpoofChecker *sc, int32_t checks, UErrorCode *status)
Specify the bitmask of checks that will be performed by uspoof_check.
U_CAPI const icu::UnicodeSet *U_EXPORT2 uspoof_getAllowedUnicodeSet(const USpoofChecker *sc, UErrorCode *status)
Get a UnicodeSet for the characters permitted in an identifier.
U_CAPI int32_t U_EXPORT2 uspoof_getChecks(const USpoofChecker *sc, UErrorCode *status)
Get the set of checks that this Spoof Checker has been configured to perform.
U_CAPI void U_EXPORT2 uspoof_closeCheckResult(USpoofCheckResult *checkResult)
Close a USpoofCheckResult, freeing any memory that was being held by its implementation.
U_CAPI int32_t U_EXPORT2 uspoof_check2UTF8(const USpoofChecker *sc, const char *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
U_CAPI int32_t U_EXPORT2 uspoof_areConfusableUnicodeString(const USpoofChecker *sc, const icu::UnicodeString &s1, const icu::UnicodeString &s2, UErrorCode *status)
A version of uspoof_areConfusable accepting UnicodeStrings.
U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getSkeletonUnicodeString(const USpoofChecker *sc, uint32_t type, const icu::UnicodeString &id, icu::UnicodeString &dest, UErrorCode *status)
Get the "skeleton" for an identifier.
U_CAPI int32_t U_EXPORT2 uspoof_areConfusableUTF8(const USpoofChecker *sc, const char *id1, int32_t length1, const char *id2, int32_t length2, UErrorCode *status)
A version of uspoof_areConfusable accepting strings in UTF-8 format.
U_CAPI USpoofCheckResult *U_EXPORT2 uspoof_openCheckResult(UErrorCode *status)
Create a USpoofCheckResult, used by the uspoof_check2 class of functions to return information about ...
U_CAPI const USet *U_EXPORT2 uspoof_getRecommendedSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
U_CAPI const USet *U_EXPORT2 uspoof_getInclusionSet(UErrorCode *status)
Get the set of Candidate Characters for Inclusion in Identifiers, as defined in http://unicode....
U_CAPI int32_t U_EXPORT2 uspoof_serialize(USpoofChecker *sc, void *data, int32_t capacity, UErrorCode *status)
Serialize the data for a spoof detector into a chunk of memory.
struct USpoofCheckResult USpoofCheckResult
Definition: uspoof.h:434
U_I18N_API icu::UnicodeString &U_EXPORT2 uspoof_getBidiSkeletonUnicodeString(const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &id, icu::UnicodeString &dest, UErrorCode *status)
Get the "bidiSkeleton" for an identifier and a direction.
U_CAPI USpoofChecker *U_EXPORT2 uspoof_openFromSerialized(const void *data, int32_t length, int32_t *pActualLength, UErrorCode *pErrorCode)
Open a Spoof checker from its serialized form, stored in 32-bit-aligned memory.
U_CAPI const char *U_EXPORT2 uspoof_getAllowedLocales(USpoofChecker *sc, UErrorCode *status)
Get a list of locales for the scripts that are acceptable in strings to be checked.
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUnicodeString(const USpoofChecker *sc, UBiDiDirection direction, const icu::UnicodeString &s1, const icu::UnicodeString &s2, UErrorCode *status)
A version of uspoof_areBidiConfusable accepting UnicodeStrings.
U_CAPI int32_t U_EXPORT2 uspoof_check2(const USpoofChecker *sc, const UChar *id, int32_t length, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
U_CAPI int32_t U_EXPORT2 uspoof_checkUTF8(const USpoofChecker *sc, const char *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
struct USpoofChecker USpoofChecker
typedef for C of USpoofChecker
Definition: uspoof.h:427
U_CAPI uint32_t U_EXPORT2 uspoof_areBidiConfusableUTF8(const USpoofChecker *sc, UBiDiDirection direction, const char *id1, int32_t length1, const char *id2, int32_t length2, UErrorCode *status)
A version of uspoof_areBidiConfusable accepting strings in UTF-8 format.
U_CAPI const icu::UnicodeSet *U_EXPORT2 uspoof_getRecommendedUnicodeSet(UErrorCode *status)
Get the set of characters from Recommended Scripts for Inclusion in Identifiers, as defined in http:/...
U_CAPI USpoofChecker *U_EXPORT2 uspoof_open(UErrorCode *status)
Create a Unicode Spoof Checker, configured to perform all checks except for USPOOF_LOCALE_LIMIT and U...
U_CAPI void U_EXPORT2 uspoof_close(USpoofChecker *sc)
Close a Spoof Checker, freeing any memory that was being held by its implementation.
U_CAPI int32_t U_EXPORT2 uspoof_check(const USpoofChecker *sc, const UChar *id, int32_t length, int32_t *position, UErrorCode *status)
Check the specified string for possible security issues.
U_CAPI USpoofChecker *U_EXPORT2 uspoof_openFromSource(const char *confusables, int32_t confusablesLen, const char *confusablesWholeScript, int32_t confusablesWholeScriptLen, int32_t *errType, UParseError *pe, UErrorCode *status)
Open a Spoof Checker from the source form of the spoof data.
U_CAPI int32_t U_EXPORT2 uspoof_areConfusable(const USpoofChecker *sc, const UChar *id1, int32_t length1, const UChar *id2, int32_t length2, UErrorCode *status)
Check whether two specified strings are visually confusable.
U_CAPI int32_t U_EXPORT2 uspoof_check2UnicodeString(const USpoofChecker *sc, const icu::UnicodeString &id, USpoofCheckResult *checkResult, UErrorCode *status)
Check the specified string for possible security issues.
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_I18N_API
Set to export library symbols from inside the i18n library, and to import them from outside.
Definition: utypes.h:316