ICU 76.1 76.1
Loading...
Searching...
No Matches
normlzr.h
Go to the documentation of this file.
1// © 2016 and later: Unicode, Inc. and others.
2// License & terms of use: http://www.unicode.org/copyright.html
3/*
4 ********************************************************************
5 * COPYRIGHT:
6 * Copyright (c) 1996-2015, International Business Machines Corporation and
7 * others. All Rights Reserved.
8 ********************************************************************
9 */
10
11#ifndef NORMLZR_H
12#define NORMLZR_H
13
14#include "unicode/utypes.h"
15
16#if U_SHOW_CPLUSPLUS_API
17
23#if !UCONFIG_NO_NORMALIZATION
24
25#include "unicode/chariter.h"
26#include "unicode/normalizer2.h"
27#include "unicode/unistr.h"
28#include "unicode/unorm.h"
29#include "unicode/uobject.h"
30
31U_NAMESPACE_BEGIN
137public:
138#ifndef U_HIDE_DEPRECATED_API
144 enum {
145 DONE=0xffff
146 };
147
148 // Constructors
149
161
174
186#endif /* U_HIDE_DEPRECATED_API */
187
188#ifndef U_FORCE_HIDE_DEPRECATED_API
194 Normalizer(const Normalizer& copy);
195
200 virtual ~Normalizer();
201#endif // U_FORCE_HIDE_DEPRECATED_API
202
203 //-------------------------------------------------------------------------
204 // Static utility methods
205 //-------------------------------------------------------------------------
206
207#ifndef U_HIDE_DEPRECATED_API
222 static void U_EXPORT2 normalize(const UnicodeString& source,
226
244 static void U_EXPORT2 compose(const UnicodeString& source,
245 UBool compat, int32_t options,
248
266 static void U_EXPORT2 decompose(const UnicodeString& source,
267 UBool compat, int32_t options,
270
291 static inline UNormalizationCheckResult
292 quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
293
309
330 static inline UBool
331 isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
332
348 static UBool
350
380 static UnicodeString &
384 UErrorCode &errorCode);
385#endif /* U_HIDE_DEPRECATED_API */
386
451 static inline int32_t
452 compare(const UnicodeString &s1, const UnicodeString &s2,
453 uint32_t options,
454 UErrorCode &errorCode);
455
456#ifndef U_HIDE_DEPRECATED_API
457 //-------------------------------------------------------------------------
458 // Iteration API
459 //-------------------------------------------------------------------------
460
470
480
490
506
522
533
539 void reset();
540
556
566
578
587 bool operator==(const Normalizer& that) const;
588
597 inline bool operator!=(const Normalizer& that) const;
598
606
614
615 //-------------------------------------------------------------------------
616 // Property access methods
617 //-------------------------------------------------------------------------
618
635
647
665 UBool value);
666
678
689
700
711 int32_t length,
720
727#endif /* U_HIDE_DEPRECATED_API */
728
729#ifndef U_FORCE_HIDE_DEPRECATED_API
735 virtual UClassID getDynamicClassID() const override;
736#endif // U_FORCE_HIDE_DEPRECATED_API
737
738private:
739 //-------------------------------------------------------------------------
740 // Private functions
741 //-------------------------------------------------------------------------
742
743 Normalizer() = delete; // default constructor not implemented
744 Normalizer &operator=(const Normalizer &that) = delete; // assignment operator not implemented
745
746 // Private utility methods for iteration
747 // For documentation, see the source code
748 UBool nextNormalize();
749 UBool previousNormalize();
750
751 void init();
752 void clearBuffer();
753
754 //-------------------------------------------------------------------------
755 // Private data
756 //-------------------------------------------------------------------------
757
758 FilteredNormalizer2*fFilteredNorm2; // owned if not nullptr
759 const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
760 UNormalizationMode fUMode; // deprecated
761 int32_t fOptions;
762
763 // The input text and our position in it
764 CharacterIterator *text;
765
766 // The normalization buffer is the result of normalization
767 // of the source in [currentIndex..nextIndex[ .
768 int32_t currentIndex, nextIndex;
769
770 // A buffer for holding intermediate results
771 UnicodeString buffer;
772 int32_t bufferPos;
773};
774
775//-------------------------------------------------------------------------
776// Inline implementations
777//-------------------------------------------------------------------------
778
779#ifndef U_HIDE_DEPRECATED_API
780inline bool
781Normalizer::operator!= (const Normalizer& other) const
782{ return ! operator==(other); }
783
785Normalizer::quickCheck(const UnicodeString& source,
788 return quickCheck(source, mode, 0, status);
789}
790
791inline UBool
792Normalizer::isNormalized(const UnicodeString& source,
795 return isNormalized(source, mode, 0, status);
796}
797#endif /* U_HIDE_DEPRECATED_API */
798
799inline int32_t
800Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
801 uint32_t options,
802 UErrorCode &errorCode) {
803 // all argument checking is done in unorm_compare
804 return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(),
805 toUCharPtr(s2.getBuffer()), s2.length(),
806 options,
807 &errorCode);
808}
809
811
812#endif /* #if !UCONFIG_NO_NORMALIZATION */
813
814#endif // NORMLZR_H
815
816#endif /* U_SHOW_CPLUSPLUS_API */
C++ API: Character Iterator.
Abstract class that defines an API for iteration on text objects.
Definition chariter.h:361
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition char16ptr.h:150
Normalization filtered by a UnicodeSet.
"Smart pointer" base class; do not use directly: use LocalPointer etc.
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition normalizer2.h:85
Old Unicode normalization API.
Definition normlzr.h:136
bool operator==(const Normalizer &that) const
Returns true when both iterators refer to the same character in the same input text.
UChar32 next()
Return the next character in the normalized text.
static void decompose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Static method to decompose a UnicodeString.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
void setMode(UNormalizationMode newMode)
Set the normalization mode for this object.
virtual ~Normalizer()
Destructor.
void setText(const UnicodeString &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
int32_t startIndex() const
Retrieve the index of the start of the input text.
int32_t getIndex() const
Retrieve the current iteration position in the input text that is being normalized.
UChar32 previous()
Return the previous character in the normalized text and decrement.
UNormalizationMode getUMode() const
Return the normalization mode for this object.
UBool getOption(int32_t option) const
Determine whether an option is turned on or off.
void setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Test if a string is in a given normalization form; same as the other version of isNormalized but takes an extra options parameter like most normalization functions.
UChar32 last()
Return the last character in the normalized text.
Normalizer(const Normalizer &copy)
Copy constructor.
Normalizer(const UnicodeString &str, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
void setText(const CharacterIterator &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status)
Performs a quick check on a string; same as the other version of quickCheck but takes an extra options parameter like most normalization functions.
Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
void setOption(int32_t option, UBool value)
Set options that affect this Normalizer's operation.
UChar32 first()
Return the first character in the normalized text.
static void normalize(const UnicodeString &source, UNormalizationMode mode, int32_t options, UnicodeString &result, UErrorCode &status)
Normalizes a UnicodeString according to the specified normalization mode.
static UnicodeString & concatenate(const UnicodeString &left, const UnicodeString &right, UnicodeString &result, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Concatenate normalized strings, making sure that the result is normalized as well.
Normalizer(const CharacterIterator &iter, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of the given text.
void reset()
Reset the index to the beginning of the text.
int32_t endIndex() const
Retrieve the index of the end of the input text.
int32_t hashCode() const
Generates a hash code for this iterator.
void getText(UnicodeString &result)
Copies the input text into the UnicodeString argument.
void setIndexOnly(int32_t index)
Set the iteration position in the input text that is being normalized, without any immediate normalization.
static void compose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Compose a UnicodeString.
Normalizer * clone() const
Returns a pointer to a new Normalizer that is a clone of this one.
UChar32 current()
Return the current character in the normalized text.
UObject is the common ICU "boilerplate" class.
Definition uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition unistr.h:296
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
const UChar * toUCharPtr(const char16_t *p)
Converts from const char16_t * to const UChar *.
Definition char16ptr.h:261
C++ API: New API for Unicode Normalization.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition umachine.h:247
C++ API: Unicode String.
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition unorm2.h:97
U_CAPI int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
C API: Unicode Normalization.
UNormalizationMode
Constants for normalization modes.
Definition unorm.h:140
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition utypes.h:315