ICU 77.1  77.1
All Data Structures Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
normlzr.h
Go to the documentation of this file.
1 // © 2016 and later: Unicode, Inc. and others.
2 // License & terms of use: http://www.unicode.org/copyright.html
3 /*
4  ********************************************************************
5  * COPYRIGHT:
6  * Copyright (c) 1996-2015, International Business Machines Corporation and
7  * others. All Rights Reserved.
8  ********************************************************************
9  */
10 
11 #ifndef NORMLZR_H
12 #define NORMLZR_H
13 
14 #include "unicode/utypes.h"
15 
16 #if U_SHOW_CPLUSPLUS_API
17 
23 #if !UCONFIG_NO_NORMALIZATION
24 
25 #include "unicode/chariter.h"
26 #include "unicode/normalizer2.h"
27 #include "unicode/unistr.h"
28 #include "unicode/unorm.h"
29 #include "unicode/uobject.h"
30 
31 U_NAMESPACE_BEGIN
136 class U_COMMON_API Normalizer : public UObject { // Old Unicode normalization API.
137 public:
138 #ifndef U_HIDE_DEPRECATED_API
144  enum {
145  DONE=0xffff
146  };
147 
148  // Constructors
149 
160  Normalizer(const UnicodeString& str, UNormalizationMode mode); // iterates over the normalized form of a given string
161 
173  Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode);
174 
185  Normalizer(const CharacterIterator& iter, UNormalizationMode mode); // iterates over the normalized form of the given text
186 #endif /* U_HIDE_DEPRECATED_API */
187 
188 #ifndef U_FORCE_HIDE_DEPRECATED_API
194  Normalizer(const Normalizer& copy);
195 
200  virtual ~Normalizer();
201 #endif // U_FORCE_HIDE_DEPRECATED_API
202 
203  //-------------------------------------------------------------------------
204  // Static utility methods
205  //-------------------------------------------------------------------------
206 
207 #ifndef U_HIDE_DEPRECATED_API
222  static void U_EXPORT2 normalize(const UnicodeString& source,
223  UNormalizationMode mode, int32_t options,
224  UnicodeString& result,
225  UErrorCode &status);
226 
244  static void U_EXPORT2 compose(const UnicodeString& source,
245  UBool compat, int32_t options,
246  UnicodeString& result,
247  UErrorCode &status);
248 
266  static void U_EXPORT2 decompose(const UnicodeString& source,
267  UBool compat, int32_t options,
268  UnicodeString& result,
269  UErrorCode &status);
270 
291  static inline UNormalizationCheckResult
292  quickCheck(const UnicodeString &source, UNormalizationMode mode, UErrorCode &status);
293 
307  static UNormalizationCheckResult // quick check; same as the 3-argument overload but takes an extra options argument
308  quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status);
309 
330  static inline UBool
331  isNormalized(const UnicodeString &src, UNormalizationMode mode, UErrorCode &errorCode);
332 
348  static UBool
349  isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode);
350 
380  static UnicodeString &
381  U_EXPORT2 concatenate(const UnicodeString &left, const UnicodeString &right,
382  UnicodeString &result,
383  UNormalizationMode mode, int32_t options,
384  UErrorCode &errorCode);
385 #endif /* U_HIDE_DEPRECATED_API */
386 
451  static inline int32_t
452  compare(const UnicodeString &s1, const UnicodeString &s2,
453  uint32_t options,
454  UErrorCode &errorCode);
455 
456 #ifndef U_HIDE_DEPRECATED_API
457  //-------------------------------------------------------------------------
458  // Iteration API
459  //-------------------------------------------------------------------------
460 
469  UChar32 current(); // Return the current character in the normalized text.
470 
479  UChar32 first(); // Return the first character in the normalized text.
480 
489  UChar32 last(); // Return the last character in the normalized text.
490 
505  UChar32 next(); // Return the next character in the normalized text.
506 
521  UChar32 previous(); // Return the previous character in the normalized text and decrement.
522 
532  void setIndexOnly(int32_t index);
533 
539  void reset();
540 
555  int32_t getIndex() const;
556 
565  int32_t startIndex() const;
566 
577  int32_t endIndex() const;
578 
587  bool operator==(const Normalizer& that) const;
588 
597  inline bool operator!=(const Normalizer& that) const;
598 
605  Normalizer* clone() const;
606 
613  int32_t hashCode() const;
614 
615  //-------------------------------------------------------------------------
616  // Property access methods
617  //-------------------------------------------------------------------------
618 
634  void setMode(UNormalizationMode newMode); // Set the normalization mode for this object.
635 
646  UNormalizationMode getUMode() const; // Return the normalization mode for this object.
647 
664  void setOption(int32_t option,
665  UBool value);
666 
677  UBool getOption(int32_t option) const;
678 
687  void setText(const UnicodeString& newText,
688  UErrorCode &status);
689 
698  void setText(const CharacterIterator& newText,
699  UErrorCode &status);
700 
710  void setText(ConstChar16Ptr newText,
711  int32_t length,
712  UErrorCode &status);
719  void getText(UnicodeString& result);
720 
726  static UClassID U_EXPORT2 getStaticClassID();
727 #endif /* U_HIDE_DEPRECATED_API */
728 
729 #ifndef U_FORCE_HIDE_DEPRECATED_API
735  virtual UClassID getDynamicClassID() const override;
736 #endif // U_FORCE_HIDE_DEPRECATED_API
737 
738 private:
739  //-------------------------------------------------------------------------
740  // Private functions
741  //-------------------------------------------------------------------------
742 
743  Normalizer() = delete; // default constructor not implemented
744  Normalizer &operator=(const Normalizer &that) = delete; // assignment operator not implemented
745 
746  // Private utility methods for iteration
747  // For documentation, see the source code
748  UBool nextNormalize();
749  UBool previousNormalize();
750 
751  void init();
752  void clearBuffer();
753 
754  //-------------------------------------------------------------------------
755  // Private data
756  //-------------------------------------------------------------------------
757 
758  FilteredNormalizer2*fFilteredNorm2; // owned if not nullptr
759  const Normalizer2 *fNorm2; // not owned; may be equal to fFilteredNorm2
760  UNormalizationMode fUMode; // deprecated
761  int32_t fOptions;
762 
763  // The input text and our position in it
764  CharacterIterator *text;
765 
766  // The normalization buffer is the result of normalization
767  // of the source in [currentIndex..nextIndex[ .
768  int32_t currentIndex, nextIndex;
769 
770  // A buffer for holding intermediate results
771  UnicodeString buffer;
772  int32_t bufferPos;
773 };
774 
775 //-------------------------------------------------------------------------
776 // Inline implementations
777 //-------------------------------------------------------------------------
778 
779 #ifndef U_HIDE_DEPRECATED_API
780 inline bool
781 Normalizer::operator!=(const Normalizer& other) const // inequality is defined as the negation of operator==
782 { return ! operator==(other); }
783 
784 inline UNormalizationCheckResult
785 Normalizer::quickCheck(const UnicodeString& source,
786  UNormalizationMode mode,
787  UErrorCode &status) {
788  return quickCheck(source, mode, 0, status); // forwards to the 4-argument overload with options == 0
789 }
790 
791 inline UBool
792 Normalizer::isNormalized(const UnicodeString& source, // string to test
793  UNormalizationMode mode, // normalization form to test against
794  UErrorCode &status) { // passed through unchanged to the 4-argument overload
795  return isNormalized(source, mode, 0, status); // convenience overload: forwards with options == 0
796 }
797 #endif /* U_HIDE_DEPRECATED_API */
798 
799 inline int32_t
800 Normalizer::compare(const UnicodeString &s1, const UnicodeString &s2,
801  uint32_t options,
802  UErrorCode &errorCode) {
803  // all argument checking is done in unorm_compare
804  return unorm_compare(toUCharPtr(s1.getBuffer()), s1.length(), // thin wrapper: hands s1's buffer and length to the C API
805  toUCharPtr(s2.getBuffer()), s2.length(), // likewise for s2
806  options, // forwarded unchanged
807  &errorCode); // the C API takes the error code by pointer
808 }
809 
810 U_NAMESPACE_END
811 
812 #endif /* #if !UCONFIG_NO_NORMALIZATION */
813 
814 #endif // U_SHOW_CPLUSPLUS_API
815 
816 #endif /* NORMLZR_H */
C++ API: Character Iterator.
Abstract class that defines an API for iteration on text objects.
Definition: chariter.h:361
const char16_t * wrapper with implicit conversion from distinct but bit-compatible pointer types.
Definition: char16ptr.h:156
Normalization filtered by a UnicodeSet.
Definition: normalizer2.h:519
Unicode normalization functionality for standard Unicode normalization or for using custom mapping tables.
Definition: normalizer2.h:85
Old Unicode normalization API.
Definition: normlzr.h:136
bool operator==(const Normalizer &that) const
Returns true when both iterators refer to the same character in the same input text.
UChar32 next()
Return the next character in the normalized text.
static void decompose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Static method to decompose a UnicodeString.
static UClassID getStaticClassID()
ICU "poor man's RTTI", returns a UClassID for this class.
virtual UClassID getDynamicClassID() const override
ICU "poor man's RTTI", returns a UClassID for the actual class.
void setMode(UNormalizationMode newMode)
Set the normalization mode for this object.
virtual ~Normalizer()
Destructor.
void setText(const UnicodeString &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
int32_t startIndex() const
Retrieve the index of the start of the input text.
int32_t getIndex() const
Retrieve the current iteration position in the input text that is being normalized.
UChar32 previous()
Return the previous character in the normalized text and decrement.
UNormalizationMode getUMode() const
Return the normalization mode for this object.
UBool getOption(int32_t option) const
Determine whether an option is turned on or off.
void setText(ConstChar16Ptr newText, int32_t length, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UBool isNormalized(const UnicodeString &src, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Test if a string is in a given normalization form; same as the other version of isNormalized but takes an extra options parameter like most normalization functions.
UChar32 last()
Return the last character in the normalized text.
Normalizer(const Normalizer &copy)
Copy constructor.
Normalizer(const UnicodeString &str, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
void setText(const CharacterIterator &newText, UErrorCode &status)
Set the input text over which this Normalizer will iterate.
static UNormalizationCheckResult quickCheck(const UnicodeString &source, UNormalizationMode mode, int32_t options, UErrorCode &status)
Performing quick check on a string; same as the other version of quickCheck but takes an extra options parameter like most normalization functions.
Normalizer(ConstChar16Ptr str, int32_t length, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of a given string.
void setOption(int32_t option, UBool value)
Set options that affect this Normalizer's operation.
UChar32 first()
Return the first character in the normalized text.
static void normalize(const UnicodeString &source, UNormalizationMode mode, int32_t options, UnicodeString &result, UErrorCode &status)
Normalizes a UnicodeString according to the specified normalization mode.
Normalizer(const CharacterIterator &iter, UNormalizationMode mode)
Creates a new Normalizer object for iterating over the normalized form of the given text.
void reset()
Reset the index to the beginning of the text.
Normalizer * clone() const
Returns a pointer to a new Normalizer that is a clone of this one.
int32_t endIndex() const
Retrieve the index of the end of the input text.
int32_t hashCode() const
Generates a hash code for this iterator.
static UnicodeString & concatenate(const UnicodeString &left, const UnicodeString &right, UnicodeString &result, UNormalizationMode mode, int32_t options, UErrorCode &errorCode)
Concatenate normalized strings, making sure that the result is normalized as well.
void getText(UnicodeString &result)
Copies the input text into the UnicodeString argument.
void setIndexOnly(int32_t index)
Set the iteration position in the input text that is being normalized, without any immediate normalization.
static void compose(const UnicodeString &source, UBool compat, int32_t options, UnicodeString &result, UErrorCode &status)
Compose a UnicodeString.
UChar32 current()
Return the current character in the normalized text.
UObject is the common ICU "boilerplate" class.
Definition: uobject.h:223
UnicodeString is a string class that stores Unicode characters directly and provides similar functionality as the Java String and StringBuffer/StringBuilder classes.
Definition: unistr.h:296
int32_t length() const
Return the length of the UnicodeString object.
Definition: unistr.h:4214
char16_t * getBuffer(int32_t minCapacity)
Get a read/write pointer to the internal buffer.
U_EXPORT UBool operator==(const StringPiece &x, const StringPiece &y)
Global operator == for StringPiece.
bool operator!=(const StringPiece &x, const StringPiece &y)
Global operator != for StringPiece.
Definition: stringpiece.h:346
C++ API: New API for Unicode Normalization.
int32_t UChar32
Define UChar32 as a type for single Unicode code points.
Definition: umachine.h:427
int8_t UBool
The ICU boolean type, a signed-byte integer.
Definition: umachine.h:247
C++ API: Unicode String.
UNormalizationCheckResult
Result values for normalization quick check functions.
Definition: unorm2.h:97
U_CAPI int32_t unorm_compare(const UChar *s1, int32_t length1, const UChar *s2, int32_t length2, uint32_t options, UErrorCode *pErrorCode)
Compares two strings for canonical equivalence.
C API: Unicode Normalization.
UNormalizationMode
Constants for normalization modes.
Definition: unorm.h:140
C++ API: Common ICU base class UObject.
void * UClassID
UClassID is used to identify classes without using the compiler's RTTI.
Definition: uobject.h:96
Basic definitions for ICU, for both C and C++ APIs.
UErrorCode
Standard ICU4C error code type, a substitute for exceptions.
Definition: utypes.h:430
#define U_COMMON_API
Set to export library symbols from inside the common library, and to import them from outside.
Definition: utypes.h:315