icu_properties/
props.rs

Help
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module defines all available properties.
6//!
7//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8//! and implement [`EnumeratedProperty`].
9//!
10//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12//!
13//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16//! can be constructed.
17//!
18//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Re-emits an `impl` block of associated constants and derives value-listing
/// helpers from it.
///
/// See [`test_enumerated_property_completeness`] for usage.
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
/// Produces the same `impl` block with one extra associated constant,
/// `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`,
/// listing every declared constant in declaration order. It also emits
/// `impl From<EastAsianWidth> for u16`, which casts the first field of the
/// struct to `u16`.
macro_rules! create_const_array {
    (
        $ ( #[$meta:meta] )*
        impl $enum_ty:ident {
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
        }
    ) => {
        // The attributes placed on the input `impl` are carried over unchanged.
        $( #[$meta] )*
        impl $enum_ty {
            // Each constant is reproduced exactly as written, including its docs.
            $(
                $(#[$const_meta])*
                $v const $i: $t = $e;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $($enum_ty::$i),*
            ];
        }


        // Requires `$enum_ty` to be a tuple struct whose field `0` can be cast to `u16`.
        impl From<$enum_ty> for u16  {
            fn from(other: $enum_ty) -> Self {
                other.0 as u16
            }
        }
    }
}
64
65pub use crate::code_point_map::EnumeratedProperty;
66
/// Wires a property value type into the enumerated-property machinery.
///
/// For the type given as `ident`, this emits:
/// - an impl of the crate-private `Sealed` marker trait,
/// - an [`EnumeratedProperty`] impl whose `NAME` / `SHORT_NAME` are the bytes of
///   the `name` / `short_name` literals, whose `DataMarker` is `data_marker`,
///   and (only with the `compiled_data` feature) whose `SINGLETON` is the
///   `singleton` item of `crate::provider::Baked`,
/// - when the optional `ule_ty` is supplied, a `zerovec::ule::AsULE` impl that
///   delegates both directions to field `0` of the type.
macro_rules! make_enumerated_property {
    (
        name: $long:literal;
        short_name: $short:literal;
        ident: $prop:path;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(ule_ty: $ule:ty;)?
    ) => {
        impl crate::private::Sealed for $prop {}

        impl EnumeratedProperty for $prop {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$baked;
            const NAME: &'static [u8] = $long.as_bytes();
            const SHORT_NAME: &'static [u8] = $short.as_bytes();
        }

        // Emitted at most once, and only if the caller passed `ule_ty`.
        $(
            impl zerovec::ule::AsULE for $prop {
                type ULE = $ule;

                fn to_unaligned(self) -> Self::ULE {
                    self.0.to_unaligned()
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
101
/// Enumerated property Bidi_Class
///
/// These are the categories required by the Unicode Bidirectional Algorithm.
/// For the property values, see [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values).
/// For more information, see [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// This is an open enum: a newtype over a raw `u8` whose named values are
/// provided as associated constants (for example `BidiClass::LeftToRight`).
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::BidiClass};
///
/// assert_eq!(CodePointMapData::<BidiClass>::new().get('y'), BidiClass::LeftToRight);  // U+0079
/// assert_eq!(CodePointMapData::<BidiClass>::new().get('ع'), BidiClass::ArabicLetter);  // U+0639
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
// The raw value is crate-private; outside the crate it is reachable through
// `to_icu4c_value` / `from_icu4c_value`.
pub struct BidiClass(pub(crate) u8);
123
124impl BidiClass {
125    /// Returns an ICU4C `UBidiClass` value.
126    pub const fn to_icu4c_value(self) -> u8 {
127        self.0
128    }
129    /// Constructor from an ICU4C `UBidiClass` value.
130    pub const fn from_icu4c_value(value: u8) -> Self {
131        Self(value)
132    }
133}
134
// Named `BidiClass` values. Routing the `impl` through `create_const_array!`
// keeps the constants exactly as written and additionally generates
// `BidiClass::ALL_VALUES` and `impl From<BidiClass> for u16`.
create_const_array! {
#[allow(non_upper_case_globals)]
impl BidiClass {
    /// (`L`) any strong left-to-right character
    pub const LeftToRight: BidiClass = BidiClass(0);
    /// (`R`) any strong right-to-left (non-Arabic-type) character
    pub const RightToLeft: BidiClass = BidiClass(1);
    /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
    pub const EuropeanNumber: BidiClass = BidiClass(2);
    /// (`ES`) plus and minus signs
    pub const EuropeanSeparator: BidiClass = BidiClass(3);
    /// (`ET`) a terminator in a numeric format context, includes currency signs
    pub const EuropeanTerminator: BidiClass = BidiClass(4);
    /// (`AN`) any Arabic-Indic digit
    pub const ArabicNumber: BidiClass = BidiClass(5);
    /// (`CS`) commas, colons, and slashes
    pub const CommonSeparator: BidiClass = BidiClass(6);
    /// (`B`) various newline characters
    pub const ParagraphSeparator: BidiClass = BidiClass(7);
    /// (`S`) various segment-related control codes
    pub const SegmentSeparator: BidiClass = BidiClass(8);
    /// (`WS`) spaces
    pub const WhiteSpace: BidiClass = BidiClass(9);
    /// (`ON`) most other symbols and punctuation marks
    pub const OtherNeutral: BidiClass = BidiClass(10);
    /// (`LRE`) U+202A: the LR embedding control
    pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
    /// (`LRO`) U+202D: the LR override control
    pub const LeftToRightOverride: BidiClass = BidiClass(12);
    /// (`AL`) any strong right-to-left (Arabic-type) character
    pub const ArabicLetter: BidiClass = BidiClass(13);
    /// (`RLE`) U+202B: the RL embedding control
    pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
    /// (`RLO`) U+202E: the RL override control
    pub const RightToLeftOverride: BidiClass = BidiClass(15);
    /// (`PDF`) U+202C: terminates an embedding or override control
    pub const PopDirectionalFormat: BidiClass = BidiClass(16);
    /// (`NSM`) any nonspacing mark
    pub const NonspacingMark: BidiClass = BidiClass(17);
    /// (`BN`) most format characters, control codes, or noncharacters
    pub const BoundaryNeutral: BidiClass = BidiClass(18);
    /// (`FSI`) U+2068: the first strong isolate control
    pub const FirstStrongIsolate: BidiClass = BidiClass(19);
    /// (`LRI`) U+2066: the LR isolate control
    pub const LeftToRightIsolate: BidiClass = BidiClass(20);
    /// (`RLI`) U+2067: the RL isolate control
    pub const RightToLeftIsolate: BidiClass = BidiClass(21);
    /// (`PDI`) U+2069: terminates an isolate control
    pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
}
}
186
// Implements `Sealed` and `EnumeratedProperty` for `BidiClass`. Because `ule_ty`
// is given, the macro also emits a `zerovec::ule::AsULE` impl with `ULE = u8`.
make_enumerated_property! {
    name: "Bidi_Class";
    short_name: "bc";
    ident: BidiClass;
    data_marker: crate::provider::PropertyEnumBidiClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
    ule_ty: u8;
}
195
// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
pub(crate) mod gc {
    /// Enumerated property General_Category.
    ///
    /// General_Category specifies the most general classification of a code point, usually
    /// determined based on the primary characteristic of the assigned character. For example, is the
    /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
    ///
    /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
    /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
    /// crate::props::GeneralCategoryGroup).
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::{CodePointMapData, props::GeneralCategory};
    ///
    /// assert_eq!(CodePointMapData::<GeneralCategory>::new().get('木'), GeneralCategory::OtherLetter);  // U+6728
    /// assert_eq!(CodePointMapData::<GeneralCategory>::new().get('🎃'), GeneralCategory::OtherSymbol);  // U+1F383 JACK-O-LANTERN
    /// ```
    #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
    #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
    #[cfg_attr(feature = "datagen", derive(databake::Bake))]
    #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
    #[allow(clippy::exhaustive_enums)] // this type is stable
    #[zerovec::make_ule(GeneralCategoryULE)]
    #[repr(u8)]
    // NOTE: the discriminants are explicit and do not follow declaration order:
    // `SpacingMark = 8` is declared before `EnclosingMark = 7`, and
    // `InitialPunctuation = 28` / `FinalPunctuation = 29` are declared between
    // 22 and 23. `GeneralCategoryGroup` uses `1 << discriminant` as the bit for
    // each variant, so changing a number here changes the group masks too.
    pub enum GeneralCategory {
        /// (`Cn`) A reserved unassigned code point or a noncharacter
        Unassigned = 0,

        /// (`Lu`) An uppercase letter
        UppercaseLetter = 1,
        /// (`Ll`) A lowercase letter
        LowercaseLetter = 2,
        /// (`Lt`) A digraphic letter, with first part uppercase
        TitlecaseLetter = 3,
        /// (`Lm`) A modifier letter
        ModifierLetter = 4,
        /// (`Lo`) Other letters, including syllables and ideographs
        OtherLetter = 5,

        /// (`Mn`) A nonspacing combining mark (zero advance width)
        NonspacingMark = 6,
        /// (`Mc`) A spacing combining mark (positive advance width)
        SpacingMark = 8,
        /// (`Me`) An enclosing combining mark
        EnclosingMark = 7,

        /// (`Nd`) A decimal digit
        DecimalNumber = 9,
        /// (`Nl`) A letterlike numeric character
        LetterNumber = 10,
        /// (`No`) A numeric character of other type
        OtherNumber = 11,

        /// (`Zs`) A space character (of various non-zero widths)
        SpaceSeparator = 12,
        /// (`Zl`) U+2028 LINE SEPARATOR only
        LineSeparator = 13,
        /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
        ParagraphSeparator = 14,

        /// (`Cc`) A C0 or C1 control code
        Control = 15,
        /// (`Cf`) A format control character
        Format = 16,
        /// (`Co`) A private-use character
        PrivateUse = 17,
        /// (`Cs`) A surrogate code point
        Surrogate = 18,

        /// (`Pd`) A dash or hyphen punctuation mark
        DashPunctuation = 19,
        /// (`Ps`) An opening punctuation mark (of a pair)
        OpenPunctuation = 20,
        /// (`Pe`) A closing punctuation mark (of a pair)
        ClosePunctuation = 21,
        /// (`Pc`) A connecting punctuation mark, like a tie
        ConnectorPunctuation = 22,
        /// (`Pi`) An initial quotation mark
        InitialPunctuation = 28,
        /// (`Pf`) A final quotation mark
        FinalPunctuation = 29,
        /// (`Po`) A punctuation mark of other type
        OtherPunctuation = 23,

        /// (`Sm`) A symbol of mathematical use
        MathSymbol = 24,
        /// (`Sc`) A currency sign
        CurrencySymbol = 25,
        /// (`Sk`) A non-letterlike modifier symbol
        ModifierSymbol = 26,
        /// (`So`) A symbol of other type
        OtherSymbol = 27,
    }
}
293
294pub use gc::GeneralCategory;
295
296impl GeneralCategory {
297    /// All possible values of this enum
298    pub const ALL_VALUES: &'static [GeneralCategory] = &[
299        GeneralCategory::Unassigned,
300        GeneralCategory::UppercaseLetter,
301        GeneralCategory::LowercaseLetter,
302        GeneralCategory::TitlecaseLetter,
303        GeneralCategory::ModifierLetter,
304        GeneralCategory::OtherLetter,
305        GeneralCategory::NonspacingMark,
306        GeneralCategory::SpacingMark,
307        GeneralCategory::EnclosingMark,
308        GeneralCategory::DecimalNumber,
309        GeneralCategory::LetterNumber,
310        GeneralCategory::OtherNumber,
311        GeneralCategory::SpaceSeparator,
312        GeneralCategory::LineSeparator,
313        GeneralCategory::ParagraphSeparator,
314        GeneralCategory::Control,
315        GeneralCategory::Format,
316        GeneralCategory::PrivateUse,
317        GeneralCategory::Surrogate,
318        GeneralCategory::DashPunctuation,
319        GeneralCategory::OpenPunctuation,
320        GeneralCategory::ClosePunctuation,
321        GeneralCategory::ConnectorPunctuation,
322        GeneralCategory::InitialPunctuation,
323        GeneralCategory::FinalPunctuation,
324        GeneralCategory::OtherPunctuation,
325        GeneralCategory::MathSymbol,
326        GeneralCategory::CurrencySymbol,
327        GeneralCategory::ModifierSymbol,
328        GeneralCategory::OtherSymbol,
329    ];
330}
331
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
///
/// Returned by that conversion when the integer is out of bounds.
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Default)]
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
336
337impl TryFrom<u8> for GeneralCategory {
338    type Error = GeneralCategoryOutOfBoundsError;
339    /// Construct this [`GeneralCategory`] from an integer, returning
340    /// an error if it is out of bounds
341    fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
342        GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
343    }
344}
345
// Implements `Sealed` and `EnumeratedProperty` for `GeneralCategory`. No `ule_ty`
// is passed, so the macro emits no `AsULE` impl here.
// NOTE(review): the ULE type presumably comes from
// `#[zerovec::make_ule(GeneralCategoryULE)]` on the enum — confirm.
make_enumerated_property! {
    name: "General_Category";
    short_name: "gc";
    ident: GeneralCategory;
    data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
}
353
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/> .
///
/// A group is stored as a `u32` bit set in which the bit
/// `1 << (category as u32)` stands for one [`GeneralCategory`] value.
/// The mask values correspond to the `U_GC_XX_MASK` constants in ICU4C.
///
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);

// Marks `GeneralCategoryGroup` with the crate-private `Sealed` trait.
impl crate::private::Sealed for GeneralCategoryGroup {}
375
376use GeneralCategory as GC;
377use GeneralCategoryGroup as GCG;
378
379#[allow(non_upper_case_globals)]
380impl GeneralCategoryGroup {
381    /// (`Lu`) An uppercase letter
382    pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
383    /// (`Ll`) A lowercase letter
384    pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
385    /// (`Lt`) A digraphic letter, with first part uppercase
386    pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
387    /// (`Lm`) A modifier letter
388    pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
389    /// (`Lo`) Other letters, including syllables and ideographs
390    pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
391    /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
392    pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
393        | (1 << (GC::LowercaseLetter as u32))
394        | (1 << (GC::TitlecaseLetter as u32)));
395    /// (`L`) The union of all letter categories
396    pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
397        | (1 << (GC::LowercaseLetter as u32))
398        | (1 << (GC::TitlecaseLetter as u32))
399        | (1 << (GC::ModifierLetter as u32))
400        | (1 << (GC::OtherLetter as u32)));
401
402    /// (`Mn`) A nonspacing combining mark (zero advance width)
403    pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
404    /// (`Mc`) A spacing combining mark (positive advance width)
405    pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
406    /// (`Me`) An enclosing combining mark
407    pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
408    /// (`M`) The union of all mark categories
409    pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
410        | (1 << (GC::EnclosingMark as u32))
411        | (1 << (GC::SpacingMark as u32)));
412
413    /// (`Nd`) A decimal digit
414    pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
415    /// (`Nl`) A letterlike numeric character
416    pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
417    /// (`No`) A numeric character of other type
418    pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
419    /// (`N`) The union of all number categories
420    pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
421        | (1 << (GC::LetterNumber as u32))
422        | (1 << (GC::OtherNumber as u32)));
423
424    /// (`Zs`) A space character (of various non-zero widths)
425    pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
426    /// (`Zl`) U+2028 LINE SEPARATOR only
427    pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
428    /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
429    pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
430    /// (`Z`) The union of all separator categories
431    pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
432        | (1 << (GC::LineSeparator as u32))
433        | (1 << (GC::ParagraphSeparator as u32)));
434
435    /// (`Cc`) A C0 or C1 control code
436    pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
437    /// (`Cf`) A format control character
438    pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
439    /// (`Co`) A private-use character
440    pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
441    /// (`Cs`) A surrogate code point
442    pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
443    /// (`Cn`) A reserved unassigned code point or a noncharacter
444    pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
445    /// (`C`) The union of all control code, reserved, and unassigned categories
446    pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
447        | (1 << (GC::Format as u32))
448        | (1 << (GC::PrivateUse as u32))
449        | (1 << (GC::Surrogate as u32))
450        | (1 << (GC::Unassigned as u32)));
451
452    /// (`Pd`) A dash or hyphen punctuation mark
453    pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
454    /// (`Ps`) An opening punctuation mark (of a pair)
455    pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
456    /// (`Pe`) A closing punctuation mark (of a pair)
457    pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
458    /// (`Pc`) A connecting punctuation mark, like a tie
459    pub const ConnectorPunctuation: GeneralCategoryGroup =
460        GCG(1 << (GC::ConnectorPunctuation as u32));
461    /// (`Pi`) An initial quotation mark
462    pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
463    /// (`Pf`) A final quotation mark
464    pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
465    /// (`Po`) A punctuation mark of other type
466    pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
467    /// (`P`) The union of all punctuation categories
468    pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
469        | (1 << (GC::OpenPunctuation as u32))
470        | (1 << (GC::ClosePunctuation as u32))
471        | (1 << (GC::ConnectorPunctuation as u32))
472        | (1 << (GC::OtherPunctuation as u32))
473        | (1 << (GC::InitialPunctuation as u32))
474        | (1 << (GC::FinalPunctuation as u32)));
475
476    /// (`Sm`) A symbol of mathematical use
477    pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
478    /// (`Sc`) A currency sign
479    pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
480    /// (`Sk`) A non-letterlike modifier symbol
481    pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
482    /// (`So`) A symbol of other type
483    pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
484    /// (`S`) The union of all symbol categories
485    pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
486        | (1 << (GC::CurrencySymbol as u32))
487        | (1 << (GC::ModifierSymbol as u32))
488        | (1 << (GC::OtherSymbol as u32)));
489
490    const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
491
492    /// Return whether the code point belongs in the provided multi-value category.
493    ///
494    /// ```
495    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
496    /// use icu::properties::CodePointMapData;
497    ///
498    /// let gc = CodePointMapData::<GeneralCategory>::new();
499    ///
500    /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
501    /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
502    ///
503    /// // U+0B1E ORIYA LETTER NYA
504    /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
505    /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
506    /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
507    ///
508    /// // U+0301 COMBINING ACUTE ACCENT
509    /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
510    /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
511    /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
512    ///
513    /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
514    /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
515    /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
516    ///
517    /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
518    /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
519    /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
520    ///
521    /// // U+2713 CHECK MARK
522    /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
523    /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
524    /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
525    ///
526    /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
527    /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
528    /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
529    ///
530    /// // U+E007F CANCEL TAG
531    /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
532    /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
533    /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
534    /// ```
535    pub const fn contains(self, val: GeneralCategory) -> bool {
536        0 != (1 << (val as u32)) & self.0
537    }
538
539    /// Produce a GeneralCategoryGroup that is the inverse of this one
540    ///
541    /// # Example
542    ///
543    /// ```rust
544    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
545    ///
546    /// let letter = GeneralCategoryGroup::Letter;
547    /// let not_letter = letter.complement();
548    ///
549    /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
550    /// assert!(!letter.contains(GeneralCategory::MathSymbol));
551    /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
552    /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
553    /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
554    /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
555    /// ```
556    pub const fn complement(self) -> Self {
557        // Mask off things not in Self::ALL to guarantee the mask
558        // values stay in-range
559        GeneralCategoryGroup(!self.0 & Self::ALL)
560    }
561
562    /// Return the group representing all GeneralCategory values
563    ///
564    /// # Example
565    ///
566    /// ```rust
567    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
568    ///
569    /// let all = GeneralCategoryGroup::all();
570    ///
571    /// assert!(all.contains(GeneralCategory::MathSymbol));
572    /// assert!(all.contains(GeneralCategory::OtherPunctuation));
573    /// assert!(all.contains(GeneralCategory::UppercaseLetter));
574    /// ```
575    pub const fn all() -> Self {
576        Self(Self::ALL)
577    }
578
579    /// Return the empty group
580    ///
581    /// # Example
582    ///
583    /// ```rust
584    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
585    ///
586    /// let empty = GeneralCategoryGroup::empty();
587    ///
588    /// assert!(!empty.contains(GeneralCategory::MathSymbol));
589    /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
590    /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
591    /// ```
592    pub const fn empty() -> Self {
593        Self(0)
594    }
595
596    /// Take the union of two groups
597    ///
598    /// # Example
599    ///
600    /// ```rust
601    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
602    ///
603    /// let letter = GeneralCategoryGroup::Letter;
604    /// let symbol = GeneralCategoryGroup::Symbol;
605    /// let union = letter.union(symbol);
606    ///
607    /// assert!(union.contains(GeneralCategory::MathSymbol));
608    /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
609    /// assert!(union.contains(GeneralCategory::UppercaseLetter));
610    /// ```
611    pub const fn union(self, other: Self) -> Self {
612        Self(self.0 | other.0)
613    }
614
615    /// Take the intersection of two groups
616    ///
617    /// # Example
618    ///
619    /// ```rust
620    /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
621    ///
622    /// let letter = GeneralCategoryGroup::Letter;
623    /// let lu = GeneralCategoryGroup::UppercaseLetter;
624    /// let intersection = letter.intersection(lu);
625    ///
626    /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
627    /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
628    /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
629    /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
630    /// ```
631    pub const fn intersection(self, other: Self) -> Self {
632        Self(self.0 & other.0)
633    }
634}
635
636impl From<GeneralCategory> for GeneralCategoryGroup {
637    fn from(subcategory: GeneralCategory) -> Self {
638        GeneralCategoryGroup(1 << (subcategory as u32))
639    }
640}
641impl From<u32> for GeneralCategoryGroup {
642    fn from(mask: u32) -> Self {
643        // Mask off things not in Self::ALL to guarantee the mask
644        // values stay in-range
645        GeneralCategoryGroup(mask & Self::ALL)
646    }
647}
648impl From<GeneralCategoryGroup> for u32 {
649    fn from(group: GeneralCategoryGroup) -> Self {
650        group.0
651    }
652}
653
/// Enumerated property Script.
///
/// This is used with both the Script and Script_Extensions Unicode properties.
/// Each character is assigned a single Script, but characters that are used in
/// a particular subset of scripts will be in more than one Script_Extensions set.
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. If you are trying to
/// determine whether a code point belongs to a certain script, you should use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// This is an open enum: a newtype over a raw `u16` whose named values are
/// provided as associated constants (for example `Script::Han`).
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han);  // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common);  // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
// The raw value is crate-private; outside the crate it is reachable through
// `to_icu4c_value` / `from_icu4c_value`.
pub struct Script(pub(crate) u16);
683
684impl Script {
685    /// Returns an ICU4C `UScriptCode` value.
686    pub const fn to_icu4c_value(self) -> u16 {
687        self.0
688    }
689    /// Constructor from an ICU4C `UScriptCode` value.
690    pub const fn from_icu4c_value(value: u16) -> Self {
691        Self(value)
692    }
693}
694
695create_const_array! {
696#[allow(missing_docs)] // These constants don't need individual documentation.
697#[allow(non_upper_case_globals)]
698impl Script {
699    pub const Adlam: Script = Script(167);
700    pub const Ahom: Script = Script(161);
701    pub const AnatolianHieroglyphs: Script = Script(156);
702    pub const Arabic: Script = Script(2);
703    pub const Armenian: Script = Script(3);
704    pub const Avestan: Script = Script(117);
705    pub const Balinese: Script = Script(62);
706    pub const Bamum: Script = Script(130);
707    pub const BassaVah: Script = Script(134);
708    pub const Batak: Script = Script(63);
709    pub const Bengali: Script = Script(4);
710    pub const Bhaiksuki: Script = Script(168);
711    pub const Bopomofo: Script = Script(5);
712    pub const Brahmi: Script = Script(65);
713    pub const Braille: Script = Script(46);
714    pub const Buginese: Script = Script(55);
715    pub const Buhid: Script = Script(44);
716    pub const CanadianAboriginal: Script = Script(40);
717    pub const Carian: Script = Script(104);
718    pub const CaucasianAlbanian: Script = Script(159);
719    pub const Chakma: Script = Script(118);
720    pub const Cham: Script = Script(66);
721    pub const Cherokee: Script = Script(6);
722    pub const Chorasmian: Script = Script(189);
723    pub const Common: Script = Script(0);
724    pub const Coptic: Script = Script(7);
725    pub const Cuneiform: Script = Script(101);
726    pub const Cypriot: Script = Script(47);
727    pub const CyproMinoan: Script = Script(193);
728    pub const Cyrillic: Script = Script(8);
729    pub const Deseret: Script = Script(9);
730    pub const Devanagari: Script = Script(10);
731    pub const DivesAkuru: Script = Script(190);
732    pub const Dogra: Script = Script(178);
733    pub const Duployan: Script = Script(135);
734    pub const EgyptianHieroglyphs: Script = Script(71);
735    pub const Elbasan: Script = Script(136);
736    pub const Elymaic: Script = Script(185);
737    pub const Ethiopian: Script = Script(11);
738    pub const Georgian: Script = Script(12);
739    pub const Glagolitic: Script = Script(56);
740    pub const Gothic: Script = Script(13);
741    pub const Grantha: Script = Script(137);
742    pub const Greek: Script = Script(14);
743    pub const Gujarati: Script = Script(15);
744    pub const GunjalaGondi: Script = Script(179);
745    pub const Gurmukhi: Script = Script(16);
746    pub const Han: Script = Script(17);
747    pub const Hangul: Script = Script(18);
748    pub const HanifiRohingya: Script = Script(182);
749    pub const Hanunoo: Script = Script(43);
750    pub const Hatran: Script = Script(162);
751    pub const Hebrew: Script = Script(19);
752    pub const Hiragana: Script = Script(20);
753    pub const ImperialAramaic: Script = Script(116);
754    pub const Inherited: Script = Script(1);
755    pub const InscriptionalPahlavi: Script = Script(122);
756    pub const InscriptionalParthian: Script = Script(125);
757    pub const Javanese: Script = Script(78);
758    pub const Kaithi: Script = Script(120);
759    pub const Kannada: Script = Script(21);
760    pub const Katakana: Script = Script(22);
761    pub const Kawi: Script = Script(198);
762    pub const KayahLi: Script = Script(79);
763    pub const Kharoshthi: Script = Script(57);
764    pub const KhitanSmallScript: Script = Script(191);
765    pub const Khmer: Script = Script(23);
766    pub const Khojki: Script = Script(157);
767    pub const Khudawadi: Script = Script(145);
768    pub const Lao: Script = Script(24);
769    pub const Latin: Script = Script(25);
770    pub const Lepcha: Script = Script(82);
771    pub const Limbu: Script = Script(48);
772    pub const LinearA: Script = Script(83);
773    pub const LinearB: Script = Script(49);
774    pub const Lisu: Script = Script(131);
775    pub const Lycian: Script = Script(107);
776    pub const Lydian: Script = Script(108);
777    pub const Mahajani: Script = Script(160);
778    pub const Makasar: Script = Script(180);
779    pub const Malayalam: Script = Script(26);
780    pub const Mandaic: Script = Script(84);
781    pub const Manichaean: Script = Script(121);
782    pub const Marchen: Script = Script(169);
783    pub const MasaramGondi: Script = Script(175);
784    pub const Medefaidrin: Script = Script(181);
785    pub const MeeteiMayek: Script = Script(115);
786    pub const MendeKikakui: Script = Script(140);
787    pub const MeroiticCursive: Script = Script(141);
788    pub const MeroiticHieroglyphs: Script = Script(86);
789    pub const Miao: Script = Script(92);
790    pub const Modi: Script = Script(163);
791    pub const Mongolian: Script = Script(27);
792    pub const Mro: Script = Script(149);
793    pub const Multani: Script = Script(164);
794    pub const Myanmar: Script = Script(28);
795    pub const Nabataean: Script = Script(143);
796    pub const NagMundari: Script = Script(199);
797    pub const Nandinagari: Script = Script(187);
798    pub const Nastaliq: Script = Script(200);
799    pub const NewTaiLue: Script = Script(59);
800    pub const Newa: Script = Script(170);
801    pub const Nko: Script = Script(87);
802    pub const Nushu: Script = Script(150);
803    pub const NyiakengPuachueHmong: Script = Script(186);
804    pub const Ogham: Script = Script(29);
805    pub const OlChiki: Script = Script(109);
806    pub const OldHungarian: Script = Script(76);
807    pub const OldItalic: Script = Script(30);
808    pub const OldNorthArabian: Script = Script(142);
809    pub const OldPermic: Script = Script(89);
810    pub const OldPersian: Script = Script(61);
811    pub const OldSogdian: Script = Script(184);
812    pub const OldSouthArabian: Script = Script(133);
813    pub const OldTurkic: Script = Script(88);
814    pub const OldUyghur: Script = Script(194);
815    pub const Oriya: Script = Script(31);
816    pub const Osage: Script = Script(171);
817    pub const Osmanya: Script = Script(50);
818    pub const PahawhHmong: Script = Script(75);
819    pub const Palmyrene: Script = Script(144);
820    pub const PauCinHau: Script = Script(165);
821    pub const PhagsPa: Script = Script(90);
822    pub const Phoenician: Script = Script(91);
823    pub const PsalterPahlavi: Script = Script(123);
824    pub const Rejang: Script = Script(110);
825    pub const Runic: Script = Script(32);
826    pub const Samaritan: Script = Script(126);
827    pub const Saurashtra: Script = Script(111);
828    pub const Sharada: Script = Script(151);
829    pub const Shavian: Script = Script(51);
830    pub const Siddham: Script = Script(166);
831    pub const SignWriting: Script = Script(112);
832    pub const Sinhala: Script = Script(33);
833    pub const Sogdian: Script = Script(183);
834    pub const SoraSompeng: Script = Script(152);
835    pub const Soyombo: Script = Script(176);
836    pub const Sundanese: Script = Script(113);
837    pub const SylotiNagri: Script = Script(58);
838    pub const Syriac: Script = Script(34);
839    pub const Tagalog: Script = Script(42);
840    pub const Tagbanwa: Script = Script(45);
841    pub const TaiLe: Script = Script(52);
842    pub const TaiTham: Script = Script(106);
843    pub const TaiViet: Script = Script(127);
844    pub const Takri: Script = Script(153);
845    pub const Tamil: Script = Script(35);
846    pub const Tangsa: Script = Script(195);
847    pub const Tangut: Script = Script(154);
848    pub const Telugu: Script = Script(36);
849    pub const Thaana: Script = Script(37);
850    pub const Thai: Script = Script(38);
851    pub const Tibetan: Script = Script(39);
852    pub const Tifinagh: Script = Script(60);
853    pub const Tirhuta: Script = Script(158);
854    pub const Toto: Script = Script(196);
855    pub const Ugaritic: Script = Script(53);
856    pub const Unknown: Script = Script(103);
857    pub const Vai: Script = Script(99);
858    pub const Vithkuqi: Script = Script(197);
859    pub const Wancho: Script = Script(188);
860    pub const WarangCiti: Script = Script(146);
861    pub const Yezidi: Script = Script(192);
862    pub const Yi: Script = Script(41);
863    pub const ZanabazarSquare: Script = Script(177);
864}
865}
866
867make_enumerated_property! {
868    name: "Script";
869    short_name: "sc";
870    ident: Script;
871    data_marker: crate::provider::PropertyEnumScriptV1;
872    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
873    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
874}
875
/// Enumerated property Hangul_Syllable_Type.
///
/// The Unicode standard provides both precomposed Hangul syllables and conjoining Jamo to compose
/// arbitrary Hangul syllables. This property provides that ontology of Hangul code points.
///
/// For more information, see the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::HangulSyllableType};
///
/// assert_eq!(CodePointMapData::<HangulSyllableType>::new().get('ᄀ'), HangulSyllableType::LeadingJamo);  // U+1100
/// assert_eq!(CodePointMapData::<HangulSyllableType>::new().get('가'), HangulSyllableType::LeadingVowelSyllable);  // U+AC00
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Returns an ICU4C `UHangulSyllableType` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UHangulSyllableType` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
909
910create_const_array! {
911#[allow(non_upper_case_globals)]
912impl HangulSyllableType {
913    /// (`NA`) not applicable (e.g. not a Hangul code point).
914    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
915    /// (`L`) a conjoining leading consonant Jamo.
916    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
917    /// (`V`) a conjoining vowel Jamo.
918    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
919    /// (`T`) a conjoining trailing consonant Jamo.
920    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
921    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
922    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
923    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
924    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
925}
926}
927
928make_enumerated_property! {
929    name: "Hangul_Syllable_Type";
930    short_name: "hst";
931    ident: HangulSyllableType;
932    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
933    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
934    ule_ty: u8;
935
936}
937
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::EastAsianWidth};
///
/// assert_eq!(CodePointMapData::<EastAsianWidth>::new().get('ｱ'), EastAsianWidth::Halfwidth); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(CodePointMapData::<EastAsianWidth>::new().get('ア'), EastAsianWidth::Wide); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns an ICU4C `UEastAsianWidth` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UEastAsianWidth` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
969
970create_const_array! {
971#[allow(missing_docs)] // These constants don't need individual documentation.
972#[allow(non_upper_case_globals)]
973impl EastAsianWidth {
974    pub const Neutral: EastAsianWidth = EastAsianWidth(0); //name="N"
975    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); //name="A"
976    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); //name="H"
977    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); //name="F"
978    pub const Narrow: EastAsianWidth = EastAsianWidth(4); //name="Na"
979    pub const Wide: EastAsianWidth = EastAsianWidth(5); //name="W"
980}
981}
982
983make_enumerated_property! {
984    name: "East_Asian_Width";
985    short_name: "ea";
986    ident: EastAsianWidth;
987    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
988    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
989    ule_ty: u8;
990}
991
/// Enumerated property Line_Break.
///
/// See "Line Breaking Properties" in UAX #14 for the summary of each property
/// value: <https://www.unicode.org/reports/tr14/#Properties>
///
/// The numeric value is compatible with `ULineBreak` in ICU4C.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::LineBreak};
///
/// assert_eq!(CodePointMapData::<LineBreak>::new().get(')'), LineBreak::CloseParenthesis); // U+0029: Right Parenthesis
/// assert_eq!(CodePointMapData::<LineBreak>::new().get('ぁ'), LineBreak::ConditionalJapaneseStarter); // U+3041: Hiragana Letter Small A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Returns an ICU4C `ULineBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `ULineBreak` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1027
1028create_const_array! {
1029#[allow(missing_docs)] // These constants don't need individual documentation.
1030#[allow(non_upper_case_globals)]
1031impl LineBreak {
1032    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
1033    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
1034    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
1035    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
1036    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
1037    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
1038    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
1039    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
1040    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
1041    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
1042    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
1043    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
1044    pub const Glue: LineBreak = LineBreak(12); // name="GL"
1045    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
1046    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
1047    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
1048    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
1049    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
1050    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
1051    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
1052    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
1053    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
1054    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
1055    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
1056    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
1057    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
1058    pub const Space: LineBreak = LineBreak(26); // name="SP"
1059    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
1060    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
1061    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
1062    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
1063    pub const H2: LineBreak = LineBreak(31); // name="H2"
1064    pub const H3: LineBreak = LineBreak(32); // name="H3"
1065    pub const JL: LineBreak = LineBreak(33); // name="JL"
1066    pub const JT: LineBreak = LineBreak(34); // name="JT"
1067    pub const JV: LineBreak = LineBreak(35); // name="JV"
1068    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
1069    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
1070    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
1071    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
1072    pub const EBase: LineBreak = LineBreak(40); // name="EB"
1073    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
1074    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"
1075
1076    // Added in ICU 74:
1077    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
1078    pub const AksaraPrebase: LineBreak = LineBreak(44); // name=AP"
1079    pub const AksaraStart: LineBreak = LineBreak(45); // name=AS"
1080    pub const ViramaFinal: LineBreak = LineBreak(46); // name=VF"
1081    pub const Virama: LineBreak = LineBreak(47); // name=VI"
1082}
1083}
1084
1085make_enumerated_property! {
1086    name: "Line_Break";
1087    short_name: "lb";
1088    ident: LineBreak;
1089    data_marker: crate::provider::PropertyEnumLineBreakV1;
1090    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
1091    ule_ty: u8;
1092}
1093
/// Enumerated property Grapheme_Cluster_Break.
///
/// See "Default Grapheme Cluster Boundary Specification" in UAX #29 for the
/// summary of each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::GraphemeClusterBreak};
///
/// assert_eq!(CodePointMapData::<GraphemeClusterBreak>::new().get('🇦'), GraphemeClusterBreak::RegionalIndicator); // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(CodePointMapData::<GraphemeClusterBreak>::new().get('ำ'), GraphemeClusterBreak::SpacingMark); // U+0E33: Thai Character Sara Am
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Returns an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UGraphemeClusterBreak` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1128
1129create_const_array! {
1130#[allow(missing_docs)] // These constants don't need individual documentation.
1131#[allow(non_upper_case_globals)]
1132impl GraphemeClusterBreak {
1133    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
1134    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
1135    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
1136    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
1137    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
1138    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
1139    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
1140    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
1141    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
1142    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
1143    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
1144    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
1145    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
1146    /// This value is obsolete and unused.
1147    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
1148    /// This value is obsolete and unused.
1149    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
1150    /// This value is obsolete and unused.
1151    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
1152    /// This value is obsolete and unused.
1153    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
1154    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
1155}
1156}
1157
1158make_enumerated_property! {
1159    name: "Grapheme_Cluster_Break";
1160    short_name: "GCB";
1161    ident: GraphemeClusterBreak;
1162    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
1163    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
1164    ule_ty: u8;
1165}
1166
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::WordBreak};
///
/// assert_eq!(CodePointMapData::<WordBreak>::new().get('.'), WordBreak::MidNumLet); // U+002E: Full Stop
/// assert_eq!(CodePointMapData::<WordBreak>::new().get('，'), WordBreak::MidNum); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Returns an ICU4C `UWordBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UWordBreak` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1201
1202create_const_array! {
1203#[allow(missing_docs)] // These constants don't need individual documentation.
1204#[allow(non_upper_case_globals)]
1205impl WordBreak {
1206    pub const Other: WordBreak = WordBreak(0); // name="XX"
1207    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
1208    pub const Format: WordBreak = WordBreak(2); // name="FO"
1209    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
1210    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
1211    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
1212    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
1213    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
1214    pub const CR: WordBreak = WordBreak(8); // name="CR"
1215    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
1216    pub const LF: WordBreak = WordBreak(10); // name="LF"
1217    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
1218    pub const Newline: WordBreak = WordBreak(12); // name="NL"
1219    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
1220    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
1221    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
1222    pub const DoubleQuote: WordBreak = WordBreak(16); // name=DQ
1223    /// This value is obsolete and unused.
1224    pub const EBase: WordBreak = WordBreak(17); // name="EB"
1225    /// This value is obsolete and unused.
1226    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
1227    /// This value is obsolete and unused.
1228    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
1229    /// This value is obsolete and unused.
1230    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
1231    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
1232    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
1233}
1234}
1235
1236make_enumerated_property! {
1237    name: "Word_Break";
1238    short_name: "WB";
1239    ident: WordBreak;
1240    data_marker: crate::provider::PropertyEnumWordBreakV1;
1241    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
1242    ule_ty: u8;
1243}
1244
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::SentenceBreak};
///
/// assert_eq!(CodePointMapData::<SentenceBreak>::new().get('９'), SentenceBreak::Numeric); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(CodePointMapData::<SentenceBreak>::new().get(','), SentenceBreak::SContinue); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns an ICU4C `USentenceBreak` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `USentenceBreak` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1279
1280create_const_array! {
1281#[allow(missing_docs)] // These constants don't need individual documentation.
1282#[allow(non_upper_case_globals)]
1283impl SentenceBreak {
1284    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
1285    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
1286    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
1287    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
1288    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
1289    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
1290    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
1291    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
1292    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
1293    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
1294    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
1295    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
1296    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
1297    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
1298    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
1299}
1300}
1301
1302make_enumerated_property! {
1303    name: "Sentence_Break";
1304    short_name: "SB";
1305    ident: SentenceBreak;
1306    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
1307    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
1308    ule_ty: u8;
1309}
1310
/// Property Canonical_Combining_Class.
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the preferred API
/// to look up the Canonical_Combining_Class property by scalar value.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::CanonicalCombiningClass};
///
/// assert_eq!(CodePointMapData::<CanonicalCombiningClass>::new().get('a'), CanonicalCombiningClass::NotReordered); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(CodePointMapData::<CanonicalCombiningClass>::new().get('\u{0301}'), CanonicalCombiningClass::Above); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);

impl CanonicalCombiningClass {
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// This is the wrapped `u8`, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }

    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1352
create_const_array! {
// The whole `impl` is handed to `create_const_array!`, which re-emits it and, per that
// macro's documentation, also produces an `ALL_VALUES` table of (name, value) pairs used by
// `test_enumerated_property_completeness`.
//
// These constant names come from PropertyValueAliases.txt
// The trailing `name="…"` comment on each line appears to record the corresponding short
// alias — TODO confirm against PropertyValueAliases.txt when adding new values.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
}
1418
// Registers `CanonicalCombiningClass` as the enumerated property with long name
// "Canonical_Combining_Class" and short name "ccc".
// NOTE(review): `ule_ty` presumably names the unaligned storage type used for the
// property's values — confirm against the `make_enumerated_property!` definition.
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1427
/// Enumerated property Indic_Syllabic_Category.
///
/// See UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// This is an open enum: a transparent newtype around a `u8`, with the known values
/// exposed as associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::IndicSyllabicCategory};
///
/// assert_eq!(CodePointMapData::<IndicSyllabicCategory>::new().get('a'), IndicSyllabicCategory::Other);
/// assert_eq!(CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'), IndicSyllabicCategory::Bindu); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "datagen",
    derive(databake::Bake),
    databake(path = icu_properties::props)
)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct IndicSyllabicCategory(pub(crate) u8);
1447
1448impl IndicSyllabicCategory {
1449    /// Returns an ICU4C `UIndicSyllabicCategory` value.
1450    pub const fn to_icu4c_value(self) -> u8 {
1451        self.0
1452    }
1453    /// Constructor from an ICU4C `UIndicSyllabicCategory` value.
1454    pub const fn from_icu4c_value(value: u8) -> Self {
1455        Self(value)
1456    }
1457}
1458
create_const_array! {
// The whole `impl` is handed to `create_const_array!`, which re-emits it and, per that
// macro's documentation, also produces an `ALL_VALUES` table of (name, value) pairs used by
// `test_enumerated_property_completeness`.
//
// The wrapped integers are the same values that `to_icu4c_value` / `from_icu4c_value`
// exchange with ICU4C's `UIndicSyllabicCategory`.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
}
1502
// Registers `IndicSyllabicCategory` as the enumerated property with long name
// "Indic_Syllabic_Category" and short name "InSC".
// NOTE(review): `ule_ty` presumably names the unaligned storage type used for the
// property's values — confirm against the `make_enumerated_property!` definition.
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
}
1511
/// Enumerated property Joining_Type.
///
/// A summary of each property value can be found in Section 9.2, Arabic Cursive Joining,
/// of The Unicode Standard.
///
/// This is an open enum: a transparent newtype around a `u8`, with the known values
/// exposed as associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::JoiningType};
///
/// assert_eq!(CodePointMapData::<JoiningType>::new().get('ؠ'), JoiningType::DualJoining); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(CodePointMapData::<JoiningType>::new().get('𐫍'), JoiningType::LeftJoining); // U+10ACD: Manichaean Letter Heth
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "datagen",
    derive(databake::Bake),
    databake(path = icu_properties::props)
)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct JoiningType(pub(crate) u8);
1532
1533impl JoiningType {
1534    /// Returns an ICU4C `UJoiningType` value.
1535    pub const fn to_icu4c_value(self) -> u8 {
1536        self.0
1537    }
1538    /// Constructor from an ICU4C `UJoiningType` value.
1539    pub const fn from_icu4c_value(value: u8) -> Self {
1540        Self(value)
1541    }
1542}
1543
create_const_array! {
// The whole `impl` is handed to `create_const_array!`, which re-emits it and, per that
// macro's documentation, also produces an `ALL_VALUES` table of (name, value) pairs used by
// `test_enumerated_property_completeness`.
//
// The trailing `name="…"` comment on each line appears to record the corresponding short
// alias — TODO confirm against PropertyValueAliases.txt when adding new values.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
1556
// Registers `JoiningType` as the enumerated property with long name "Joining_Type" and
// short name "jt".
// NOTE(review): `ule_ty` presumably names the unaligned storage type used for the
// property's values — confirm against the `make_enumerated_property!` definition.
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8;
}
1565
/// Enumerated property Vertical_Orientation.
///
/// See UAX #50 (Unicode Vertical Text Layout):
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// This is an open enum: a transparent newtype around a `u8`, with the known values
/// exposed as associated constants.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::VerticalOrientation};
///
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get('a'), VerticalOrientation::Rotated);
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get('§'), VerticalOrientation::Upright);
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get32(0x2329), VerticalOrientation::TransformedRotated);
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get32(0x3001), VerticalOrientation::TransformedUpright);
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "datagen",
    derive(databake::Bake),
    databake(path = icu_properties::props)
)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct VerticalOrientation(pub(crate) u8);
1588
1589impl VerticalOrientation {
1590    /// Returns an ICU4C `UVerticalOrientation` value.
1591    pub const fn to_icu4c_value(self) -> u8 {
1592        self.0
1593    }
1594    /// Constructor from an ICU4C `UVerticalOrientation` value.
1595    pub const fn from_icu4c_value(value: u8) -> Self {
1596        Self(value)
1597    }
1598}
1599
create_const_array! {
// The whole `impl` is handed to `create_const_array!`, which re-emits it and, per that
// macro's documentation, also produces an `ALL_VALUES` table of (name, value) pairs used by
// `test_enumerated_property_completeness`.
//
// The trailing `name="…"` comment on each line appears to record the corresponding short
// alias — TODO confirm against PropertyValueAliases.txt when adding new values.
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
}
1610
// Registers `VerticalOrientation` as the enumerated property with long name
// "Vertical_Orientation" and short name "vo".
// NOTE(review): `ule_ty` presumably names the unaligned storage type used for the
// property's values — confirm against the `make_enumerated_property!` definition.
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8;
}
1619
1620pub use crate::code_point_set::BinaryProperty;
1621
/// Declares a binary property: an empty marker struct that implements [`BinaryProperty`].
///
/// The input fields must appear in exactly this order:
/// - `name` / `short_name`: string literals, exposed (as bytes) through
///   `BinaryProperty::NAME` and `BinaryProperty::SHORT_NAME`;
/// - `ident`: the name of the generated unit struct;
/// - `data_marker`: the type used for `BinaryProperty::DataMarker`;
/// - `singleton`: the associated item of `crate::provider::Baked` that backs
///   `BinaryProperty::SINGLETON` (only emitted with the `compiled_data` feature);
/// - one or more attributes (in practice the doc comments), forwarded to the struct.
macro_rules! make_binary_property {
    (
        name: $long_name:literal;
        short_name: $alias:literal;
        ident: $marker:ident;
        data_marker: $data_marker_ty:ty;
        singleton: $baked_item:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $marker;

        impl crate::private::Sealed for $marker {}

        impl BinaryProperty for $marker {
            type DataMarker = $data_marker_ty;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$baked_item;
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $alias.as_bytes();
        }
    };
}
1648
make_binary_property! {
    name: "ASCII_Hex_Digit";
    short_name: "AHex";
    ident: AsciiHexDigit;
    data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
    singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
    /// ASCII characters commonly used for the representation of hexadecimal numbers.
    ///
    /// This property's long name is `ASCII_Hex_Digit` and its short name is `AHex`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::AsciiHexDigit;
    ///
    /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
    ///
    /// assert!(ascii_hex_digit.contains('3'));
    /// assert!(!ascii_hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(ascii_hex_digit.contains('A'));
    /// assert!(!ascii_hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```
}
1671
make_binary_property! {
    name: "Alnum";
    short_name: "Alnum";
    ident: Alnum;
    data_marker: crate::provider::PropertyBinaryAlnumV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
    /// Characters with the `Alphabetic` or `Decimal_Number` property.
    ///
    /// This is defined for POSIX compatibility.
    ///
    /// This property's long name and short name are both `Alnum`.
}
1682
make_binary_property! {
    name: "Alphabetic";
    short_name: "Alpha";
    ident: Alphabetic;
    data_marker: crate::provider::PropertyBinaryAlphabeticV1;
    singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
    /// Alphabetic characters.
    ///
    /// This property's long name is `Alphabetic` and its short name is `Alpha`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Alphabetic;
    ///
    /// let alphabetic = CodePointSetData::new::<Alphabetic>();
    ///
    /// assert!(!alphabetic.contains('3'));
    /// assert!(!alphabetic.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
    /// assert!(alphabetic.contains('A'));
    /// assert!(alphabetic.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
    /// ```

}
1706
make_binary_property! {
    name: "Bidi_Control";
    short_name: "Bidi_C";
    ident: BidiControl;
    data_marker: crate::provider::PropertyBinaryBidiControlV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
    /// Format control characters which have specific functions in the Unicode Bidirectional
    /// Algorithm.
    ///
    /// This property's long name is `Bidi_Control` and its short name is `Bidi_C`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiControl;
    ///
    /// let bidi_control = CodePointSetData::new::<BidiControl>();
    ///
    /// assert!(bidi_control.contains('\u{200F}'));  // RIGHT-TO-LEFT MARK
    /// assert!(!bidi_control.contains('ش'));  // U+0634 ARABIC LETTER SHEEN
    /// ```

}
1729
make_binary_property! {
    name: "Bidi_Mirrored";
    short_name: "Bidi_M";
    ident: BidiMirrored;
    data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
    singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
    /// Characters that are mirrored in bidirectional text.
    ///
    /// This property's long name is `Bidi_Mirrored` and its short name is `Bidi_M`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::BidiMirrored;
    ///
    /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
    ///
    /// assert!(bidi_mirrored.contains('['));
    /// assert!(bidi_mirrored.contains(']'));
    /// assert!(bidi_mirrored.contains('∑'));  // U+2211 N-ARY SUMMATION
    /// assert!(!bidi_mirrored.contains('ཉ'));  // U+0F49 TIBETAN LETTER NYA
    /// ```

}
1753
make_binary_property! {
    name: "Blank";
    short_name: "Blank";
    ident: Blank;
    data_marker: crate::provider::PropertyBinaryBlankV1;
    singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
    /// Horizontal whitespace characters.
    ///
    /// This property's long name and short name are both `Blank`.

}
1763
make_binary_property! {
    name: "Cased";
    short_name: "Cased";
    ident: Cased;
    data_marker: crate::provider::PropertyBinaryCasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
    /// Uppercase, lowercase, and titlecase characters.
    ///
    /// This property's long name and short name are both `Cased`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Cased;
    ///
    /// let cased = CodePointSetData::new::<Cased>();
    ///
    /// assert!(cased.contains('Ꙡ'));  // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
    /// assert!(!cased.contains('ދ'));  // U+078B THAANA LETTER DHAALU
    /// ```

}
1785
make_binary_property! {
    name: "Case_Ignorable";
    short_name: "CI";
    ident: CaseIgnorable;
    data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
    singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
    /// Characters which are ignored for casing purposes.
    ///
    /// This property's long name is `Case_Ignorable` and its short name is `CI`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::CaseIgnorable;
    ///
    /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
    ///
    /// assert!(case_ignorable.contains(':'));
    /// assert!(!case_ignorable.contains('λ'));  // U+03BB GREEK SMALL LETTER LAMDA
    /// ```

}
1807
make_binary_property! {
    name: "Full_Composition_Exclusion";
    short_name: "Comp_Ex";
    ident: FullCompositionExclusion;
    data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
    singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
    /// Characters that are excluded from composition.
    ///
    /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
    ///
    /// This property's long name is `Full_Composition_Exclusion` and its short name is
    /// `Comp_Ex`.

}
1819
make_binary_property! {
    name: "Changes_When_Casefolded";
    short_name: "CWCF";
    ident: ChangesWhenCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
    /// Characters whose normalized forms are not stable under case folding.
    ///
    /// This property's long name is `Changes_When_Casefolded` and its short name is `CWCF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenCasefolded;
    ///
    /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
    ///
    /// assert!(changes_when_casefolded.contains('ß'));  // U+00DF LATIN SMALL LETTER SHARP S
    /// assert!(!changes_when_casefolded.contains('ᜉ'));  // U+1709 TAGALOG LETTER PA
    /// ```

}
1841
make_binary_property! {
    name: "Changes_When_Casemapped";
    short_name: "CWCM";
    ident: ChangesWhenCasemapped;
    data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
    /// Characters which may change when they undergo case mapping.
    ///
    /// This property's long name is `Changes_When_Casemapped` and its short name is `CWCM`.

}
1851
make_binary_property! {
    name: "Changes_When_NFKC_Casefolded";
    short_name: "CWKCF";
    ident: ChangesWhenNfkcCasefolded;
    data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
    /// Characters which are not identical to their `NFKC_Casefold` mapping.
    ///
    /// This property's long name is `Changes_When_NFKC_Casefolded` and its short name is
    /// `CWKCF`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenNfkcCasefolded;
    ///
    /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
    ///
    /// assert!(changes_when_nfkc_casefolded.contains('🄵'));  // U+1F135 SQUARED LATIN CAPITAL LETTER F
    /// assert!(!changes_when_nfkc_casefolded.contains('f'));
    /// ```

}
1873
make_binary_property! {
    name: "Changes_When_Lowercased";
    short_name: "CWL";
    ident: ChangesWhenLowercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
    ///
    /// This property's long name is `Changes_When_Lowercased` and its short name is `CWL`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenLowercased;
    ///
    /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
    ///
    /// assert!(changes_when_lowercased.contains('Ⴔ'));  // U+10B4 GEORGIAN CAPITAL LETTER PHAR
    /// assert!(!changes_when_lowercased.contains('ფ'));  // U+10E4 GEORGIAN LETTER PHAR
    /// ```

}
1895
make_binary_property! {
    name: "Changes_When_Titlecased";
    short_name: "CWT";
    ident: ChangesWhenTitlecased;
    data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
    /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
    ///
    /// This property's long name is `Changes_When_Titlecased` and its short name is `CWT`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenTitlecased;
    ///
    /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
    ///
    /// assert!(changes_when_titlecased.contains('æ'));  // U+00E6 LATIN SMALL LETTER AE
    /// assert!(!changes_when_titlecased.contains('Æ'));  // U+00C6 LATIN CAPITAL LETTER AE
    /// ```

}
1917
make_binary_property! {
    name: "Changes_When_Uppercased";
    short_name: "CWU";
    ident: ChangesWhenUppercased;
    data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
    singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
    /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
    ///
    /// This property's long name is `Changes_When_Uppercased` and its short name is `CWU`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::ChangesWhenUppercased;
    ///
    /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
    ///
    /// assert!(changes_when_uppercased.contains('ւ'));  // U+0582 ARMENIAN SMALL LETTER YIWN
    /// assert!(!changes_when_uppercased.contains('Ւ'));  // U+0552 ARMENIAN CAPITAL LETTER YIWN
    /// ```

}
1939
make_binary_property! {
    name: "Dash";
    short_name: "Dash";
    ident: Dash;
    data_marker: crate::provider::PropertyBinaryDashV1;
    singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
    /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
    /// their compatibility equivalents.
    ///
    /// This property's long name and short name are both `Dash`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Dash;
    ///
    /// let dash = CodePointSetData::new::<Dash>();
    ///
    /// assert!(dash.contains('⸺'));  // U+2E3A TWO-EM DASH
    /// assert!(dash.contains('-'));  // U+002D
    /// assert!(!dash.contains('='));  // U+003D
    /// ```

}
1963
make_binary_property! {
    name: "Deprecated";
    short_name: "Dep";
    ident: Deprecated;
    data_marker: crate::provider::PropertyBinaryDeprecatedV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
    /// Deprecated characters.
    ///
    /// No characters will ever be removed from the standard, but the
    /// usage of deprecated characters is strongly discouraged.
    ///
    /// This property's long name is `Deprecated` and its short name is `Dep`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Deprecated;
    ///
    /// let deprecated = CodePointSetData::new::<Deprecated>();
    ///
    /// assert!(deprecated.contains('ឣ'));  // U+17A3 KHMER INDEPENDENT VOWEL QAQ
    /// assert!(!deprecated.contains('A'));
    /// ```

}
1988
make_binary_property! {
    name: "Default_Ignorable_Code_Point";
    short_name: "DI";
    ident: DefaultIgnorableCodePoint;
    data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
    singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
    /// For programmatic determination of default ignorable code points.
    ///
    /// New characters that
    /// should be ignored in rendering (unless explicitly supported) will be assigned in these
    /// ranges, permitting programs to correctly handle the default rendering of such
    /// characters when not otherwise supported.
    ///
    /// This property's long name is `Default_Ignorable_Code_Point` and its short name is `DI`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::DefaultIgnorableCodePoint;
    ///
    /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
    ///
    /// assert!(default_ignorable_code_point.contains('\u{180B}'));  // MONGOLIAN FREE VARIATION SELECTOR ONE
    /// assert!(!default_ignorable_code_point.contains('E'));
    /// ```

}
2015
make_binary_property! {
    name: "Diacritic";
    short_name: "Dia";
    ident: Diacritic;
    data_marker: crate::provider::PropertyBinaryDiacriticV1;
    singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
    /// Characters that linguistically modify the meaning of another character to which they apply.
    ///
    /// This property's long name is `Diacritic` and its short name is `Dia`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Diacritic;
    ///
    /// let diacritic = CodePointSetData::new::<Diacritic>();
    ///
    /// assert!(diacritic.contains('\u{05B3}'));  // HEBREW POINT HATAF QAMATS
    /// assert!(!diacritic.contains('א'));  // U+05D0 HEBREW LETTER ALEF
    /// ```

}
2037
make_binary_property! {
    name: "Emoji_Modifier_Base";
    short_name: "EBase";
    ident: EmojiModifierBase;
    data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
    /// Characters that can serve as a base for emoji modifiers.
    ///
    /// This property's long name is `Emoji_Modifier_Base` and its short name is `EBase`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifierBase;
    ///
    /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
    ///
    /// assert!(emoji_modifier_base.contains('✊'));  // U+270A RAISED FIST
    /// assert!(!emoji_modifier_base.contains('⛰'));  // U+26F0 MOUNTAIN
    /// ```

}
2059
make_binary_property! {
    name: "Emoji_Component";
    short_name: "EComp";
    ident: EmojiComponent;
    data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
    /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
    /// separate choices, such as base characters for emoji keycaps.
    ///
    /// This property's long name is `Emoji_Component` and its short name is `EComp`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiComponent;
    ///
    /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
    ///
    /// assert!(emoji_component.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
    /// assert!(emoji_component.contains('\u{20E3}'));  // COMBINING ENCLOSING KEYCAP
    /// assert!(emoji_component.contains('7'));
    /// assert!(!emoji_component.contains('T'));
    /// ```

}
2084
make_binary_property! {
    name: "Emoji_Modifier";
    short_name: "EMod";
    ident: EmojiModifier;
    data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
    /// Characters that are emoji modifiers.
    ///
    /// This property's long name is `Emoji_Modifier` and its short name is `EMod`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::EmojiModifier;
    ///
    /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
    ///
    /// assert!(emoji_modifier.contains('\u{1F3FD}'));  // EMOJI MODIFIER FITZPATRICK TYPE-4
    /// assert!(!emoji_modifier.contains('\u{200C}'));  // ZERO WIDTH NON-JOINER
    /// ```

}
2106
make_binary_property! {
    name: "Emoji";
    short_name: "Emoji";
    ident: Emoji;
    data_marker: crate::provider::PropertyBinaryEmojiV1;
    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
    /// Characters that are emoji.
    ///
    /// This property's long name and short name are both `Emoji`.
    ///
    /// # Example
    ///
    /// ```
    /// use icu::properties::CodePointSetData;
    /// use icu::properties::props::Emoji;
    ///
    /// let emoji = CodePointSetData::new::<Emoji>();
    ///
    /// assert!(emoji.contains('🔥'));  // U+1F525 FIRE
    /// assert!(!emoji.contains('V'));
    /// ```

}
2128
2129make_binary_property! {
2130    name: "Emoji_Presentation";
2131    short_name: "EPres";
2132    ident: EmojiPresentation;
2133    data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
2134    singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
2135    /// Characters that have emoji presentation by default.
2136    ///
2137    /// # Example
2138    ///
2139    /// ```
2140    /// use icu::properties::CodePointSetData;
2141    /// use icu::properties::props::EmojiPresentation;
2142    ///
2143    /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2144    ///
2145    /// assert!(emoji_presentation.contains('🦬'));  // U+1F9AC BISON
2146    /// assert!(!emoji_presentation.contains('♻'));  // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2147    /// ```
2148
2149}
2150
2151make_binary_property! {
2152    name: "Extender";
2153    short_name: "Ext";
2154    ident: Extender;
2155    data_marker: crate::provider::PropertyBinaryExtenderV1;
2156    singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
2157    /// Characters whose principal function is to extend the value of a preceding alphabetic
2158    /// character or to extend the shape of adjacent characters, such as iteration and length marks.
2159    ///
2160    /// # Example
2161    ///
2162    /// ```
2163    /// use icu::properties::CodePointSetData;
2164    /// use icu::properties::props::Extender;
2165    ///
2166    /// let extender = CodePointSetData::new::<Extender>();
2167    ///
2168    /// assert!(extender.contains('ヾ'));  // U+30FE KATAKANA VOICED ITERATION MARK
2169    /// assert!(extender.contains('ー'));  // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2170    /// assert!(!extender.contains('・'));  // U+30FB KATAKANA MIDDLE DOT
2171    /// ```
2172
2173}
2174
2175make_binary_property! {
2176    name: "Extended_Pictographic";
2177    short_name: "ExtPict";
2178    ident: ExtendedPictographic;
2179    data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
2180    singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
2181    /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2182    /// emoji characters.
2183    ///
2184    /// # Example
2185    ///
2186    /// ```
2187    /// use icu::properties::CodePointSetData;
2188    /// use icu::properties::props::ExtendedPictographic;
2189    ///
2190    /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2191    ///
2192    /// assert!(extended_pictographic.contains('🥳'));  // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2193    /// assert!(!extended_pictographic.contains('🇪'));  // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2194    /// ```
2195
2196}
2197
2198make_binary_property! {
2199    name: "Graph";
2200    short_name: "Graph";
2201    ident: Graph;
2202    data_marker: crate::provider::PropertyBinaryGraphV1;
2203    singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2204    /// Visible characters.
2205    ///
2206    /// This is defined for POSIX compatibility.
2207
2208}
2209
2210make_binary_property! {
2211    name: "Grapheme_Base";
2212    short_name: "Gr_Base";
2213    ident: GraphemeBase;
2214    data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
2215    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
2216    /// Property used together with the definition of Standard Korean Syllable Block to define
2217    /// "Grapheme base".
2218    ///
2219    /// See D58 in Chapter 3, "Conformance", of the Unicode Standard.
2220    ///
2221    /// # Example
2222    ///
2223    /// ```
2224    /// use icu::properties::CodePointSetData;
2225    /// use icu::properties::props::GraphemeBase;
2226    ///
2227    /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2228    ///
2229    /// assert!(grapheme_base.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2230    /// assert!(grapheme_base.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2231    /// assert!(!grapheme_base.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2232    /// ```
2233
2234}
2235
2236make_binary_property! {
2237    name: "Grapheme_Extend";
2238    short_name: "Gr_Ext";
2239    ident: GraphemeExtend;
2240    data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
2241    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
2242    /// Property used to define "Grapheme extender".
2243    ///
2244    /// See D59 in Chapter 3, "Conformance", of the
2245    /// Unicode Standard.
2246    ///
2247    /// # Example
2248    ///
2249    /// ```
2250    /// use icu::properties::CodePointSetData;
2251    /// use icu::properties::props::GraphemeExtend;
2252    ///
2253    /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2254    ///
2255    /// assert!(!grapheme_extend.contains('ക'));  // U+0D15 MALAYALAM LETTER KA
2256    /// assert!(!grapheme_extend.contains('\u{0D3F}'));  // U+0D3F MALAYALAM VOWEL SIGN I
2257    /// assert!(grapheme_extend.contains('\u{0D3E}'));  // U+0D3E MALAYALAM VOWEL SIGN AA
2258    /// ```
2259
2260}
2261
2262make_binary_property! {
2263    name: "Grapheme_Link";
2264    short_name: "Gr_Link";
2265    ident: GraphemeLink;
2266    data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
2267    singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
2268    /// Deprecated property.
2269    ///
2270    /// It was formerly proposed for the programmatic determination of grapheme
2271    /// cluster boundaries.
2272}
2273
2274make_binary_property! {
2275    name: "Hex_Digit";
2276    short_name: "Hex";
2277    ident: HexDigit;
2278    data_marker: crate::provider::PropertyBinaryHexDigitV1;
2279    singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
2280    /// Characters commonly used for the representation of hexadecimal numbers, plus their
2281    /// compatibility equivalents.
2282    ///
2283    /// # Example
2284    ///
2285    /// ```
2286    /// use icu::properties::CodePointSetData;
2287    /// use icu::properties::props::HexDigit;
2288    ///
2289    /// let hex_digit = CodePointSetData::new::<HexDigit>();
2290    ///
2291    /// assert!(hex_digit.contains('0'));  // U+0030 DIGIT ZERO
2292    /// assert!(!hex_digit.contains('੩'));  // U+0A69 GURMUKHI DIGIT THREE
2293    /// assert!(hex_digit.contains('f'));  // U+0066 LATIN SMALL LETTER F
2294    /// assert!(hex_digit.contains('ｆ'));  // U+FF46 FULLWIDTH LATIN SMALL LETTER F
2295    /// assert!(hex_digit.contains('Ｆ'));  // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
2296    /// assert!(!hex_digit.contains('Ä'));  // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2297    /// ```
2298}
2299
2300make_binary_property! {
2301    name: "Hyphen";
2302    short_name: "Hyphen";
2303    ident: Hyphen;
2304    data_marker: crate::provider::PropertyBinaryHyphenV1;
2305    singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
2306    /// Deprecated property.
2307    ///
2308    /// Dashes that are used to mark connections between pieces of
2309    /// words, plus the Katakana middle dot.
2310}
2311
2312make_binary_property! {
2313    name: "Id_Continue";
2314    short_name: "IDC";
2315    ident: IdContinue;
2316    data_marker: crate::provider::PropertyBinaryIdContinueV1;
2317    singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
2318    /// Characters that can come after the first character in an identifier.
2319    ///
2320    /// If using NFKC to
2321    /// fold differences between characters, use [`XidContinue`] instead. See
2322    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2323    /// more details.
2324    ///
2325    /// # Example
2326    ///
2327    /// ```
2328    /// use icu::properties::CodePointSetData;
2329    /// use icu::properties::props::IdContinue;
2330    ///
2331    /// let id_continue = CodePointSetData::new::<IdContinue>();
2332    ///
2333    /// assert!(id_continue.contains('x'));  // U+0078 LATIN SMALL LETTER X
2334    /// assert!(id_continue.contains('1'));  // U+0031 DIGIT ONE
2335    /// assert!(id_continue.contains('_'));  // U+005F LOW LINE
2336    /// assert!(id_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2337    /// assert!(!id_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2338    /// assert!(id_continue.contains('\u{FC5E}'));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2339    /// ```
2340}
2341
2342make_binary_property! {
2343    name: "Ideographic";
2344    short_name: "Ideo";
2345    ident: Ideographic;
2346    data_marker: crate::provider::PropertyBinaryIdeographicV1;
2347    singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
2348    /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2349    /// ideographs, or related siniform ideographs.
2350    ///
2351    /// # Example
2352    ///
2353    /// ```
2354    /// use icu::properties::CodePointSetData;
2355    /// use icu::properties::props::Ideographic;
2356    ///
2357    /// let ideographic = CodePointSetData::new::<Ideographic>();
2358    ///
2359    /// assert!(ideographic.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2360    /// assert!(!ideographic.contains('밥'));  // U+BC25 HANGUL SYLLABLE BAB
2361    /// ```
2362}
2363
2364make_binary_property! {
2365    name: "Id_Start";
2366    short_name: "IDS";
2367    ident: IdStart;
2368    data_marker: crate::provider::PropertyBinaryIdStartV1;
2369    singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
2370    /// Characters that can begin an identifier.
2371    ///
2372    /// If using NFKC to fold differences between
2373    /// characters, use [`XidStart`] instead. See [`Unicode Standard Annex
2374    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
2375    ///
2376    /// # Example
2377    ///
2378    /// ```
2379    /// use icu::properties::CodePointSetData;
2380    /// use icu::properties::props::IdStart;
2381    ///
2382    /// let id_start = CodePointSetData::new::<IdStart>();
2383    ///
2384    /// assert!(id_start.contains('x'));  // U+0078 LATIN SMALL LETTER X
2385    /// assert!(!id_start.contains('1'));  // U+0031 DIGIT ONE
2386    /// assert!(!id_start.contains('_'));  // U+005F LOW LINE
2387    /// assert!(id_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2388    /// assert!(!id_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2389    /// assert!(id_start.contains('\u{FC5E}'));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2390    /// ```
2391}
2392
2393make_binary_property! {
2394    name: "Ids_Binary_Operator";
2395    short_name: "IDSB";
2396    ident: IdsBinaryOperator;
2397    data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
2398    singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
2399    /// Characters used as binary operators in Ideographic Description Sequences.
2400    ///
2401    /// # Example
2402    ///
2403    /// ```
2404    /// use icu::properties::CodePointSetData;
2405    /// use icu::properties::props::IdsBinaryOperator;
2406    ///
2407    /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2408    ///
2409    /// assert!(ids_binary_operator.contains('\u{2FF5}'));  // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2410    /// assert!(!ids_binary_operator.contains('\u{3006}'));  // U+3006 IDEOGRAPHIC CLOSING MARK
2411    /// ```
2412}
2413
2414make_binary_property! {
2415    name: "Ids_Trinary_Operator";
2416    short_name: "IDST";
2417    ident: IdsTrinaryOperator;
2418    data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
2419    singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
2420    /// Characters used as trinary operators in Ideographic Description Sequences.
2421    ///
2422    /// # Example
2423    ///
2424    /// ```
2425    /// use icu::properties::CodePointSetData;
2426    /// use icu::properties::props::IdsTrinaryOperator;
2427    ///
2428    /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2429    ///
2430    /// assert!(ids_trinary_operator.contains('\u{2FF2}'));  // U+2FF2 IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2431    /// assert!(ids_trinary_operator.contains('\u{2FF3}'));  // U+2FF3 IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2432    /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));  // U+2FF4 IDEOGRAPHIC DESCRIPTION CHARACTER FULL SURROUND
2433    /// assert!(!ids_trinary_operator.contains('\u{2FF5}'));  // U+2FF5 IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2434    /// assert!(!ids_trinary_operator.contains('\u{3006}'));  // U+3006 IDEOGRAPHIC CLOSING MARK
2435    /// ```
2436}
2437
2438make_binary_property! {
2439    name: "Join_Control";
2440    short_name: "Join_C";
2441    ident: JoinControl;
2442    data_marker: crate::provider::PropertyBinaryJoinControlV1;
2443    singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
2444    /// Format control characters which have specific functions for control of cursive joining
2445    /// and ligation.
2446    ///
2447    /// # Example
2448    ///
2449    /// ```
2450    /// use icu::properties::CodePointSetData;
2451    /// use icu::properties::props::JoinControl;
2452    ///
2453    /// let join_control = CodePointSetData::new::<JoinControl>();
2454    ///
2455    /// assert!(join_control.contains('\u{200C}'));  // U+200C ZERO WIDTH NON-JOINER
2456    /// assert!(join_control.contains('\u{200D}'));  // U+200D ZERO WIDTH JOINER
2457    /// assert!(!join_control.contains('\u{200E}'));  // U+200E LEFT-TO-RIGHT MARK
2458    /// ```
2459}
2460
2461make_binary_property! {
2462    name: "Logical_Order_Exception";
2463    short_name: "LOE";
2464    ident: LogicalOrderException;
2465    data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
2466    singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
2467    /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao, which use a visual order display model.
2468    ///
2469    /// # Example
2470    ///
2471    /// ```
2472    /// use icu::properties::CodePointSetData;
2473    /// use icu::properties::props::LogicalOrderException;
2474    ///
2475    /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2476    ///
2477    /// assert!(logical_order_exception.contains('ແ'));  // U+0EC1 LAO VOWEL SIGN EI
2478    /// assert!(!logical_order_exception.contains('ະ'));  // U+0EB0 LAO VOWEL SIGN A
2479    /// ```
2480}
2481
2482make_binary_property! {
2483    name: "Lowercase";
2484    short_name: "Lower";
2485    ident: Lowercase;
2486    data_marker: crate::provider::PropertyBinaryLowercaseV1;
2487    singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
2488    /// Lowercase characters.
2489    ///
2490    /// # Example
2491    ///
2492    /// ```
2493    /// use icu::properties::CodePointSetData;
2494    /// use icu::properties::props::Lowercase;
2495    ///
2496    /// let lowercase = CodePointSetData::new::<Lowercase>();
2497    ///
2498    /// assert!(lowercase.contains('a'));  // U+0061 LATIN SMALL LETTER A
2499    /// assert!(!lowercase.contains('A'));  // U+0041 LATIN CAPITAL LETTER A
2500    /// ```
2501}
2502
2503make_binary_property! {
2504    name: "Math";
2505    short_name: "Math";
2506    ident: Math;
2507    data_marker: crate::provider::PropertyBinaryMathV1;
2508    singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
2509    /// Characters used in mathematical notation.
2510    ///
2511    /// # Example
2512    ///
2513    /// ```
2514    /// use icu::properties::CodePointSetData;
2515    /// use icu::properties::props::Math;
2516    ///
2517    /// let math = CodePointSetData::new::<Math>();
2518    ///
2519    /// assert!(math.contains('='));  // U+003D EQUALS SIGN
2520    /// assert!(math.contains('+'));  // U+002B PLUS SIGN
2521    /// assert!(!math.contains('-'));  // U+002D HYPHEN-MINUS
2522    /// assert!(math.contains('−'));  // U+2212 MINUS SIGN
2523    /// assert!(!math.contains('/'));  // U+002F SOLIDUS
2524    /// assert!(math.contains('∕'));  // U+2215 DIVISION SLASH
2525    /// ```
2526}
2527
2528make_binary_property! {
2529    name: "Noncharacter_Code_Point";
2530    short_name: "NChar";
2531    ident: NoncharacterCodePoint;
2532    data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
2533    singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
2534    /// Code points permanently reserved for internal use.
2535    ///
2536    /// # Example
2537    ///
2538    /// ```
2539    /// use icu::properties::CodePointSetData;
2540    /// use icu::properties::props::NoncharacterCodePoint;
2541    ///
2542    /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2543    ///
2544    /// assert!(noncharacter_code_point.contains('\u{FDD0}'));  // first code point of the U+FDD0..U+FDEF noncharacter range
2545    /// assert!(noncharacter_code_point.contains('\u{FFFF}'));  // last code point of the Basic Multilingual Plane
2546    /// assert!(!noncharacter_code_point.contains('\u{10000}'));  // U+10000 LINEAR B SYLLABLE B008 A
2547    /// ```
2548}
2549
2550make_binary_property! {
2551    name: "NFC_Inert";
2552    short_name: "NFC_Inert";
2553    ident: NfcInert;
2554    data_marker: crate::provider::PropertyBinaryNfcInertV1;
2555    singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
2556    /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters. This is an ICU-specific property.
2557}
2558
2559make_binary_property! {
2560    name: "NFD_Inert";
2561    short_name: "NFD_Inert";
2562    ident: NfdInert;
2563    data_marker: crate::provider::PropertyBinaryNfdInertV1;
2564    singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
2565    /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters. This is an ICU-specific property.
2566}
2567
2568make_binary_property! {
2569    name: "NFKC_Inert";
2570    short_name: "NFKC_Inert";
2571    ident: NfkcInert;
2572    data_marker: crate::provider::PropertyBinaryNfkcInertV1;
2573    singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
2574    /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters. This is an ICU-specific property.
2575}
2576
2577make_binary_property! {
2578    name: "NFKD_Inert";
2579    short_name: "NFKD_Inert";
2580    ident: NfkdInert;
2581    data_marker: crate::provider::PropertyBinaryNfkdInertV1;
2582    singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
2583    /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters. This is an ICU-specific property.
2584}
2585
2586make_binary_property! {
2587    name: "Pattern_Syntax";
2588    short_name: "Pat_Syn";
2589    ident: PatternSyntax;
2590    data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
2591    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
2592    /// Characters used as syntax in patterns (such as regular expressions).
2593    ///
2594    /// See [`Unicode
2595    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2596    /// details.
2597    ///
2598    /// # Example
2599    ///
2600    /// ```
2601    /// use icu::properties::CodePointSetData;
2602    /// use icu::properties::props::PatternSyntax;
2603    ///
2604    /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2605    ///
2606    /// assert!(pattern_syntax.contains('{'));  // U+007B LEFT CURLY BRACKET
2607    /// assert!(pattern_syntax.contains('⇒'));  // U+21D2 RIGHTWARDS DOUBLE ARROW
2608    /// assert!(!pattern_syntax.contains('0'));  // U+0030 DIGIT ZERO
2609    /// ```
2610}
2611
2612make_binary_property! {
2613    name: "Pattern_White_Space";
2614    short_name: "Pat_WS";
2615    ident: PatternWhiteSpace;
2616    data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
2617    singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
2618    /// Characters used as whitespace in patterns (such as regular expressions).
2619    ///
2620    /// See
2621    /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2622    /// more details.
2623    ///
2624    /// # Example
2625    ///
2626    /// ```
2627    /// use icu::properties::CodePointSetData;
2628    /// use icu::properties::props::PatternWhiteSpace;
2629    ///
2630    /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2631    ///
2632    /// assert!(pattern_white_space.contains(' '));  // U+0020 SPACE
2633    /// assert!(pattern_white_space.contains('\u{2029}'));  // U+2029 PARAGRAPH SEPARATOR
2634    /// assert!(pattern_white_space.contains('\u{000A}'));  // U+000A NEW LINE
2635    /// assert!(!pattern_white_space.contains('\u{00A0}'));  // U+00A0 NO-BREAK SPACE
2636    /// ```
2637}
2638
2639make_binary_property! {
2640    name: "Prepended_Concatenation_Mark";
2641    short_name: "PCM";
2642    ident: PrependedConcatenationMark;
2643    data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
2644    singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
2645    /// A small class of visible format controls, which precede and then span a sequence of
2646    /// other characters, usually digits (for example, U+0600 ARABIC NUMBER SIGN).
2647}
2648
2649make_binary_property! {
2650    name: "Print";
2651    short_name: "Print";
2652    ident: Print;
2653    data_marker: crate::provider::PropertyBinaryPrintV1;
2654    singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
2655    /// Printable characters (visible characters and whitespace).
2656    ///
2657    /// This property is defined for POSIX compatibility.
2658}
2659
2660make_binary_property! {
2661    name: "Quotation_Mark";
2662    short_name: "QMark";
2663    ident: QuotationMark;
2664    data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
2665    singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
2666    /// Punctuation characters that function as quotation marks.
2667    ///
2668    /// # Example
2669    ///
2670    /// ```
2671    /// use icu::properties::CodePointSetData;
2672    /// use icu::properties::props::QuotationMark;
2673    ///
2674    /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2675    ///
2676    /// assert!(quotation_mark.contains('\''));  // U+0027 APOSTROPHE
2677    /// assert!(quotation_mark.contains('„'));  // U+201E DOUBLE LOW-9 QUOTATION MARK
2678    /// assert!(!quotation_mark.contains('<'));  // U+003C LESS-THAN SIGN
2679    /// ```
2680}
2681
2682make_binary_property! {
2683    name: "Radical";
2684    short_name: "Radical";
2685    ident: Radical;
2686    data_marker: crate::provider::PropertyBinaryRadicalV1;
2687    singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
2688    /// Characters used in the definition of Ideographic Description Sequences.
2689    ///
2690    /// # Example
2691    ///
2692    /// ```
2693    /// use icu::properties::CodePointSetData;
2694    /// use icu::properties::props::Radical;
2695    ///
2696    /// let radical = CodePointSetData::new::<Radical>();
2697    ///
2698    /// assert!(radical.contains('⺆'));  // U+2E86 CJK RADICAL BOX
2699    /// assert!(!radical.contains('\u{F95E}'));  // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E (escaped: a literal would not survive NFC normalization)
2700    /// ```
2701}
2702
2703make_binary_property! {
2704    name: "Regional_Indicator";
2705    short_name: "RI";
2706    ident: RegionalIndicator;
2707    data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
2708    singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
2709    /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
2710    ///
2711    /// # Example
2712    ///
2713    /// ```
2714    /// use icu::properties::CodePointSetData;
2715    /// use icu::properties::props::RegionalIndicator;
2716    ///
2717    /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
2718    ///
2719    /// assert!(regional_indicator.contains('🇹'));  // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2720    /// assert!(!regional_indicator.contains('Ⓣ'));  // U+24C9 CIRCLED LATIN CAPITAL LETTER T
2721    /// assert!(!regional_indicator.contains('T'));  // U+0054 LATIN CAPITAL LETTER T
2722    /// ```
2723}
2724
2725make_binary_property! {
2726    name: "Soft_Dotted";
2727    short_name: "SD";
2728    ident: SoftDotted;
2729    data_marker: crate::provider::PropertyBinarySoftDottedV1;
2730    singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
2731    /// Characters with a "soft dot", like i or j.
2732    ///
2733    /// An accent placed on these characters causes
2734    /// the dot to disappear.
2735    ///
2736    /// # Example
2737    ///
2738    /// ```
2739    /// use icu::properties::CodePointSetData;
2740    /// use icu::properties::props::SoftDotted;
2741    ///
2742    /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
2743    ///
2744    /// assert!(soft_dotted.contains('і'));  // U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
2745    /// assert!(!soft_dotted.contains('ı'));  // U+0131 LATIN SMALL LETTER DOTLESS I
2746    /// ```
2747}
2748
2749make_binary_property! {
2750    name: "Segment_Starter";
2751    short_name: "Segment_Starter";
2752    ident: SegmentStarter;
2753    data_marker: crate::provider::PropertyBinarySegmentStarterV1;
2754    singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
2755    /// Characters that are starters in terms of Unicode normalization and combining character
2756    /// sequences. This is an ICU-specific property.
2757}
2758
2759make_binary_property! {
2760    name: "Case_Sensitive";
2761    short_name: "Case_Sensitive";
2762    ident: CaseSensitive;
2763    data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
2764    singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
2765    /// Characters that are either the source of a case mapping or in the target of a case
2766    /// mapping. This is an ICU-specific property.
2767}
2768
2769make_binary_property! {
2770    name: "Sentence_Terminal";
2771    short_name: "STerm";
2772    ident: SentenceTerminal;
2773    data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
2774    singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
2775    /// Punctuation characters that generally mark the end of sentences.
2776    ///
2777    /// # Example
2778    ///
2779    /// ```
2780    /// use icu::properties::CodePointSetData;
2781    /// use icu::properties::props::SentenceTerminal;
2782    ///
2783    /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
2784    ///
2785    /// assert!(sentence_terminal.contains('.'));  // U+002E FULL STOP
2786    /// assert!(sentence_terminal.contains('?'));  // U+003F QUESTION MARK
2787    /// assert!(sentence_terminal.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2788    /// assert!(!sentence_terminal.contains(','));  // U+002C COMMA
2789    /// assert!(!sentence_terminal.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2790    /// ```
2791}
2792
2793make_binary_property! {
2794    name: "Terminal_Punctuation";
2795    short_name: "Term";
2796    ident: TerminalPunctuation;
2797    data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
2798    singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
2799    /// Punctuation characters that generally mark the end of textual units.
2800    ///
2801    /// # Example
2802    ///
2803    /// ```
2804    /// use icu::properties::CodePointSetData;
2805    /// use icu::properties::props::TerminalPunctuation;
2806    ///
2807    /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
2808    ///
2809    /// assert!(terminal_punctuation.contains('.'));  // U+002E FULL STOP
2810    /// assert!(terminal_punctuation.contains('?'));  // U+003F QUESTION MARK
2811    /// assert!(terminal_punctuation.contains('᪨'));  // U+1AA8 TAI THAM SIGN KAAN
2812    /// assert!(terminal_punctuation.contains(','));  // U+002C COMMA
2813    /// assert!(!terminal_punctuation.contains('¿'));  // U+00BF INVERTED QUESTION MARK
2814    /// ```
2815}
2816
2817make_binary_property! {
2818    name: "Unified_Ideograph";
2819    short_name: "UIdeo";
2820    ident: UnifiedIdeograph;
2821    data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
2822    singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
2823    /// Characters in the exact set of Unified CJK Ideographs specified by the standard.
2824    ///
2825    /// # Example
2826    ///
2827    /// ```
2828    /// use icu::properties::CodePointSetData;
2829    /// use icu::properties::props::UnifiedIdeograph;
2830    ///
2831    /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
2832    ///
2833    /// assert!(unified_ideograph.contains('川'));  // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2834    /// assert!(unified_ideograph.contains('木'));  // U+6728 CJK UNIFIED IDEOGRAPH-6728
2835    /// assert!(!unified_ideograph.contains('𛅸'));  // U+1B178 NUSHU CHARACTER-1B178
2836    /// ```
2837}
2838
2839make_binary_property! {
2840    name: "Uppercase";
2841    short_name: "Upper";
2842    ident: Uppercase;
2843    data_marker: crate::provider::PropertyBinaryUppercaseV1;
2844    singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
2845    /// Uppercase characters.
2846    ///
2847    /// # Example
2848    ///
2849    /// ```
2850    /// use icu::properties::CodePointSetData;
2851    /// use icu::properties::props::Uppercase;
2852    ///
2853    /// let uppercase = CodePointSetData::new::<Uppercase>();
2854    ///
2855    /// assert!(uppercase.contains('U'));  // U+0055 LATIN CAPITAL LETTER U
2856    /// assert!(!uppercase.contains('u'));  // U+0075 LATIN SMALL LETTER U
2857    /// ```
2858}
2859
2860make_binary_property! {
2861    name: "Variation_Selector";
2862    short_name: "VS";
2863    ident: VariationSelector;
2864    data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
2865    singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
2866    /// Characters that are Variation Selectors.
2867    ///
2868    /// # Example
2869    ///
2870    /// ```
2871    /// use icu::properties::CodePointSetData;
2872    /// use icu::properties::props::VariationSelector;
2873    ///
2874    /// let variation_selector = CodePointSetData::new::<VariationSelector>();
2875    ///
2876    /// assert!(variation_selector.contains('\u{180D}'));  // U+180D MONGOLIAN FREE VARIATION SELECTOR THREE
2877    /// assert!(!variation_selector.contains('\u{303E}'));  // U+303E IDEOGRAPHIC VARIATION INDICATOR
2878    /// assert!(variation_selector.contains('\u{FE0F}'));  // U+FE0F VARIATION SELECTOR-16
2879    /// assert!(!variation_selector.contains('\u{FE10}'));  // U+FE10 PRESENTATION FORM FOR VERTICAL COMMA
2880    /// assert!(variation_selector.contains('\u{E01EF}'));  // U+E01EF VARIATION SELECTOR-256
2881    /// ```
2882}
2883
2884make_binary_property! {
2885    name: "White_Space";
2886    short_name: "space";
2887    ident: WhiteSpace;
2888    data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
2889    singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
2890    /// Spaces, separator characters and other control characters which should be treated by
2891    /// programming languages as "white space" for the purpose of parsing elements.
2892    ///
2893    /// # Example
2894    ///
2895    /// ```
2896    /// use icu::properties::CodePointSetData;
2897    /// use icu::properties::props::WhiteSpace;
2898    ///
2899    /// let white_space = CodePointSetData::new::<WhiteSpace>();
2900    ///
2901    /// assert!(white_space.contains(' '));  // U+0020 SPACE
2902    /// assert!(white_space.contains('\u{000A}'));  // U+000A NEW LINE
2903    /// assert!(white_space.contains('\u{00A0}'));  // U+00A0 NO-BREAK SPACE
2904    /// assert!(!white_space.contains('\u{200B}'));  // U+200B ZERO WIDTH SPACE
2905    /// ```
2906}
2907
2908make_binary_property! {
2909    name: "Xdigit";
2910    short_name: "Xdigit";
2911    ident: Xdigit;
2912    data_marker: crate::provider::PropertyBinaryXdigitV1;
2913    singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
2914    /// Hexadecimal digits.
2915    ///
2916    /// This is defined for POSIX compatibility.
2917}
2918
2919make_binary_property! {
2920    name: "XID_Continue";
2921    short_name: "XIDC";
2922    ident: XidContinue;
2923    data_marker: crate::provider::PropertyBinaryXidContinueV1;
2924    singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
2925    /// Characters that can come after the first character in an identifier.
2926    ///
2927    /// See [`Unicode Standard Annex
2928    /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
2929    ///
2930    /// # Example
2931    ///
2932    /// ```
2933    /// use icu::properties::CodePointSetData;
2934    /// use icu::properties::props::XidContinue;
2935    ///
2936    /// let xid_continue = CodePointSetData::new::<XidContinue>();
2937    ///
2938    /// assert!(xid_continue.contains('x'));  // U+0078 LATIN SMALL LETTER X
2939    /// assert!(xid_continue.contains('1'));  // U+0031 DIGIT ONE
2940    /// assert!(xid_continue.contains('_'));  // U+005F LOW LINE
2941    /// assert!(xid_continue.contains('ߝ'));  // U+07DD NKO LETTER FA
2942    /// assert!(!xid_continue.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2943    /// assert!(!xid_continue.contains('\u{FC5E}'));  // U+FC5E ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM (in `IdContinue`, but not here)
2944    /// ```
2945}
2946
2947make_binary_property! {
2948    name: "XID_Start";
2949    short_name: "XIDS";
2950    ident: XidStart;
2951    data_marker: crate::provider::PropertyBinaryXidStartV1;
2952    singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
2953    /// Characters that can begin an identifier.
2954    ///
2955    /// See [`Unicode
2956    /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2957    /// details.
2958    ///
2959    /// # Example
2960    ///
2961    /// ```
2962    /// use icu::properties::CodePointSetData;
2963    /// use icu::properties::props::XidStart;
2964    ///
2965    /// let xid_start = CodePointSetData::new::<XidStart>();
2966    ///
2967    /// assert!(xid_start.contains('x'));
2968    /// assert!(!xid_start.contains('1'));
2969    /// assert!(!xid_start.contains('_'));
2970    /// assert!(xid_start.contains('ߝ'));  // U+07DD NKO LETTER FA
2971    /// assert!(!xid_start.contains('ⓧ'));  // U+24E7 CIRCLED LATIN SMALL LETTER X
2972    /// assert!(!xid_start.contains('\u{FC5E}'));  // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2973    /// ```
2974}
2975
2976pub use crate::emoji::EmojiSet;
2977
// Declares a marker type for an emoji (string-capable) set property.
//
// Input fields, in order:
// - `ident`: name of the generated unit struct.
// - `data_marker`: type used as the `EmojiSet::DataMarker` associated type.
// - `singleton`: identifier of the constant on `crate::provider::Baked` that backs
//   `EmojiSet::SINGLETON`; only referenced when the `compiled_data` feature is on.
// - one or more attributes (normally `///` doc comments) that are attached to the struct.
macro_rules! make_emoji_set {
    (
        ident: $ident:ident;
        data_marker: $data_marker:ty;
        singleton: $singleton:ident;
        $(#[$doc:meta])+
    ) => {
        $(#[$doc])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $ident;

        // Implementing the crate-private `Sealed` trait alongside `EmojiSet` keeps the
        // set of implementors under this crate's control.
        impl crate::private::Sealed for $ident {}

        impl EmojiSet for $ident {
            type DataMarker = $data_marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$singleton;
        }
    }
}
3000
3001make_emoji_set! {
3002    ident: BasicEmoji;
3003    data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
3004    singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
3005    /// Characters and character sequences intended for general-purpose, independent, direct input.
3006    ///
3007    /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
3008    /// details.
3009    ///
3010    /// # Example
3011    ///
3012    /// ```
3013    /// use icu::properties::EmojiSetData;
3014    /// use icu::properties::props::BasicEmoji;
3015    ///
3016    /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
3017    ///
3018    /// assert!(!basic_emoji.contains('\u{0020}'));
3019    /// assert!(!basic_emoji.contains('\n'));
3020    /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
3021    /// assert!(basic_emoji.contains_str("\u{1F983}"));
3022    /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
3023    /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}"));  // Emoji_Keycap_Sequence, keycap 3
3024    /// ```
3025}
3026
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;
    use std::fmt::Write as _;

    /// Asserts that the values listed in `consts` and the values stored in `lookup`
    /// are the same set.
    ///
    /// `lookup.map` is iterated as `(name, value)` pairs and indexed by value. Each
    /// constant is converted with `u16::from(..) as usize` and removed from that index.
    /// Anything left over on either side is collected into a report:
    ///
    /// - `Consts:` lines are constants whose value was not found in the data;
    /// - `Data:` lines are data entries that no constant accounted for.
    ///
    /// # Panics
    ///
    /// Panics (failing the calling test) with the report if it is non-empty.
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Index the data by value so each constant can be matched with a single removal.
        let mut data: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        let names = crate::PropertyNamesLong::<T>::new();
        let consts = consts.into_iter().map(|value| {
            (
                u16::from(*value) as usize,
                (
                    // The long name is only used to make the failure report readable.
                    names.get(*value).unwrap_or("<unknown>").to_string(),
                    "Consts",
                ),
            )
        });

        // Constants with no data entry go straight into the diff; matched data entries
        // are consumed, so whatever remains in `data` afterwards is data-only.
        let mut diff = Vec::new();
        for t @ (value, _) in consts {
            if data.remove(&value).is_none() {
                diff.push(t);
            }
        }
        diff.extend(data);

        let mut fmt_diff = String::new();
        for (value, (name, source)) in diff {
            writeln!(fmt_diff, "{source}:\t{name} = {value:?}")
                .expect("formatting into a String cannot fail");
        }

        assert!(
            fmt_diff.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{fmt_diff}"
        );
    }

    #[test]
    fn test_ea() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
            EastAsianWidth::ALL_VALUES,
        );
    }

    #[test]
    fn test_ccc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    #[test]
    fn test_jt() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
            JoiningType::ALL_VALUES,
        );
    }

    #[test]
    fn test_insc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    #[test]
    fn test_sb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
            SentenceBreak::ALL_VALUES,
        );
    }

    #[test]
    fn test_wb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
            WordBreak::ALL_VALUES,
        );
    }

    #[test]
    fn test_bc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
            BidiClass::ALL_VALUES,
        );
    }

    #[test]
    fn test_hst() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
            HangulSyllableType::ALL_VALUES,
        );
    }

    #[test]
    fn test_vo() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
            VerticalOrientation::ALL_VALUES,
        );
    }
}
icu_properties/props.rs

icu_properties/
props.rs