icu_properties/props.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! This module defines all available properties.
6//!
7//! Properties may be empty marker types and implement [`BinaryProperty`], or enumerations[^1]
8//! and implement [`EnumeratedProperty`].
9//!
10//! [`BinaryProperty`]s are queried through a [`CodePointSetData`](crate::CodePointSetData),
11//! while [`EnumeratedProperty`]s are queried through [`CodePointMapData`](crate::CodePointMapData).
12//!
13//! In addition, some [`EnumeratedProperty`]s also implement [`ParseableEnumeratedProperty`] or
14//! [`NamedEnumeratedProperty`]. For these properties, [`PropertyParser`](crate::PropertyParser),
15//! [`PropertyNamesLong`](crate::PropertyNamesLong), and [`PropertyNamesShort`](crate::PropertyNamesShort)
16//! can be constructed.
17//!
18//! [^1]: either Rust `enum`s, or Rust `struct`s with associated constants (open enums)
19
20pub use crate::names::{NamedEnumeratedProperty, ParseableEnumeratedProperty};
21
22pub use crate::bidi::{BidiMirroringGlyph, BidiPairedBracketType};
23
/// Re-emits an `impl` block of associated constants for a newtype "open enum",
/// and additionally generates an `ALL_VALUES` constant and a `u16` conversion.
///
/// See `test_enumerated_property_completeness` for usage.
///
/// Example input:
/// ```ignore
/// impl EastAsianWidth {
///     pub const Neutral: EastAsianWidth = EastAsianWidth(0);
///     pub const Ambiguous: EastAsianWidth = EastAsianWidth(1);
///     ...
/// }
/// ```
///
/// The constants are emitted unchanged (including their attributes and
/// visibility), followed by
/// `pub const ALL_VALUES: &'static [EastAsianWidth] = &[EastAsianWidth::Neutral, ...];`
/// which lists the constants in declaration order.
///
/// A separate `impl From<EastAsianWidth> for u16` is also produced; it casts
/// the first field of the struct to `u16`.
macro_rules! create_const_array {
    (
        $ ( #[$meta:meta] )*
        impl $enum_ty:ident {
            $( $(#[$const_meta:meta])* $v:vis const $i:ident: $t:ty = $e:expr; )*
        }
    ) => {
        $( #[$meta] )*
        impl $enum_ty {
            $(
                $(#[$const_meta])*
                $v const $i: $t = $e;
            )*

            /// All possible values of this enum in the Unicode version
            /// from this ICU4X release.
            pub const ALL_VALUES: &'static [$enum_ty] = &[
                $($enum_ty::$i),*
            ];
        }

        impl From<$enum_ty> for u16 {
            fn from(other: $enum_ty) -> Self {
                // `as` keeps the macro usable for any integer-backed newtype;
                // the cast is lossless as long as the field is no wider than `u16`.
                other.0 as u16
            }
        }
    }
}
64
65pub use crate::code_point_map::EnumeratedProperty;
66
/// Implements `crate::private::Sealed` and [`EnumeratedProperty`] for a
/// property value type.
///
/// Fields, in the order they must be written:
/// - `name` / `short_name`: string literals exposed, as bytes, through
///   `NAME` / `SHORT_NAME`;
/// - `ident`: the value type that receives the impls;
/// - `data_marker`: the type used as `DataMarker`;
/// - `singleton`: the constant on `crate::provider::Baked` used as `SINGLETON`
///   when the `compiled_data` feature is enabled;
/// - `ule_ty` (optional): when present, `zerovec::ule::AsULE` is implemented
///   as well, delegating to field `.0` of the value type.
macro_rules! make_enumerated_property {
    (
        name: $long_name:literal;
        short_name: $abbreviation:literal;
        ident: $property:path;
        data_marker: $marker:ty;
        singleton: $baked_const:ident;
        $(ule_ty: $unaligned_ty:ty;)?
    ) => {
        impl crate::private::Sealed for $property {}

        impl EnumeratedProperty for $property {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointMap<'static, Self> =
                crate::provider::Baked::$baked_const;
            const NAME: &'static [u8] = $long_name.as_bytes();
            const SHORT_NAME: &'static [u8] = $abbreviation.as_bytes();
        }

        $(
            impl zerovec::ule::AsULE for $property {
                type ULE = $unaligned_ty;

                fn to_unaligned(self) -> Self::ULE {
                    // Delegate to the wrapped integer's own conversion.
                    zerovec::ule::AsULE::to_unaligned(self.0)
                }
                fn from_unaligned(unaligned: Self::ULE) -> Self {
                    Self(zerovec::ule::AsULE::from_unaligned(unaligned))
                }
            }
        )?
    };
}
101
/// Enumerated property Bidi_Class.
///
/// The values are the character categories that the Unicode Bidirectional
/// Algorithm requires.
/// The property values are listed under
/// [Bidirectional Class Values](https://unicode.org/reports/tr44/#Bidi_Class_Values);
/// more information is available in
/// [Unicode Standard Annex #9](https://unicode.org/reports/tr41/tr41-28.html#UAX9).
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::BidiClass};
///
/// assert_eq!(CodePointMapData::<BidiClass>::new().get('y'), BidiClass::LeftToRight); // U+0079
/// assert_eq!(CodePointMapData::<BidiClass>::new().get('ع'), BidiClass::ArabicLetter); // U+0639
/// ```
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct BidiClass(pub(crate) u8);
123
124impl BidiClass {
125 /// Returns an ICU4C `UBidiClass` value.
126 pub const fn to_icu4c_value(self) -> u8 {
127 self.0
128 }
129 /// Constructor from an ICU4C `UBidiClass` value.
130 pub const fn from_icu4c_value(value: u8) -> Self {
131 Self(value)
132 }
133}
134
135create_const_array! {
136#[allow(non_upper_case_globals)]
137impl BidiClass {
138 /// (`L`) any strong left-to-right character
139 pub const LeftToRight: BidiClass = BidiClass(0);
140 /// (`R`) any strong right-to-left (non-Arabic-type) character
141 pub const RightToLeft: BidiClass = BidiClass(1);
142 /// (`EN`) any ASCII digit or Eastern Arabic-Indic digit
143 pub const EuropeanNumber: BidiClass = BidiClass(2);
144 /// (`ES`) plus and minus signs
145 pub const EuropeanSeparator: BidiClass = BidiClass(3);
146 /// (`ET`) a terminator in a numeric format context, includes currency signs
147 pub const EuropeanTerminator: BidiClass = BidiClass(4);
148 /// (`AN`) any Arabic-Indic digit
149 pub const ArabicNumber: BidiClass = BidiClass(5);
150 /// (`CS`) commas, colons, and slashes
151 pub const CommonSeparator: BidiClass = BidiClass(6);
152 /// (`B`) various newline characters
153 pub const ParagraphSeparator: BidiClass = BidiClass(7);
154 /// (`S`) various segment-related control codes
155 pub const SegmentSeparator: BidiClass = BidiClass(8);
156 /// (`WS`) spaces
157 pub const WhiteSpace: BidiClass = BidiClass(9);
158 /// (`ON`) most other symbols and punctuation marks
159 pub const OtherNeutral: BidiClass = BidiClass(10);
160 /// (`LRE`) U+202A: the LR embedding control
161 pub const LeftToRightEmbedding: BidiClass = BidiClass(11);
162 /// (`LRO`) U+202D: the LR override control
163 pub const LeftToRightOverride: BidiClass = BidiClass(12);
164 /// (`AL`) any strong right-to-left (Arabic-type) character
165 pub const ArabicLetter: BidiClass = BidiClass(13);
166 /// (`RLE`) U+202B: the RL embedding control
167 pub const RightToLeftEmbedding: BidiClass = BidiClass(14);
168 /// (`RLO`) U+202E: the RL override control
169 pub const RightToLeftOverride: BidiClass = BidiClass(15);
170 /// (`PDF`) U+202C: terminates an embedding or override control
171 pub const PopDirectionalFormat: BidiClass = BidiClass(16);
172 /// (`NSM`) any nonspacing mark
173 pub const NonspacingMark: BidiClass = BidiClass(17);
174 /// (`BN`) most format characters, control codes, or noncharacters
175 pub const BoundaryNeutral: BidiClass = BidiClass(18);
176 /// (`FSI`) U+2068: the first strong isolate control
177 pub const FirstStrongIsolate: BidiClass = BidiClass(19);
178 /// (`LRI`) U+2066: the LR isolate control
179 pub const LeftToRightIsolate: BidiClass = BidiClass(20);
180 /// (`RLI`) U+2067: the RL isolate control
181 pub const RightToLeftIsolate: BidiClass = BidiClass(21);
182 /// (`PDI`) U+2069: terminates an isolate control
183 pub const PopDirectionalIsolate: BidiClass = BidiClass(22);
184}
185}
186
187make_enumerated_property! {
188 name: "Bidi_Class";
189 short_name: "bc";
190 ident: BidiClass;
191 data_marker: crate::provider::PropertyEnumBidiClassV1;
192 singleton: SINGLETON_PROPERTY_ENUM_BIDI_CLASS_V1;
193 ule_ty: u8;
194}
195
196// This exists to encapsulate GeneralCategoryULE so that it can exist in the provider module rather than props
197pub(crate) mod gc {
198 /// Enumerated property General_Category.
199 ///
200 /// General_Category specifies the most general classification of a code point, usually
201 /// determined based on the primary characteristic of the assigned character. For example, is the
202 /// character a letter, a mark, a number, punctuation, or a symbol, and if so, of what type?
203 ///
204 /// GeneralCategory only supports specific subcategories (eg `UppercaseLetter`).
205 /// It does not support grouped categories (eg `Letter`). For grouped categories, use [`GeneralCategoryGroup`](
206 /// crate::props::GeneralCategoryGroup).
207 ///
208 /// # Example
209 ///
210 /// ```
211 /// use icu::properties::{CodePointMapData, props::GeneralCategory};
212 ///
213 /// assert_eq!(CodePointMapData::<GeneralCategory>::new().get('木'), GeneralCategory::OtherLetter); // U+6728
214 /// assert_eq!(CodePointMapData::<GeneralCategory>::new().get('🎃'), GeneralCategory::OtherSymbol); // U+1F383 JACK-O-LANTERN
215 /// ```
216 #[derive(Copy, Clone, PartialEq, Eq, Debug, Ord, PartialOrd, Hash)]
217 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
218 #[cfg_attr(feature = "datagen", derive(databake::Bake))]
219 #[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
220 #[allow(clippy::exhaustive_enums)] // this type is stable
221 #[zerovec::make_ule(GeneralCategoryULE)]
222 #[repr(u8)]
223 pub enum GeneralCategory {
224 /// (`Cn`) A reserved unassigned code point or a noncharacter
225 Unassigned = 0,
226
227 /// (`Lu`) An uppercase letter
228 UppercaseLetter = 1,
229 /// (`Ll`) A lowercase letter
230 LowercaseLetter = 2,
231 /// (`Lt`) A digraphic letter, with first part uppercase
232 TitlecaseLetter = 3,
233 /// (`Lm`) A modifier letter
234 ModifierLetter = 4,
235 /// (`Lo`) Other letters, including syllables and ideographs
236 OtherLetter = 5,
237
238 /// (`Mn`) A nonspacing combining mark (zero advance width)
239 NonspacingMark = 6,
240 /// (`Mc`) A spacing combining mark (positive advance width)
241 SpacingMark = 8,
242 /// (`Me`) An enclosing combining mark
243 EnclosingMark = 7,
244
245 /// (`Nd`) A decimal digit
246 DecimalNumber = 9,
247 /// (`Nl`) A letterlike numeric character
248 LetterNumber = 10,
249 /// (`No`) A numeric character of other type
250 OtherNumber = 11,
251
252 /// (`Zs`) A space character (of various non-zero widths)
253 SpaceSeparator = 12,
254 /// (`Zl`) U+2028 LINE SEPARATOR only
255 LineSeparator = 13,
256 /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
257 ParagraphSeparator = 14,
258
259 /// (`Cc`) A C0 or C1 control code
260 Control = 15,
261 /// (`Cf`) A format control character
262 Format = 16,
263 /// (`Co`) A private-use character
264 PrivateUse = 17,
265 /// (`Cs`) A surrogate code point
266 Surrogate = 18,
267
268 /// (`Pd`) A dash or hyphen punctuation mark
269 DashPunctuation = 19,
270 /// (`Ps`) An opening punctuation mark (of a pair)
271 OpenPunctuation = 20,
272 /// (`Pe`) A closing punctuation mark (of a pair)
273 ClosePunctuation = 21,
274 /// (`Pc`) A connecting punctuation mark, like a tie
275 ConnectorPunctuation = 22,
276 /// (`Pi`) An initial quotation mark
277 InitialPunctuation = 28,
278 /// (`Pf`) A final quotation mark
279 FinalPunctuation = 29,
280 /// (`Po`) A punctuation mark of other type
281 OtherPunctuation = 23,
282
283 /// (`Sm`) A symbol of mathematical use
284 MathSymbol = 24,
285 /// (`Sc`) A currency sign
286 CurrencySymbol = 25,
287 /// (`Sk`) A non-letterlike modifier symbol
288 ModifierSymbol = 26,
289 /// (`So`) A symbol of other type
290 OtherSymbol = 27,
291 }
292}
293
294pub use gc::GeneralCategory;
295
296impl GeneralCategory {
297 /// All possible values of this enum
298 pub const ALL_VALUES: &'static [GeneralCategory] = &[
299 GeneralCategory::Unassigned,
300 GeneralCategory::UppercaseLetter,
301 GeneralCategory::LowercaseLetter,
302 GeneralCategory::TitlecaseLetter,
303 GeneralCategory::ModifierLetter,
304 GeneralCategory::OtherLetter,
305 GeneralCategory::NonspacingMark,
306 GeneralCategory::SpacingMark,
307 GeneralCategory::EnclosingMark,
308 GeneralCategory::DecimalNumber,
309 GeneralCategory::LetterNumber,
310 GeneralCategory::OtherNumber,
311 GeneralCategory::SpaceSeparator,
312 GeneralCategory::LineSeparator,
313 GeneralCategory::ParagraphSeparator,
314 GeneralCategory::Control,
315 GeneralCategory::Format,
316 GeneralCategory::PrivateUse,
317 GeneralCategory::Surrogate,
318 GeneralCategory::DashPunctuation,
319 GeneralCategory::OpenPunctuation,
320 GeneralCategory::ClosePunctuation,
321 GeneralCategory::ConnectorPunctuation,
322 GeneralCategory::InitialPunctuation,
323 GeneralCategory::FinalPunctuation,
324 GeneralCategory::OtherPunctuation,
325 GeneralCategory::MathSymbol,
326 GeneralCategory::CurrencySymbol,
327 GeneralCategory::ModifierSymbol,
328 GeneralCategory::OtherSymbol,
329 ];
330}
331
/// Error value for `impl TryFrom<u8> for GeneralCategory`.
#[derive(Clone, Copy, Debug, Default, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[non_exhaustive]
pub struct GeneralCategoryOutOfBoundsError;
336
337impl TryFrom<u8> for GeneralCategory {
338 type Error = GeneralCategoryOutOfBoundsError;
339 /// Construct this [`GeneralCategory`] from an integer, returning
340 /// an error if it is out of bounds
341 fn try_from(val: u8) -> Result<Self, GeneralCategoryOutOfBoundsError> {
342 GeneralCategory::new_from_u8(val).ok_or(GeneralCategoryOutOfBoundsError)
343 }
344}
345
346make_enumerated_property! {
347 name: "General_Category";
348 short_name: "gc";
349 ident: GeneralCategory;
350 data_marker: crate::provider::PropertyEnumGeneralCategoryV1;
351 singleton: SINGLETON_PROPERTY_ENUM_GENERAL_CATEGORY_V1;
352}
353
/// Groupings of multiple General_Category property values.
///
/// Instances of `GeneralCategoryGroup` represent the defined multi-category
/// values that are useful for users in certain contexts, such as regex. In
/// other words, unlike [`GeneralCategory`], this supports groups of general
/// categories: for example, `Letter` is the union of `UppercaseLetter`,
/// `LowercaseLetter`, etc.
///
/// See <https://www.unicode.org/reports/tr44/>.
///
/// The mask values correspond to the `U_GC_XX_MASK` constants in ICU4C.
/// See `UCharCategory` and `U_GET_GC_MASK` in ICU4C.
#[derive(Copy, Clone, PartialEq, Debug, Eq)]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct GeneralCategoryGroup(pub(crate) u32);
373
374impl crate::private::Sealed for GeneralCategoryGroup {}
375
376use GeneralCategory as GC;
377use GeneralCategoryGroup as GCG;
378
379#[allow(non_upper_case_globals)]
380impl GeneralCategoryGroup {
381 /// (`Lu`) An uppercase letter
382 pub const UppercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::UppercaseLetter as u32));
383 /// (`Ll`) A lowercase letter
384 pub const LowercaseLetter: GeneralCategoryGroup = GCG(1 << (GC::LowercaseLetter as u32));
385 /// (`Lt`) A digraphic letter, with first part uppercase
386 pub const TitlecaseLetter: GeneralCategoryGroup = GCG(1 << (GC::TitlecaseLetter as u32));
387 /// (`Lm`) A modifier letter
388 pub const ModifierLetter: GeneralCategoryGroup = GCG(1 << (GC::ModifierLetter as u32));
389 /// (`Lo`) Other letters, including syllables and ideographs
390 pub const OtherLetter: GeneralCategoryGroup = GCG(1 << (GC::OtherLetter as u32));
391 /// (`LC`) The union of UppercaseLetter, LowercaseLetter, and TitlecaseLetter
392 pub const CasedLetter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
393 | (1 << (GC::LowercaseLetter as u32))
394 | (1 << (GC::TitlecaseLetter as u32)));
395 /// (`L`) The union of all letter categories
396 pub const Letter: GeneralCategoryGroup = GCG((1 << (GC::UppercaseLetter as u32))
397 | (1 << (GC::LowercaseLetter as u32))
398 | (1 << (GC::TitlecaseLetter as u32))
399 | (1 << (GC::ModifierLetter as u32))
400 | (1 << (GC::OtherLetter as u32)));
401
402 /// (`Mn`) A nonspacing combining mark (zero advance width)
403 pub const NonspacingMark: GeneralCategoryGroup = GCG(1 << (GC::NonspacingMark as u32));
404 /// (`Mc`) A spacing combining mark (positive advance width)
405 pub const EnclosingMark: GeneralCategoryGroup = GCG(1 << (GC::EnclosingMark as u32));
406 /// (`Me`) An enclosing combining mark
407 pub const SpacingMark: GeneralCategoryGroup = GCG(1 << (GC::SpacingMark as u32));
408 /// (`M`) The union of all mark categories
409 pub const Mark: GeneralCategoryGroup = GCG((1 << (GC::NonspacingMark as u32))
410 | (1 << (GC::EnclosingMark as u32))
411 | (1 << (GC::SpacingMark as u32)));
412
413 /// (`Nd`) A decimal digit
414 pub const DecimalNumber: GeneralCategoryGroup = GCG(1 << (GC::DecimalNumber as u32));
415 /// (`Nl`) A letterlike numeric character
416 pub const LetterNumber: GeneralCategoryGroup = GCG(1 << (GC::LetterNumber as u32));
417 /// (`No`) A numeric character of other type
418 pub const OtherNumber: GeneralCategoryGroup = GCG(1 << (GC::OtherNumber as u32));
419 /// (`N`) The union of all number categories
420 pub const Number: GeneralCategoryGroup = GCG((1 << (GC::DecimalNumber as u32))
421 | (1 << (GC::LetterNumber as u32))
422 | (1 << (GC::OtherNumber as u32)));
423
424 /// (`Zs`) A space character (of various non-zero widths)
425 pub const SpaceSeparator: GeneralCategoryGroup = GCG(1 << (GC::SpaceSeparator as u32));
426 /// (`Zl`) U+2028 LINE SEPARATOR only
427 pub const LineSeparator: GeneralCategoryGroup = GCG(1 << (GC::LineSeparator as u32));
428 /// (`Zp`) U+2029 PARAGRAPH SEPARATOR only
429 pub const ParagraphSeparator: GeneralCategoryGroup = GCG(1 << (GC::ParagraphSeparator as u32));
430 /// (`Z`) The union of all separator categories
431 pub const Separator: GeneralCategoryGroup = GCG((1 << (GC::SpaceSeparator as u32))
432 | (1 << (GC::LineSeparator as u32))
433 | (1 << (GC::ParagraphSeparator as u32)));
434
435 /// (`Cc`) A C0 or C1 control code
436 pub const Control: GeneralCategoryGroup = GCG(1 << (GC::Control as u32));
437 /// (`Cf`) A format control character
438 pub const Format: GeneralCategoryGroup = GCG(1 << (GC::Format as u32));
439 /// (`Co`) A private-use character
440 pub const PrivateUse: GeneralCategoryGroup = GCG(1 << (GC::PrivateUse as u32));
441 /// (`Cs`) A surrogate code point
442 pub const Surrogate: GeneralCategoryGroup = GCG(1 << (GC::Surrogate as u32));
443 /// (`Cn`) A reserved unassigned code point or a noncharacter
444 pub const Unassigned: GeneralCategoryGroup = GCG(1 << (GC::Unassigned as u32));
445 /// (`C`) The union of all control code, reserved, and unassigned categories
446 pub const Other: GeneralCategoryGroup = GCG((1 << (GC::Control as u32))
447 | (1 << (GC::Format as u32))
448 | (1 << (GC::PrivateUse as u32))
449 | (1 << (GC::Surrogate as u32))
450 | (1 << (GC::Unassigned as u32)));
451
452 /// (`Pd`) A dash or hyphen punctuation mark
453 pub const DashPunctuation: GeneralCategoryGroup = GCG(1 << (GC::DashPunctuation as u32));
454 /// (`Ps`) An opening punctuation mark (of a pair)
455 pub const OpenPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OpenPunctuation as u32));
456 /// (`Pe`) A closing punctuation mark (of a pair)
457 pub const ClosePunctuation: GeneralCategoryGroup = GCG(1 << (GC::ClosePunctuation as u32));
458 /// (`Pc`) A connecting punctuation mark, like a tie
459 pub const ConnectorPunctuation: GeneralCategoryGroup =
460 GCG(1 << (GC::ConnectorPunctuation as u32));
461 /// (`Pi`) An initial quotation mark
462 pub const InitialPunctuation: GeneralCategoryGroup = GCG(1 << (GC::InitialPunctuation as u32));
463 /// (`Pf`) A final quotation mark
464 pub const FinalPunctuation: GeneralCategoryGroup = GCG(1 << (GC::FinalPunctuation as u32));
465 /// (`Po`) A punctuation mark of other type
466 pub const OtherPunctuation: GeneralCategoryGroup = GCG(1 << (GC::OtherPunctuation as u32));
467 /// (`P`) The union of all punctuation categories
468 pub const Punctuation: GeneralCategoryGroup = GCG((1 << (GC::DashPunctuation as u32))
469 | (1 << (GC::OpenPunctuation as u32))
470 | (1 << (GC::ClosePunctuation as u32))
471 | (1 << (GC::ConnectorPunctuation as u32))
472 | (1 << (GC::OtherPunctuation as u32))
473 | (1 << (GC::InitialPunctuation as u32))
474 | (1 << (GC::FinalPunctuation as u32)));
475
476 /// (`Sm`) A symbol of mathematical use
477 pub const MathSymbol: GeneralCategoryGroup = GCG(1 << (GC::MathSymbol as u32));
478 /// (`Sc`) A currency sign
479 pub const CurrencySymbol: GeneralCategoryGroup = GCG(1 << (GC::CurrencySymbol as u32));
480 /// (`Sk`) A non-letterlike modifier symbol
481 pub const ModifierSymbol: GeneralCategoryGroup = GCG(1 << (GC::ModifierSymbol as u32));
482 /// (`So`) A symbol of other type
483 pub const OtherSymbol: GeneralCategoryGroup = GCG(1 << (GC::OtherSymbol as u32));
484 /// (`S`) The union of all symbol categories
485 pub const Symbol: GeneralCategoryGroup = GCG((1 << (GC::MathSymbol as u32))
486 | (1 << (GC::CurrencySymbol as u32))
487 | (1 << (GC::ModifierSymbol as u32))
488 | (1 << (GC::OtherSymbol as u32)));
489
490 const ALL: u32 = (1 << (GC::FinalPunctuation as u32 + 1)) - 1;
491
492 /// Return whether the code point belongs in the provided multi-value category.
493 ///
494 /// ```
495 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
496 /// use icu::properties::CodePointMapData;
497 ///
498 /// let gc = CodePointMapData::<GeneralCategory>::new();
499 ///
500 /// assert_eq!(gc.get('A'), GeneralCategory::UppercaseLetter);
501 /// assert!(GeneralCategoryGroup::CasedLetter.contains(gc.get('A')));
502 ///
503 /// // U+0B1E ORIYA LETTER NYA
504 /// assert_eq!(gc.get('ଞ'), GeneralCategory::OtherLetter);
505 /// assert!(GeneralCategoryGroup::Letter.contains(gc.get('ଞ')));
506 /// assert!(!GeneralCategoryGroup::CasedLetter.contains(gc.get('ଞ')));
507 ///
508 /// // U+0301 COMBINING ACUTE ACCENT
509 /// assert_eq!(gc.get('\u{0301}'), GeneralCategory::NonspacingMark);
510 /// assert!(GeneralCategoryGroup::Mark.contains(gc.get('\u{0301}')));
511 /// assert!(!GeneralCategoryGroup::Letter.contains(gc.get('\u{0301}')));
512 ///
513 /// assert_eq!(gc.get('0'), GeneralCategory::DecimalNumber);
514 /// assert!(GeneralCategoryGroup::Number.contains(gc.get('0')));
515 /// assert!(!GeneralCategoryGroup::Mark.contains(gc.get('0')));
516 ///
517 /// assert_eq!(gc.get('('), GeneralCategory::OpenPunctuation);
518 /// assert!(GeneralCategoryGroup::Punctuation.contains(gc.get('(')));
519 /// assert!(!GeneralCategoryGroup::Number.contains(gc.get('(')));
520 ///
521 /// // U+2713 CHECK MARK
522 /// assert_eq!(gc.get('✓'), GeneralCategory::OtherSymbol);
523 /// assert!(GeneralCategoryGroup::Symbol.contains(gc.get('✓')));
524 /// assert!(!GeneralCategoryGroup::Punctuation.contains(gc.get('✓')));
525 ///
526 /// assert_eq!(gc.get(' '), GeneralCategory::SpaceSeparator);
527 /// assert!(GeneralCategoryGroup::Separator.contains(gc.get(' ')));
528 /// assert!(!GeneralCategoryGroup::Symbol.contains(gc.get(' ')));
529 ///
530 /// // U+E007F CANCEL TAG
531 /// assert_eq!(gc.get('\u{E007F}'), GeneralCategory::Format);
532 /// assert!(GeneralCategoryGroup::Other.contains(gc.get('\u{E007F}')));
533 /// assert!(!GeneralCategoryGroup::Separator.contains(gc.get('\u{E007F}')));
534 /// ```
535 pub const fn contains(self, val: GeneralCategory) -> bool {
536 0 != (1 << (val as u32)) & self.0
537 }
538
539 /// Produce a GeneralCategoryGroup that is the inverse of this one
540 ///
541 /// # Example
542 ///
543 /// ```rust
544 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
545 ///
546 /// let letter = GeneralCategoryGroup::Letter;
547 /// let not_letter = letter.complement();
548 ///
549 /// assert!(not_letter.contains(GeneralCategory::MathSymbol));
550 /// assert!(!letter.contains(GeneralCategory::MathSymbol));
551 /// assert!(not_letter.contains(GeneralCategory::OtherPunctuation));
552 /// assert!(!letter.contains(GeneralCategory::OtherPunctuation));
553 /// assert!(!not_letter.contains(GeneralCategory::UppercaseLetter));
554 /// assert!(letter.contains(GeneralCategory::UppercaseLetter));
555 /// ```
556 pub const fn complement(self) -> Self {
557 // Mask off things not in Self::ALL to guarantee the mask
558 // values stay in-range
559 GeneralCategoryGroup(!self.0 & Self::ALL)
560 }
561
562 /// Return the group representing all GeneralCategory values
563 ///
564 /// # Example
565 ///
566 /// ```rust
567 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
568 ///
569 /// let all = GeneralCategoryGroup::all();
570 ///
571 /// assert!(all.contains(GeneralCategory::MathSymbol));
572 /// assert!(all.contains(GeneralCategory::OtherPunctuation));
573 /// assert!(all.contains(GeneralCategory::UppercaseLetter));
574 /// ```
575 pub const fn all() -> Self {
576 Self(Self::ALL)
577 }
578
579 /// Return the empty group
580 ///
581 /// # Example
582 ///
583 /// ```rust
584 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
585 ///
586 /// let empty = GeneralCategoryGroup::empty();
587 ///
588 /// assert!(!empty.contains(GeneralCategory::MathSymbol));
589 /// assert!(!empty.contains(GeneralCategory::OtherPunctuation));
590 /// assert!(!empty.contains(GeneralCategory::UppercaseLetter));
591 /// ```
592 pub const fn empty() -> Self {
593 Self(0)
594 }
595
596 /// Take the union of two groups
597 ///
598 /// # Example
599 ///
600 /// ```rust
601 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
602 ///
603 /// let letter = GeneralCategoryGroup::Letter;
604 /// let symbol = GeneralCategoryGroup::Symbol;
605 /// let union = letter.union(symbol);
606 ///
607 /// assert!(union.contains(GeneralCategory::MathSymbol));
608 /// assert!(!union.contains(GeneralCategory::OtherPunctuation));
609 /// assert!(union.contains(GeneralCategory::UppercaseLetter));
610 /// ```
611 pub const fn union(self, other: Self) -> Self {
612 Self(self.0 | other.0)
613 }
614
615 /// Take the intersection of two groups
616 ///
617 /// # Example
618 ///
619 /// ```rust
620 /// use icu::properties::props::{GeneralCategory, GeneralCategoryGroup};
621 ///
622 /// let letter = GeneralCategoryGroup::Letter;
623 /// let lu = GeneralCategoryGroup::UppercaseLetter;
624 /// let intersection = letter.intersection(lu);
625 ///
626 /// assert!(!intersection.contains(GeneralCategory::MathSymbol));
627 /// assert!(!intersection.contains(GeneralCategory::OtherPunctuation));
628 /// assert!(intersection.contains(GeneralCategory::UppercaseLetter));
629 /// assert!(!intersection.contains(GeneralCategory::LowercaseLetter));
630 /// ```
631 pub const fn intersection(self, other: Self) -> Self {
632 Self(self.0 & other.0)
633 }
634}
635
636impl From<GeneralCategory> for GeneralCategoryGroup {
637 fn from(subcategory: GeneralCategory) -> Self {
638 GeneralCategoryGroup(1 << (subcategory as u32))
639 }
640}
641impl From<u32> for GeneralCategoryGroup {
642 fn from(mask: u32) -> Self {
643 // Mask off things not in Self::ALL to guarantee the mask
644 // values stay in-range
645 GeneralCategoryGroup(mask & Self::ALL)
646 }
647}
648impl From<GeneralCategoryGroup> for u32 {
649 fn from(group: GeneralCategoryGroup) -> Self {
650 group.0
651 }
652}
653
/// Enumerated property Script.
///
/// This type is used with both the Script and the Script_Extensions Unicode properties.
/// Every character is assigned a single Script, but a character that is used in a
/// particular subset of scripts will be in more than one Script_Extensions set.
/// For example, DEVANAGARI DIGIT NINE has Script=Devanagari, but is also in the
/// Script_Extensions set for Dogra, Kaithi, and Mahajani. To determine whether a
/// code point belongs to a certain script, you should use
/// [`ScriptWithExtensionsBorrowed::has_script`].
///
/// For more information, see UAX #24: <http://www.unicode.org/reports/tr24/>.
/// See `UScriptCode` in ICU4C.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::Script};
///
/// assert_eq!(CodePointMapData::<Script>::new().get('木'), Script::Han); // U+6728
/// assert_eq!(CodePointMapData::<Script>::new().get('🎃'), Script::Common); // U+1F383 JACK-O-LANTERN
/// ```
/// [`ScriptWithExtensionsBorrowed::has_script`]: crate::script::ScriptWithExtensionsBorrowed::has_script
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct Script(pub(crate) u16);
683
684impl Script {
685 /// Returns an ICU4C `UScriptCode` value.
686 pub const fn to_icu4c_value(self) -> u16 {
687 self.0
688 }
689 /// Constructor from an ICU4C `UScriptCode` value.
690 pub const fn from_icu4c_value(value: u16) -> Self {
691 Self(value)
692 }
693}
694
695create_const_array! {
696#[allow(missing_docs)] // These constants don't need individual documentation.
697#[allow(non_upper_case_globals)]
698impl Script {
699 pub const Adlam: Script = Script(167);
700 pub const Ahom: Script = Script(161);
701 pub const AnatolianHieroglyphs: Script = Script(156);
702 pub const Arabic: Script = Script(2);
703 pub const Armenian: Script = Script(3);
704 pub const Avestan: Script = Script(117);
705 pub const Balinese: Script = Script(62);
706 pub const Bamum: Script = Script(130);
707 pub const BassaVah: Script = Script(134);
708 pub const Batak: Script = Script(63);
709 pub const Bengali: Script = Script(4);
710 pub const Bhaiksuki: Script = Script(168);
711 pub const Bopomofo: Script = Script(5);
712 pub const Brahmi: Script = Script(65);
713 pub const Braille: Script = Script(46);
714 pub const Buginese: Script = Script(55);
715 pub const Buhid: Script = Script(44);
716 pub const CanadianAboriginal: Script = Script(40);
717 pub const Carian: Script = Script(104);
718 pub const CaucasianAlbanian: Script = Script(159);
719 pub const Chakma: Script = Script(118);
720 pub const Cham: Script = Script(66);
721 pub const Cherokee: Script = Script(6);
722 pub const Chorasmian: Script = Script(189);
723 pub const Common: Script = Script(0);
724 pub const Coptic: Script = Script(7);
725 pub const Cuneiform: Script = Script(101);
726 pub const Cypriot: Script = Script(47);
727 pub const CyproMinoan: Script = Script(193);
728 pub const Cyrillic: Script = Script(8);
729 pub const Deseret: Script = Script(9);
730 pub const Devanagari: Script = Script(10);
731 pub const DivesAkuru: Script = Script(190);
732 pub const Dogra: Script = Script(178);
733 pub const Duployan: Script = Script(135);
734 pub const EgyptianHieroglyphs: Script = Script(71);
735 pub const Elbasan: Script = Script(136);
736 pub const Elymaic: Script = Script(185);
737 pub const Ethiopian: Script = Script(11);
738 pub const Georgian: Script = Script(12);
739 pub const Glagolitic: Script = Script(56);
740 pub const Gothic: Script = Script(13);
741 pub const Grantha: Script = Script(137);
742 pub const Greek: Script = Script(14);
743 pub const Gujarati: Script = Script(15);
744 pub const GunjalaGondi: Script = Script(179);
745 pub const Gurmukhi: Script = Script(16);
746 pub const Han: Script = Script(17);
747 pub const Hangul: Script = Script(18);
748 pub const HanifiRohingya: Script = Script(182);
749 pub const Hanunoo: Script = Script(43);
750 pub const Hatran: Script = Script(162);
751 pub const Hebrew: Script = Script(19);
752 pub const Hiragana: Script = Script(20);
753 pub const ImperialAramaic: Script = Script(116);
754 pub const Inherited: Script = Script(1);
755 pub const InscriptionalPahlavi: Script = Script(122);
756 pub const InscriptionalParthian: Script = Script(125);
757 pub const Javanese: Script = Script(78);
758 pub const Kaithi: Script = Script(120);
759 pub const Kannada: Script = Script(21);
760 pub const Katakana: Script = Script(22);
761 pub const Kawi: Script = Script(198);
762 pub const KayahLi: Script = Script(79);
763 pub const Kharoshthi: Script = Script(57);
764 pub const KhitanSmallScript: Script = Script(191);
765 pub const Khmer: Script = Script(23);
766 pub const Khojki: Script = Script(157);
767 pub const Khudawadi: Script = Script(145);
768 pub const Lao: Script = Script(24);
769 pub const Latin: Script = Script(25);
770 pub const Lepcha: Script = Script(82);
771 pub const Limbu: Script = Script(48);
772 pub const LinearA: Script = Script(83);
773 pub const LinearB: Script = Script(49);
774 pub const Lisu: Script = Script(131);
775 pub const Lycian: Script = Script(107);
776 pub const Lydian: Script = Script(108);
777 pub const Mahajani: Script = Script(160);
778 pub const Makasar: Script = Script(180);
779 pub const Malayalam: Script = Script(26);
780 pub const Mandaic: Script = Script(84);
781 pub const Manichaean: Script = Script(121);
782 pub const Marchen: Script = Script(169);
783 pub const MasaramGondi: Script = Script(175);
784 pub const Medefaidrin: Script = Script(181);
785 pub const MeeteiMayek: Script = Script(115);
786 pub const MendeKikakui: Script = Script(140);
787 pub const MeroiticCursive: Script = Script(141);
788 pub const MeroiticHieroglyphs: Script = Script(86);
789 pub const Miao: Script = Script(92);
790 pub const Modi: Script = Script(163);
791 pub const Mongolian: Script = Script(27);
792 pub const Mro: Script = Script(149);
793 pub const Multani: Script = Script(164);
794 pub const Myanmar: Script = Script(28);
795 pub const Nabataean: Script = Script(143);
796 pub const NagMundari: Script = Script(199);
797 pub const Nandinagari: Script = Script(187);
798 pub const Nastaliq: Script = Script(200);
799 pub const NewTaiLue: Script = Script(59);
800 pub const Newa: Script = Script(170);
801 pub const Nko: Script = Script(87);
802 pub const Nushu: Script = Script(150);
803 pub const NyiakengPuachueHmong: Script = Script(186);
804 pub const Ogham: Script = Script(29);
805 pub const OlChiki: Script = Script(109);
806 pub const OldHungarian: Script = Script(76);
807 pub const OldItalic: Script = Script(30);
808 pub const OldNorthArabian: Script = Script(142);
809 pub const OldPermic: Script = Script(89);
810 pub const OldPersian: Script = Script(61);
811 pub const OldSogdian: Script = Script(184);
812 pub const OldSouthArabian: Script = Script(133);
813 pub const OldTurkic: Script = Script(88);
814 pub const OldUyghur: Script = Script(194);
815 pub const Oriya: Script = Script(31);
816 pub const Osage: Script = Script(171);
817 pub const Osmanya: Script = Script(50);
818 pub const PahawhHmong: Script = Script(75);
819 pub const Palmyrene: Script = Script(144);
820 pub const PauCinHau: Script = Script(165);
821 pub const PhagsPa: Script = Script(90);
822 pub const Phoenician: Script = Script(91);
823 pub const PsalterPahlavi: Script = Script(123);
824 pub const Rejang: Script = Script(110);
825 pub const Runic: Script = Script(32);
826 pub const Samaritan: Script = Script(126);
827 pub const Saurashtra: Script = Script(111);
828 pub const Sharada: Script = Script(151);
829 pub const Shavian: Script = Script(51);
830 pub const Siddham: Script = Script(166);
831 pub const SignWriting: Script = Script(112);
832 pub const Sinhala: Script = Script(33);
833 pub const Sogdian: Script = Script(183);
834 pub const SoraSompeng: Script = Script(152);
835 pub const Soyombo: Script = Script(176);
836 pub const Sundanese: Script = Script(113);
837 pub const SylotiNagri: Script = Script(58);
838 pub const Syriac: Script = Script(34);
839 pub const Tagalog: Script = Script(42);
840 pub const Tagbanwa: Script = Script(45);
841 pub const TaiLe: Script = Script(52);
842 pub const TaiTham: Script = Script(106);
843 pub const TaiViet: Script = Script(127);
844 pub const Takri: Script = Script(153);
845 pub const Tamil: Script = Script(35);
846 pub const Tangsa: Script = Script(195);
847 pub const Tangut: Script = Script(154);
848 pub const Telugu: Script = Script(36);
849 pub const Thaana: Script = Script(37);
850 pub const Thai: Script = Script(38);
851 pub const Tibetan: Script = Script(39);
852 pub const Tifinagh: Script = Script(60);
853 pub const Tirhuta: Script = Script(158);
854 pub const Toto: Script = Script(196);
855 pub const Ugaritic: Script = Script(53);
856 pub const Unknown: Script = Script(103);
857 pub const Vai: Script = Script(99);
858 pub const Vithkuqi: Script = Script(197);
859 pub const Wancho: Script = Script(188);
860 pub const WarangCiti: Script = Script(146);
861 pub const Yezidi: Script = Script(192);
862 pub const Yi: Script = Script(41);
863 pub const ZanabazarSquare: Script = Script(177);
864}
865}
866
// Invokes `make_enumerated_property!` (defined outside this excerpt) for `Script`.
// `name` / `short_name` are the long and short names of the Unicode property; the
// remaining keys hand the macro the data marker, the singleton identifier and the ULE
// storage type (here the ULE form of `u16`).
// NOTE(review): the macro expansion is not visible here; presumably it implements
// `EnumeratedProperty` for `Script`. Confirm against the macro definition.
make_enumerated_property! {
    name: "Script";
    short_name: "sc";
    ident: Script;
    data_marker: crate::provider::PropertyEnumScriptV1;
    singleton: SINGLETON_PROPERTY_ENUM_SCRIPT_V1;
    ule_ty: <u16 as zerovec::ule::AsULE>::ULE;
}
875
/// The Unicode `Hangul_Syllable_Type` enumerated property.
///
/// Hangul syllables can be encoded either as precomposed syllable code points or as
/// sequences of conjoining Jamo. This property classifies each Hangul code point by the
/// role it plays in that scheme.
///
/// Further reading: the [Unicode Korean FAQ](https://www.unicode.org/faq/korean.html).
///
/// # Example
///
/// ```
/// use icu::properties::{props::HangulSyllableType, CodePointMapData};
///
/// let map = CodePointMapData::<HangulSyllableType>::new();
///
/// // U+1100
/// assert_eq!(map.get('\u{1100}'), HangulSyllableType::LeadingJamo);
/// // U+AC00
/// assert_eq!(map.get('\u{AC00}'), HangulSyllableType::LeadingVowelSyllable);
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct HangulSyllableType(pub(crate) u8);

impl HangulSyllableType {
    /// Wraps a raw ICU4C `UHangulSyllableType` value without validating it.
    pub const fn from_icu4c_value(value: u8) -> Self {
        HangulSyllableType(value)
    }
    /// Unwraps this value into the matching ICU4C `UHangulSyllableType` integer.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
909
// Named values of `HangulSyllableType`. The block is routed through
// `create_const_array!`, which re-emits every constant unchanged and additionally
// generates an `ALL_VALUES` slice listing all of them.
create_const_array! {
// The constants use UpperCamelCase so that they read like enum variants (open enum).
#[allow(non_upper_case_globals)]
impl HangulSyllableType {
    /// (`NA`) not applicable (e.g. not a Hangul code point).
    pub const NotApplicable: HangulSyllableType = HangulSyllableType(0);
    /// (`L`) a conjoining leading consonant Jamo.
    pub const LeadingJamo: HangulSyllableType = HangulSyllableType(1);
    /// (`V`) a conjoining vowel Jamo.
    pub const VowelJamo: HangulSyllableType = HangulSyllableType(2);
    /// (`T`) a conjoining trailing consonant Jamo.
    pub const TrailingJamo: HangulSyllableType = HangulSyllableType(3);
    /// (`LV`) a precomposed syllable with a leading consonant and a vowel.
    pub const LeadingVowelSyllable: HangulSyllableType = HangulSyllableType(4);
    /// (`LVT`) a precomposed syllable with a leading consonant, a vowel, and a trailing consonant.
    pub const LeadingVowelTrailingSyllable: HangulSyllableType = HangulSyllableType(5);
}
}
927
// Invokes `make_enumerated_property!` (defined outside this excerpt) for
// `HangulSyllableType`: Unicode long name `Hangul_Syllable_Type`, short name `hst`.
// The value is a single `u8`, which is also passed as the ULE storage type.
// NOTE(review): the macro expansion is not visible here; presumably it implements
// `EnumeratedProperty` for the type. Confirm against the macro definition.
make_enumerated_property! {
    name: "Hangul_Syllable_Type";
    short_name: "hst";
    ident: HangulSyllableType;
    data_marker: crate::provider::PropertyEnumHangulSyllableTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_HANGUL_SYLLABLE_TYPE_V1;
    ule_ty: u8;

}
937
/// Enumerated property East_Asian_Width.
///
/// See "Definition" in UAX #11 for the summary of each property value:
/// <https://www.unicode.org/reports/tr11/#Definitions>
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::EastAsianWidth};
///
/// let map = CodePointMapData::<EastAsianWidth>::new();
///
/// // The halfwidth and regular Katakana letters look alike and are easily
/// // conflated by editors or text normalization, so both are written as escapes.
/// assert_eq!(map.get('\u{FF71}'), EastAsianWidth::Halfwidth); // U+FF71: Halfwidth Katakana Letter A
/// assert_eq!(map.get('\u{30A2}'), EastAsianWidth::Wide); // U+30A2: Katakana Letter A
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct EastAsianWidth(pub(crate) u8);

impl EastAsianWidth {
    /// Returns an ICU4C `UEastAsianWidth` value.
    ///
    /// This is the wrapped integer, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UEastAsianWidth` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
969
// Named values of `EastAsianWidth`. `create_const_array!` re-emits every constant
// unchanged and additionally generates an `ALL_VALUES` slice listing all of them.
// The trailing `name="…"` comments record the abbreviated name of each value
// (presumably the short alias from PropertyValueAliases.txt; confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl EastAsianWidth {
    pub const Neutral: EastAsianWidth = EastAsianWidth(0); // name="N"
    pub const Ambiguous: EastAsianWidth = EastAsianWidth(1); // name="A"
    pub const Halfwidth: EastAsianWidth = EastAsianWidth(2); // name="H"
    pub const Fullwidth: EastAsianWidth = EastAsianWidth(3); // name="F"
    pub const Narrow: EastAsianWidth = EastAsianWidth(4); // name="Na"
    pub const Wide: EastAsianWidth = EastAsianWidth(5); // name="W"
}
}
982
// Invokes `make_enumerated_property!` (defined outside this excerpt) for
// `EastAsianWidth`: Unicode long name `East_Asian_Width`, short name `ea`.
// The value is a single `u8`, which is also passed as the ULE storage type.
// NOTE(review): the macro expansion is not visible here; presumably it implements
// `EnumeratedProperty` for the type. Confirm against the macro definition.
make_enumerated_property! {
    name: "East_Asian_Width";
    short_name: "ea";
    ident: EastAsianWidth;
    data_marker: crate::provider::PropertyEnumEastAsianWidthV1;
    singleton: SINGLETON_PROPERTY_ENUM_EAST_ASIAN_WIDTH_V1;
    ule_ty: u8;
}
991
/// The Unicode `Line_Break` enumerated property.
///
/// Each property value is summarized under "Line Breaking Properties" in UAX #14:
/// <https://www.unicode.org/reports/tr14/#Properties>
///
/// Numeric values are compatible with ICU4C's `ULineBreak`.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::LineBreak, CodePointMapData};
///
/// let map = CodePointMapData::<LineBreak>::new();
///
/// // U+0029: Right Parenthesis
/// assert_eq!(map.get(')'), LineBreak::CloseParenthesis);
/// // U+3041: Hiragana Letter Small A
/// assert_eq!(map.get('\u{3041}'), LineBreak::ConditionalJapaneseStarter);
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct LineBreak(pub(crate) u8);

impl LineBreak {
    /// Wraps a raw ICU4C `ULineBreak` value without validating it.
    pub const fn from_icu4c_value(value: u8) -> Self {
        LineBreak(value)
    }
    /// Unwraps this value into the matching ICU4C `ULineBreak` integer.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1027
// Named values of `LineBreak`. `create_const_array!` re-emits every constant
// unchanged and additionally generates an `ALL_VALUES` slice listing all of them.
// The trailing `name="…"` comments record the abbreviated name of each value
// (presumably the short alias from PropertyValueAliases.txt; confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl LineBreak {
    pub const Unknown: LineBreak = LineBreak(0); // name="XX"
    pub const Ambiguous: LineBreak = LineBreak(1); // name="AI"
    pub const Alphabetic: LineBreak = LineBreak(2); // name="AL"
    pub const BreakBoth: LineBreak = LineBreak(3); // name="B2"
    pub const BreakAfter: LineBreak = LineBreak(4); // name="BA"
    pub const BreakBefore: LineBreak = LineBreak(5); // name="BB"
    pub const MandatoryBreak: LineBreak = LineBreak(6); // name="BK"
    pub const ContingentBreak: LineBreak = LineBreak(7); // name="CB"
    pub const ClosePunctuation: LineBreak = LineBreak(8); // name="CL"
    pub const CombiningMark: LineBreak = LineBreak(9); // name="CM"
    pub const CarriageReturn: LineBreak = LineBreak(10); // name="CR"
    pub const Exclamation: LineBreak = LineBreak(11); // name="EX"
    pub const Glue: LineBreak = LineBreak(12); // name="GL"
    pub const Hyphen: LineBreak = LineBreak(13); // name="HY"
    pub const Ideographic: LineBreak = LineBreak(14); // name="ID"
    pub const Inseparable: LineBreak = LineBreak(15); // name="IN"
    pub const InfixNumeric: LineBreak = LineBreak(16); // name="IS"
    pub const LineFeed: LineBreak = LineBreak(17); // name="LF"
    pub const Nonstarter: LineBreak = LineBreak(18); // name="NS"
    pub const Numeric: LineBreak = LineBreak(19); // name="NU"
    pub const OpenPunctuation: LineBreak = LineBreak(20); // name="OP"
    pub const PostfixNumeric: LineBreak = LineBreak(21); // name="PO"
    pub const PrefixNumeric: LineBreak = LineBreak(22); // name="PR"
    pub const Quotation: LineBreak = LineBreak(23); // name="QU"
    pub const ComplexContext: LineBreak = LineBreak(24); // name="SA"
    pub const Surrogate: LineBreak = LineBreak(25); // name="SG"
    pub const Space: LineBreak = LineBreak(26); // name="SP"
    pub const BreakSymbols: LineBreak = LineBreak(27); // name="SY"
    pub const ZWSpace: LineBreak = LineBreak(28); // name="ZW"
    pub const NextLine: LineBreak = LineBreak(29); // name="NL"
    pub const WordJoiner: LineBreak = LineBreak(30); // name="WJ"
    pub const H2: LineBreak = LineBreak(31); // name="H2"
    pub const H3: LineBreak = LineBreak(32); // name="H3"
    pub const JL: LineBreak = LineBreak(33); // name="JL"
    pub const JT: LineBreak = LineBreak(34); // name="JT"
    pub const JV: LineBreak = LineBreak(35); // name="JV"
    pub const CloseParenthesis: LineBreak = LineBreak(36); // name="CP"
    pub const ConditionalJapaneseStarter: LineBreak = LineBreak(37); // name="CJ"
    pub const HebrewLetter: LineBreak = LineBreak(38); // name="HL"
    pub const RegionalIndicator: LineBreak = LineBreak(39); // name="RI"
    pub const EBase: LineBreak = LineBreak(40); // name="EB"
    pub const EModifier: LineBreak = LineBreak(41); // name="EM"
    pub const ZWJ: LineBreak = LineBreak(42); // name="ZWJ"

    // Added in ICU 74:
    pub const Aksara: LineBreak = LineBreak(43); // name="AK"
    pub const AksaraPrebase: LineBreak = LineBreak(44); // name="AP"
    pub const AksaraStart: LineBreak = LineBreak(45); // name="AS"
    pub const ViramaFinal: LineBreak = LineBreak(46); // name="VF"
    pub const Virama: LineBreak = LineBreak(47); // name="VI"
}
}
1084
// Invokes `make_enumerated_property!` (defined outside this excerpt) for
// `LineBreak`: Unicode long name `Line_Break`, short name `lb`.
// The value is a single `u8`, which is also passed as the ULE storage type.
// NOTE(review): the macro expansion is not visible here; presumably it implements
// `EnumeratedProperty` for the type. Confirm against the macro definition.
make_enumerated_property! {
    name: "Line_Break";
    short_name: "lb";
    ident: LineBreak;
    data_marker: crate::provider::PropertyEnumLineBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_LINE_BREAK_V1;
    ule_ty: u8;
}
1093
/// The Unicode `Grapheme_Cluster_Break` enumerated property.
///
/// Each property value is summarized under "Default Grapheme Cluster Boundary
/// Specification" in UAX #29:
/// <https://www.unicode.org/reports/tr29/#Default_Grapheme_Cluster_Table>
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{props::GraphemeClusterBreak, CodePointMapData};
///
/// let map = CodePointMapData::<GraphemeClusterBreak>::new();
///
/// // U+1F1E6: Regional Indicator Symbol Letter A
/// assert_eq!(map.get('\u{1F1E6}'), GraphemeClusterBreak::RegionalIndicator);
/// // U+0E33: Thai Character Sara Am
/// assert_eq!(map.get('\u{0E33}'), GraphemeClusterBreak::SpacingMark);
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // this type is stable
#[repr(transparent)]
pub struct GraphemeClusterBreak(pub(crate) u8);

impl GraphemeClusterBreak {
    /// Wraps a raw ICU4C `UGraphemeClusterBreak` value without validating it.
    pub const fn from_icu4c_value(value: u8) -> Self {
        GraphemeClusterBreak(value)
    }
    /// Unwraps this value into the matching ICU4C `UGraphemeClusterBreak` integer.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1128
// Named values of `GraphemeClusterBreak`. `create_const_array!` re-emits every
// constant unchanged and additionally generates an `ALL_VALUES` slice listing all
// of them, including the values documented below as obsolete.
// The trailing `name="…"` comments record the abbreviated name of each value
// (presumably the short alias from PropertyValueAliases.txt; confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl GraphemeClusterBreak {
    pub const Other: GraphemeClusterBreak = GraphemeClusterBreak(0); // name="XX"
    pub const Control: GraphemeClusterBreak = GraphemeClusterBreak(1); // name="CN"
    pub const CR: GraphemeClusterBreak = GraphemeClusterBreak(2); // name="CR"
    pub const Extend: GraphemeClusterBreak = GraphemeClusterBreak(3); // name="EX"
    pub const L: GraphemeClusterBreak = GraphemeClusterBreak(4); // name="L"
    pub const LF: GraphemeClusterBreak = GraphemeClusterBreak(5); // name="LF"
    pub const LV: GraphemeClusterBreak = GraphemeClusterBreak(6); // name="LV"
    pub const LVT: GraphemeClusterBreak = GraphemeClusterBreak(7); // name="LVT"
    pub const T: GraphemeClusterBreak = GraphemeClusterBreak(8); // name="T"
    pub const V: GraphemeClusterBreak = GraphemeClusterBreak(9); // name="V"
    pub const SpacingMark: GraphemeClusterBreak = GraphemeClusterBreak(10); // name="SM"
    pub const Prepend: GraphemeClusterBreak = GraphemeClusterBreak(11); // name="PP"
    pub const RegionalIndicator: GraphemeClusterBreak = GraphemeClusterBreak(12); // name="RI"
    /// This value is obsolete and unused.
    pub const EBase: GraphemeClusterBreak = GraphemeClusterBreak(13); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: GraphemeClusterBreak = GraphemeClusterBreak(14); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: GraphemeClusterBreak = GraphemeClusterBreak(15); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: GraphemeClusterBreak = GraphemeClusterBreak(16); // name="GAZ"
    pub const ZWJ: GraphemeClusterBreak = GraphemeClusterBreak(17); // name="ZWJ"
}
}
1157
// Invokes `make_enumerated_property!` (defined outside this excerpt) for
// `GraphemeClusterBreak`: Unicode long name `Grapheme_Cluster_Break`, short name `GCB`.
// The value is a single `u8`, which is also passed as the ULE storage type.
// NOTE(review): the macro expansion is not visible here; presumably it implements
// `EnumeratedProperty` for the type. Confirm against the macro definition.
make_enumerated_property! {
    name: "Grapheme_Cluster_Break";
    short_name: "GCB";
    ident: GraphemeClusterBreak;
    data_marker: crate::provider::PropertyEnumGraphemeClusterBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_GRAPHEME_CLUSTER_BREAK_V1;
    ule_ty: u8;
}
1166
/// Enumerated property Word_Break.
///
/// See "Default Word Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Word_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::WordBreak};
///
/// let map = CodePointMapData::<WordBreak>::new();
///
/// assert_eq!(map.get('.'), WordBreak::MidNumLet); // U+002E: Full Stop
/// // Written as an escape so the fullwidth comma cannot be mistaken for (or
/// // normalized into) the ASCII comma U+002C.
/// assert_eq!(map.get('\u{FF0C}'), WordBreak::MidNum); // U+FF0C: Fullwidth Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct WordBreak(pub(crate) u8);

impl WordBreak {
    /// Returns an ICU4C `UWordBreak` value.
    ///
    /// This is the wrapped integer, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UWordBreak` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1201
// Named values of `WordBreak`. `create_const_array!` re-emits every constant
// unchanged and additionally generates an `ALL_VALUES` slice listing all of them,
// including the values documented below as obsolete.
// The trailing `name="…"` comments record the abbreviated name of each value
// (presumably the short alias from PropertyValueAliases.txt; confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl WordBreak {
    pub const Other: WordBreak = WordBreak(0); // name="XX"
    pub const ALetter: WordBreak = WordBreak(1); // name="LE"
    pub const Format: WordBreak = WordBreak(2); // name="FO"
    pub const Katakana: WordBreak = WordBreak(3); // name="KA"
    pub const MidLetter: WordBreak = WordBreak(4); // name="ML"
    pub const MidNum: WordBreak = WordBreak(5); // name="MN"
    pub const Numeric: WordBreak = WordBreak(6); // name="NU"
    pub const ExtendNumLet: WordBreak = WordBreak(7); // name="EX"
    pub const CR: WordBreak = WordBreak(8); // name="CR"
    pub const Extend: WordBreak = WordBreak(9); // name="Extend"
    pub const LF: WordBreak = WordBreak(10); // name="LF"
    pub const MidNumLet: WordBreak = WordBreak(11); // name="MB"
    pub const Newline: WordBreak = WordBreak(12); // name="NL"
    pub const RegionalIndicator: WordBreak = WordBreak(13); // name="RI"
    pub const HebrewLetter: WordBreak = WordBreak(14); // name="HL"
    pub const SingleQuote: WordBreak = WordBreak(15); // name="SQ"
    pub const DoubleQuote: WordBreak = WordBreak(16); // name="DQ"
    /// This value is obsolete and unused.
    pub const EBase: WordBreak = WordBreak(17); // name="EB"
    /// This value is obsolete and unused.
    pub const EBaseGAZ: WordBreak = WordBreak(18); // name="EBG"
    /// This value is obsolete and unused.
    pub const EModifier: WordBreak = WordBreak(19); // name="EM"
    /// This value is obsolete and unused.
    pub const GlueAfterZwj: WordBreak = WordBreak(20); // name="GAZ"
    pub const ZWJ: WordBreak = WordBreak(21); // name="ZWJ"
    pub const WSegSpace: WordBreak = WordBreak(22); // name="WSegSpace"
}
}
1235
// Invokes `make_enumerated_property!` (defined outside this excerpt) for
// `WordBreak`: Unicode long name `Word_Break`, short name `WB`.
// The value is a single `u8`, which is also passed as the ULE storage type.
// NOTE(review): the macro expansion is not visible here; presumably it implements
// `EnumeratedProperty` for the type. Confirm against the macro definition.
make_enumerated_property! {
    name: "Word_Break";
    short_name: "WB";
    ident: WordBreak;
    data_marker: crate::provider::PropertyEnumWordBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_WORD_BREAK_V1;
    ule_ty: u8;
}
1244
/// Enumerated property Sentence_Break.
///
/// See "Default Sentence Boundary Specification" in UAX #29 for the summary of
/// each property value:
/// <https://www.unicode.org/reports/tr29/#Default_Sentence_Boundaries>.
///
/// **Note:** Use `icu::segmenter` for an all-in-one break iterator implementation.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::SentenceBreak};
///
/// let map = CodePointMapData::<SentenceBreak>::new();
///
/// // Written as an escape so the fullwidth digit cannot be mistaken for (or
/// // normalized into) the ASCII digit U+0039.
/// assert_eq!(map.get('\u{FF19}'), SentenceBreak::Numeric); // U+FF19: Fullwidth Digit Nine
/// assert_eq!(map.get(','), SentenceBreak::SContinue); // U+002C: Comma
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct SentenceBreak(pub(crate) u8);

impl SentenceBreak {
    /// Returns an ICU4C `USentenceBreak` value.
    ///
    /// This is the wrapped integer, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `USentenceBreak` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1279
// Named values of `SentenceBreak`. `create_const_array!` re-emits every constant
// unchanged and additionally generates an `ALL_VALUES` slice listing all of them.
// The trailing `name="…"` comments record the abbreviated name of each value
// (presumably the short alias from PropertyValueAliases.txt; confirm).
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl SentenceBreak {
    pub const Other: SentenceBreak = SentenceBreak(0); // name="XX"
    pub const ATerm: SentenceBreak = SentenceBreak(1); // name="AT"
    pub const Close: SentenceBreak = SentenceBreak(2); // name="CL"
    pub const Format: SentenceBreak = SentenceBreak(3); // name="FO"
    pub const Lower: SentenceBreak = SentenceBreak(4); // name="LO"
    pub const Numeric: SentenceBreak = SentenceBreak(5); // name="NU"
    pub const OLetter: SentenceBreak = SentenceBreak(6); // name="LE"
    pub const Sep: SentenceBreak = SentenceBreak(7); // name="SE"
    pub const Sp: SentenceBreak = SentenceBreak(8); // name="SP"
    pub const STerm: SentenceBreak = SentenceBreak(9); // name="ST"
    pub const Upper: SentenceBreak = SentenceBreak(10); // name="UP"
    pub const CR: SentenceBreak = SentenceBreak(11); // name="CR"
    pub const Extend: SentenceBreak = SentenceBreak(12); // name="EX"
    pub const LF: SentenceBreak = SentenceBreak(13); // name="LF"
    pub const SContinue: SentenceBreak = SentenceBreak(14); // name="SC"
}
}
1301
// Invokes `make_enumerated_property!` (defined outside this excerpt) for
// `SentenceBreak`: Unicode long name `Sentence_Break`, short name `SB`.
// The value is a single `u8`, which is also passed as the ULE storage type.
// NOTE(review): the macro expansion is not visible here; presumably it implements
// `EnumeratedProperty` for the type. Confirm against the macro definition.
make_enumerated_property! {
    name: "Sentence_Break";
    short_name: "SB";
    ident: SentenceBreak;
    data_marker: crate::provider::PropertyEnumSentenceBreakV1;
    singleton: SINGLETON_PROPERTY_ENUM_SENTENCE_BREAK_V1;
    ule_ty: u8;
}
1310
/// Property Canonical_Combining_Class.
/// See UAX #15:
/// <https://www.unicode.org/reports/tr15/>.
///
/// **Note:** See `icu::normalizer::properties::CanonicalCombiningClassMap` for the preferred API
/// to look up the Canonical_Combining_Class property by scalar value.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::CanonicalCombiningClass};
///
/// let map = CodePointMapData::<CanonicalCombiningClass>::new();
///
/// assert_eq!(map.get('a'), CanonicalCombiningClass::NotReordered); // U+0061: LATIN SMALL LETTER A
/// assert_eq!(map.get('\u{0301}'), CanonicalCombiningClass::Above); // U+0301: COMBINING ACUTE ACCENT
/// ```
//
// NOTE: The Pernosco debugger has special knowledge
// of this struct. Please do not change the bit layout
// or the crate-module-qualified name of this struct
// without coordination.
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_properties::props))]
#[allow(clippy::exhaustive_structs)] // newtype
#[repr(transparent)]
pub struct CanonicalCombiningClass(pub(crate) u8);

impl CanonicalCombiningClass {
    /// Returns an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// This is the wrapped integer, returned unchanged.
    pub const fn to_icu4c_value(self) -> u8 {
        self.0
    }
    /// Constructor from an ICU4C `UCanonicalCombiningClass` value.
    ///
    /// The value is stored as-is; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        Self(value)
    }
}
1352
// Named values of `CanonicalCombiningClass`. `create_const_array!` re-emits every
// constant unchanged and additionally generates an `ALL_VALUES` slice listing all of
// them. The numeric payloads are sparse: only the classes listed here get a name.
// The trailing `name="…"` comments record the abbreviated name of each value.
create_const_array! {
// These constant names come from PropertyValueAliases.txt
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)]
impl CanonicalCombiningClass {
    pub const NotReordered: CanonicalCombiningClass = CanonicalCombiningClass(0); // name="NR"
    pub const Overlay: CanonicalCombiningClass = CanonicalCombiningClass(1); // name="OV"
    pub const HanReading: CanonicalCombiningClass = CanonicalCombiningClass(6); // name="HANR"
    pub const Nukta: CanonicalCombiningClass = CanonicalCombiningClass(7); // name="NK"
    pub const KanaVoicing: CanonicalCombiningClass = CanonicalCombiningClass(8); // name="KV"
    pub const Virama: CanonicalCombiningClass = CanonicalCombiningClass(9); // name="VR"
    pub const CCC10: CanonicalCombiningClass = CanonicalCombiningClass(10); // name="CCC10"
    pub const CCC11: CanonicalCombiningClass = CanonicalCombiningClass(11); // name="CCC11"
    pub const CCC12: CanonicalCombiningClass = CanonicalCombiningClass(12); // name="CCC12"
    pub const CCC13: CanonicalCombiningClass = CanonicalCombiningClass(13); // name="CCC13"
    pub const CCC14: CanonicalCombiningClass = CanonicalCombiningClass(14); // name="CCC14"
    pub const CCC15: CanonicalCombiningClass = CanonicalCombiningClass(15); // name="CCC15"
    pub const CCC16: CanonicalCombiningClass = CanonicalCombiningClass(16); // name="CCC16"
    pub const CCC17: CanonicalCombiningClass = CanonicalCombiningClass(17); // name="CCC17"
    pub const CCC18: CanonicalCombiningClass = CanonicalCombiningClass(18); // name="CCC18"
    pub const CCC19: CanonicalCombiningClass = CanonicalCombiningClass(19); // name="CCC19"
    pub const CCC20: CanonicalCombiningClass = CanonicalCombiningClass(20); // name="CCC20"
    pub const CCC21: CanonicalCombiningClass = CanonicalCombiningClass(21); // name="CCC21"
    pub const CCC22: CanonicalCombiningClass = CanonicalCombiningClass(22); // name="CCC22"
    pub const CCC23: CanonicalCombiningClass = CanonicalCombiningClass(23); // name="CCC23"
    pub const CCC24: CanonicalCombiningClass = CanonicalCombiningClass(24); // name="CCC24"
    pub const CCC25: CanonicalCombiningClass = CanonicalCombiningClass(25); // name="CCC25"
    pub const CCC26: CanonicalCombiningClass = CanonicalCombiningClass(26); // name="CCC26"
    pub const CCC27: CanonicalCombiningClass = CanonicalCombiningClass(27); // name="CCC27"
    pub const CCC28: CanonicalCombiningClass = CanonicalCombiningClass(28); // name="CCC28"
    pub const CCC29: CanonicalCombiningClass = CanonicalCombiningClass(29); // name="CCC29"
    pub const CCC30: CanonicalCombiningClass = CanonicalCombiningClass(30); // name="CCC30"
    pub const CCC31: CanonicalCombiningClass = CanonicalCombiningClass(31); // name="CCC31"
    pub const CCC32: CanonicalCombiningClass = CanonicalCombiningClass(32); // name="CCC32"
    pub const CCC33: CanonicalCombiningClass = CanonicalCombiningClass(33); // name="CCC33"
    pub const CCC34: CanonicalCombiningClass = CanonicalCombiningClass(34); // name="CCC34"
    pub const CCC35: CanonicalCombiningClass = CanonicalCombiningClass(35); // name="CCC35"
    pub const CCC36: CanonicalCombiningClass = CanonicalCombiningClass(36); // name="CCC36"
    pub const CCC84: CanonicalCombiningClass = CanonicalCombiningClass(84); // name="CCC84"
    pub const CCC91: CanonicalCombiningClass = CanonicalCombiningClass(91); // name="CCC91"
    pub const CCC103: CanonicalCombiningClass = CanonicalCombiningClass(103); // name="CCC103"
    pub const CCC107: CanonicalCombiningClass = CanonicalCombiningClass(107); // name="CCC107"
    pub const CCC118: CanonicalCombiningClass = CanonicalCombiningClass(118); // name="CCC118"
    pub const CCC122: CanonicalCombiningClass = CanonicalCombiningClass(122); // name="CCC122"
    pub const CCC129: CanonicalCombiningClass = CanonicalCombiningClass(129); // name="CCC129"
    pub const CCC130: CanonicalCombiningClass = CanonicalCombiningClass(130); // name="CCC130"
    pub const CCC132: CanonicalCombiningClass = CanonicalCombiningClass(132); // name="CCC132"
    pub const CCC133: CanonicalCombiningClass = CanonicalCombiningClass(133); // name="CCC133" // RESERVED
    pub const AttachedBelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(200); // name="ATBL"
    pub const AttachedBelow: CanonicalCombiningClass = CanonicalCombiningClass(202); // name="ATB"
    pub const AttachedAbove: CanonicalCombiningClass = CanonicalCombiningClass(214); // name="ATA"
    pub const AttachedAboveRight: CanonicalCombiningClass = CanonicalCombiningClass(216); // name="ATAR"
    pub const BelowLeft: CanonicalCombiningClass = CanonicalCombiningClass(218); // name="BL"
    pub const Below: CanonicalCombiningClass = CanonicalCombiningClass(220); // name="B"
    pub const BelowRight: CanonicalCombiningClass = CanonicalCombiningClass(222); // name="BR"
    pub const Left: CanonicalCombiningClass = CanonicalCombiningClass(224); // name="L"
    pub const Right: CanonicalCombiningClass = CanonicalCombiningClass(226); // name="R"
    pub const AboveLeft: CanonicalCombiningClass = CanonicalCombiningClass(228); // name="AL"
    pub const Above: CanonicalCombiningClass = CanonicalCombiningClass(230); // name="A"
    pub const AboveRight: CanonicalCombiningClass = CanonicalCombiningClass(232); // name="AR"
    pub const DoubleBelow: CanonicalCombiningClass = CanonicalCombiningClass(233); // name="DB"
    pub const DoubleAbove: CanonicalCombiningClass = CanonicalCombiningClass(234); // name="DA"
    pub const IotaSubscript: CanonicalCombiningClass = CanonicalCombiningClass(240); // name="IS"
}
}
1418
// Passes `CanonicalCombiningClass` to `make_enumerated_property!` (the macro is defined
// outside this section) together with its long name, its short name "ccc", its provider
// data marker, its singleton identifier, and `u8` as the ULE type.
make_enumerated_property! {
    name: "Canonical_Combining_Class";
    short_name: "ccc";
    ident: CanonicalCombiningClass;
    data_marker: crate::provider::PropertyEnumCanonicalCombiningClassV1;
    singleton: SINGLETON_PROPERTY_ENUM_CANONICAL_COMBINING_CLASS_V1;
    ule_ty: u8;
}
1427
/// The Indic_Syllabic_Category property, represented as an open enumeration over `u8`.
///
/// The property is described in UAX #44:
/// <https://www.unicode.org/reports/tr44/#Indic_Syllabic_Category>.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::IndicSyllabicCategory};
///
/// assert_eq!(CodePointMapData::<IndicSyllabicCategory>::new().get('a'), IndicSyllabicCategory::Other);
/// assert_eq!(CodePointMapData::<IndicSyllabicCategory>::new().get('\u{0900}'), IndicSyllabicCategory::Bindu); // U+0900: DEVANAGARI SIGN INVERTED CANDRABINDU
/// ```
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "datagen",
    derive(databake::Bake),
    databake(path = icu_properties::props)
)]
pub struct IndicSyllabicCategory(pub(crate) u8);

impl IndicSyllabicCategory {
    /// Wraps an ICU4C `UIndicSyllabicCategory` value.
    ///
    /// The value is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        IndicSyllabicCategory(value)
    }

    /// Unwraps this value into the corresponding ICU4C `UIndicSyllabicCategory` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1458
// Associated constants naming the Indic_Syllabic_Category values. The wrapped integers run
// consecutively from 0 (`Other`) to 36 (`ReorderingKiller`). `create_const_array!` re-emits
// this impl and additionally generates an `ALL_VALUES` slice from the listed constants.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)] // constants are named like enum variants
impl IndicSyllabicCategory {
    pub const Other: IndicSyllabicCategory = IndicSyllabicCategory(0);
    pub const Avagraha: IndicSyllabicCategory = IndicSyllabicCategory(1);
    pub const Bindu: IndicSyllabicCategory = IndicSyllabicCategory(2);
    pub const BrahmiJoiningNumber: IndicSyllabicCategory = IndicSyllabicCategory(3);
    pub const CantillationMark: IndicSyllabicCategory = IndicSyllabicCategory(4);
    pub const Consonant: IndicSyllabicCategory = IndicSyllabicCategory(5);
    pub const ConsonantDead: IndicSyllabicCategory = IndicSyllabicCategory(6);
    pub const ConsonantFinal: IndicSyllabicCategory = IndicSyllabicCategory(7);
    pub const ConsonantHeadLetter: IndicSyllabicCategory = IndicSyllabicCategory(8);
    pub const ConsonantInitialPostfixed: IndicSyllabicCategory = IndicSyllabicCategory(9);
    pub const ConsonantKiller: IndicSyllabicCategory = IndicSyllabicCategory(10);
    pub const ConsonantMedial: IndicSyllabicCategory = IndicSyllabicCategory(11);
    pub const ConsonantPlaceholder: IndicSyllabicCategory = IndicSyllabicCategory(12);
    pub const ConsonantPrecedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(13);
    pub const ConsonantPrefixed: IndicSyllabicCategory = IndicSyllabicCategory(14);
    pub const ConsonantSucceedingRepha: IndicSyllabicCategory = IndicSyllabicCategory(15);
    pub const ConsonantSubjoined: IndicSyllabicCategory = IndicSyllabicCategory(16);
    pub const ConsonantWithStacker: IndicSyllabicCategory = IndicSyllabicCategory(17);
    pub const GeminationMark: IndicSyllabicCategory = IndicSyllabicCategory(18);
    pub const InvisibleStacker: IndicSyllabicCategory = IndicSyllabicCategory(19);
    pub const Joiner: IndicSyllabicCategory = IndicSyllabicCategory(20);
    pub const ModifyingLetter: IndicSyllabicCategory = IndicSyllabicCategory(21);
    pub const NonJoiner: IndicSyllabicCategory = IndicSyllabicCategory(22);
    pub const Nukta: IndicSyllabicCategory = IndicSyllabicCategory(23);
    pub const Number: IndicSyllabicCategory = IndicSyllabicCategory(24);
    pub const NumberJoiner: IndicSyllabicCategory = IndicSyllabicCategory(25);
    pub const PureKiller: IndicSyllabicCategory = IndicSyllabicCategory(26);
    pub const RegisterShifter: IndicSyllabicCategory = IndicSyllabicCategory(27);
    pub const SyllableModifier: IndicSyllabicCategory = IndicSyllabicCategory(28);
    pub const ToneLetter: IndicSyllabicCategory = IndicSyllabicCategory(29);
    pub const ToneMark: IndicSyllabicCategory = IndicSyllabicCategory(30);
    pub const Virama: IndicSyllabicCategory = IndicSyllabicCategory(31);
    pub const Visarga: IndicSyllabicCategory = IndicSyllabicCategory(32);
    pub const Vowel: IndicSyllabicCategory = IndicSyllabicCategory(33);
    pub const VowelDependent: IndicSyllabicCategory = IndicSyllabicCategory(34);
    pub const VowelIndependent: IndicSyllabicCategory = IndicSyllabicCategory(35);
    pub const ReorderingKiller: IndicSyllabicCategory = IndicSyllabicCategory(36);
}
}
1502
// Passes `IndicSyllabicCategory` to `make_enumerated_property!` (the macro is defined
// outside this section) together with its long name, its short name "InSC", its provider
// data marker, its singleton identifier, and `u8` as the ULE type.
make_enumerated_property! {
    name: "Indic_Syllabic_Category";
    short_name: "InSC";
    ident: IndicSyllabicCategory;
    data_marker: crate::provider::PropertyEnumIndicSyllabicCategoryV1;
    singleton: SINGLETON_PROPERTY_ENUM_INDIC_SYLLABIC_CATEGORY_V1;
    ule_ty: u8;
}
1511
/// The enumerated property Joining_Type, represented as an open enumeration over `u8`.
///
/// A summary of each property value is given in Section 9.2, Arabic Cursive Joining, of
/// The Unicode Standard.
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::JoiningType};
///
/// assert_eq!(CodePointMapData::<JoiningType>::new().get('ؠ'), JoiningType::DualJoining); // U+0620: Arabic Letter Kashmiri Yeh
/// assert_eq!(CodePointMapData::<JoiningType>::new().get('𐫍'), JoiningType::LeftJoining); // U+10ACD: Manichaean Letter Heth
/// ```
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "datagen",
    derive(databake::Bake),
    databake(path = icu_properties::props)
)]
pub struct JoiningType(pub(crate) u8);

impl JoiningType {
    /// Wraps an ICU4C `UJoiningType` value.
    ///
    /// The value is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        JoiningType(value)
    }

    /// Unwraps this value into the corresponding ICU4C `UJoiningType` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1543
// Associated constants naming the Joining_Type values 0 through 5. `create_const_array!`
// re-emits this impl and additionally generates an `ALL_VALUES` slice from the listed
// constants. The trailing `name="…"` comments presumably record each value's short alias.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)] // constants are named like enum variants
impl JoiningType {
    pub const NonJoining: JoiningType = JoiningType(0); // name="U"
    pub const JoinCausing: JoiningType = JoiningType(1); // name="C"
    pub const DualJoining: JoiningType = JoiningType(2); // name="D"
    pub const LeftJoining: JoiningType = JoiningType(3); // name="L"
    pub const RightJoining: JoiningType = JoiningType(4); // name="R"
    pub const Transparent: JoiningType = JoiningType(5); // name="T"
}
}
1556
// Passes `JoiningType` to `make_enumerated_property!` (the macro is defined outside this
// section) together with its long name, its short name "jt", its provider data marker,
// its singleton identifier, and `u8` as the ULE type.
make_enumerated_property! {
    name: "Joining_Type";
    short_name: "jt";
    ident: JoiningType;
    data_marker: crate::provider::PropertyEnumJoiningTypeV1;
    singleton: SINGLETON_PROPERTY_ENUM_JOINING_TYPE_V1;
    ule_ty: u8;
}
1565
/// The Vertical_Orientation property, represented as an open enumeration over `u8`.
///
/// The property is described in UTR #50:
/// <https://www.unicode.org/reports/tr50/#vo>
///
/// # Example
///
/// ```
/// use icu::properties::{CodePointMapData, props::VerticalOrientation};
///
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get('a'), VerticalOrientation::Rotated);
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get('§'), VerticalOrientation::Upright);
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get32(0x2329), VerticalOrientation::TransformedRotated);
/// assert_eq!(CodePointMapData::<VerticalOrientation>::new().get32(0x3001), VerticalOrientation::TransformedUpright);
/// ```
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // newtype
#[derive(Copy, Clone, Debug, Eq, PartialEq, Ord, PartialOrd, Hash)]
#[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
#[cfg_attr(
    feature = "datagen",
    derive(databake::Bake),
    databake(path = icu_properties::props)
)]
pub struct VerticalOrientation(pub(crate) u8);

impl VerticalOrientation {
    /// Wraps an ICU4C `UVerticalOrientation` value.
    ///
    /// The value is stored as given; no range check is performed.
    pub const fn from_icu4c_value(value: u8) -> Self {
        VerticalOrientation(value)
    }

    /// Unwraps this value into the corresponding ICU4C `UVerticalOrientation` value.
    pub const fn to_icu4c_value(self) -> u8 {
        let Self(raw) = self;
        raw
    }
}
1599
// Associated constants naming the Vertical_Orientation values 0 through 3.
// `create_const_array!` re-emits this impl and additionally generates an `ALL_VALUES` slice
// from the listed constants. The trailing `name="…"` comments presumably record each
// value's short alias.
create_const_array! {
#[allow(missing_docs)] // These constants don't need individual documentation.
#[allow(non_upper_case_globals)] // constants are named like enum variants
impl VerticalOrientation {
    pub const Rotated: VerticalOrientation = VerticalOrientation(0); // name="R"
    pub const TransformedRotated: VerticalOrientation = VerticalOrientation(1); // name="Tr"
    pub const TransformedUpright: VerticalOrientation = VerticalOrientation(2); // name="Tu"
    pub const Upright: VerticalOrientation = VerticalOrientation(3); // name="U"
}
}
1610
// Passes `VerticalOrientation` to `make_enumerated_property!` (the macro is defined outside
// this section) together with its long name, its short name "vo", its provider data
// marker, its singleton identifier, and `u8` as the ULE type.
make_enumerated_property! {
    name: "Vertical_Orientation";
    short_name: "vo";
    ident: VerticalOrientation;
    data_marker: crate::provider::PropertyEnumVerticalOrientationV1;
    singleton: SINGLETON_PROPERTY_ENUM_VERTICAL_ORIENTATION_V1;
    ule_ty: u8;
}
1619
1620pub use crate::code_point_set::BinaryProperty;
1621
/// Declares the marker type for one binary property.
///
/// The input is the property's long name, its short name, the identifier to declare, the
/// data marker type, the identifier of the baked singleton, and one or more attributes
/// (normally the doc comment). The expansion consists of:
///
/// - a `#[non_exhaustive]` unit struct with the given identifier that derives `Debug`
///   and carries the supplied attributes,
/// - an implementation of `crate::private::Sealed` for it, and
/// - an implementation of [`BinaryProperty`] exposing both names as byte slices and,
///   when the `compiled_data` feature is enabled, a reference to the baked singleton.
macro_rules! make_binary_property {
    (
        name: $long:literal;
        short_name: $short:literal;
        ident: $marker:ident;
        data_marker: $marker_ty:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[non_exhaustive]
        #[derive(Debug)]
        pub struct $marker;

        impl crate::private::Sealed for $marker {}

        impl BinaryProperty for $marker {
            type DataMarker = $marker_ty;
            const NAME: &'static [u8] = $long.as_bytes();
            const SHORT_NAME: &'static [u8] = $short.as_bytes();
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyCodePointSet<'static> =
                &crate::provider::Baked::$baked;
        }
    };
}
1648
1649make_binary_property! {
1650 name: "ASCII_Hex_Digit";
1651 short_name: "AHex";
1652 ident: AsciiHexDigit;
1653 data_marker: crate::provider::PropertyBinaryAsciiHexDigitV1;
1654 singleton: SINGLETON_PROPERTY_BINARY_ASCII_HEX_DIGIT_V1;
1655 /// ASCII characters commonly used for the representation of hexadecimal numbers.
1656 ///
1657 /// # Example
1658 ///
1659 /// ```
1660 /// use icu::properties::CodePointSetData;
1661 /// use icu::properties::props::AsciiHexDigit;
1662 ///
1663 /// let ascii_hex_digit = CodePointSetData::new::<AsciiHexDigit>();
1664 ///
1665 /// assert!(ascii_hex_digit.contains('3'));
1666 /// assert!(!ascii_hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
1667 /// assert!(ascii_hex_digit.contains('A'));
1668 /// assert!(!ascii_hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1669 /// ```
1670}
1671
1672make_binary_property! {
1673 name: "Alnum";
1674 short_name: "Alnum";
1675 ident: Alnum;
1676 data_marker: crate::provider::PropertyBinaryAlnumV1;
1677 singleton: SINGLETON_PROPERTY_BINARY_ALNUM_V1;
1678 /// Characters with the `Alphabetic` or `Decimal_Number` property.
1679 ///
1680 /// This is defined for POSIX compatibility.
1681}
1682
1683make_binary_property! {
1684 name: "Alphabetic";
1685 short_name: "Alpha";
1686 ident: Alphabetic;
1687 data_marker: crate::provider::PropertyBinaryAlphabeticV1;
1688 singleton: SINGLETON_PROPERTY_BINARY_ALPHABETIC_V1;
1689 /// Alphabetic characters.
1690 ///
1691 /// # Example
1692 ///
1693 /// ```
1694 /// use icu::properties::CodePointSetData;
1695 /// use icu::properties::props::Alphabetic;
1696 ///
1697 /// let alphabetic = CodePointSetData::new::<Alphabetic>();
1698 ///
1699 /// assert!(!alphabetic.contains('3'));
1700 /// assert!(!alphabetic.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
1701 /// assert!(alphabetic.contains('A'));
1702 /// assert!(alphabetic.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
1703 /// ```
1704
1705}
1706
1707make_binary_property! {
1708 name: "Bidi_Control";
1709 short_name: "Bidi_C";
1710 ident: BidiControl;
1711 data_marker: crate::provider::PropertyBinaryBidiControlV1;
1712 singleton: SINGLETON_PROPERTY_BINARY_BIDI_CONTROL_V1;
1713 /// Format control characters which have specific functions in the Unicode Bidirectional
1714 /// Algorithm.
1715 ///
1716 /// # Example
1717 ///
1718 /// ```
1719 /// use icu::properties::CodePointSetData;
1720 /// use icu::properties::props::BidiControl;
1721 ///
1722 /// let bidi_control = CodePointSetData::new::<BidiControl>();
1723 ///
1724 /// assert!(bidi_control.contains('\u{200F}')); // RIGHT-TO-LEFT MARK
1725 /// assert!(!bidi_control.contains('ش')); // U+0634 ARABIC LETTER SHEEN
1726 /// ```
1727
1728}
1729
1730make_binary_property! {
1731 name: "Bidi_Mirrored";
1732 short_name: "Bidi_M";
1733 ident: BidiMirrored;
1734 data_marker: crate::provider::PropertyBinaryBidiMirroredV1;
1735 singleton: SINGLETON_PROPERTY_BINARY_BIDI_MIRRORED_V1;
1736 /// Characters that are mirrored in bidirectional text.
1737 ///
1738 /// # Example
1739 ///
1740 /// ```
1741 /// use icu::properties::CodePointSetData;
1742 /// use icu::properties::props::BidiMirrored;
1743 ///
1744 /// let bidi_mirrored = CodePointSetData::new::<BidiMirrored>();
1745 ///
1746 /// assert!(bidi_mirrored.contains('['));
1747 /// assert!(bidi_mirrored.contains(']'));
1748 /// assert!(bidi_mirrored.contains('∑')); // U+2211 N-ARY SUMMATION
1749 /// assert!(!bidi_mirrored.contains('ཉ')); // U+0F49 TIBETAN LETTER NYA
1750 /// ```
1751
1752}
1753
1754make_binary_property! {
1755 name: "Blank";
1756 short_name: "Blank";
1757 ident: Blank;
1758 data_marker: crate::provider::PropertyBinaryBlankV1;
1759 singleton: SINGLETON_PROPERTY_BINARY_BLANK_V1;
1760 /// Horizontal whitespace characters
1761
1762}
1763
1764make_binary_property! {
1765 name: "Cased";
1766 short_name: "Cased";
1767 ident: Cased;
1768 data_marker: crate::provider::PropertyBinaryCasedV1;
1769 singleton: SINGLETON_PROPERTY_BINARY_CASED_V1;
1770 /// Uppercase, lowercase, and titlecase characters.
1771 ///
1772 /// # Example
1773 ///
1774 /// ```
1775 /// use icu::properties::CodePointSetData;
1776 /// use icu::properties::props::Cased;
1777 ///
1778 /// let cased = CodePointSetData::new::<Cased>();
1779 ///
1780 /// assert!(cased.contains('Ꙡ')); // U+A660 CYRILLIC CAPITAL LETTER REVERSED TSE
1781 /// assert!(!cased.contains('ދ')); // U+078B THAANA LETTER DHAALU
1782 /// ```
1783
1784}
1785
1786make_binary_property! {
1787 name: "Case_Ignorable";
1788 short_name: "CI";
1789 ident: CaseIgnorable;
1790 data_marker: crate::provider::PropertyBinaryCaseIgnorableV1;
1791 singleton: SINGLETON_PROPERTY_BINARY_CASE_IGNORABLE_V1;
1792 /// Characters which are ignored for casing purposes.
1793 ///
1794 /// # Example
1795 ///
1796 /// ```
1797 /// use icu::properties::CodePointSetData;
1798 /// use icu::properties::props::CaseIgnorable;
1799 ///
1800 /// let case_ignorable = CodePointSetData::new::<CaseIgnorable>();
1801 ///
1802 /// assert!(case_ignorable.contains(':'));
1803 /// assert!(!case_ignorable.contains('λ')); // U+03BB GREEK SMALL LETTER LAMBDA
1804 /// ```
1805
1806}
1807
1808make_binary_property! {
1809 name: "Full_Composition_Exclusion";
1810 short_name: "Comp_Ex";
1811 ident: FullCompositionExclusion;
1812 data_marker: crate::provider::PropertyBinaryFullCompositionExclusionV1;
1813 singleton: SINGLETON_PROPERTY_BINARY_FULL_COMPOSITION_EXCLUSION_V1;
1814 /// Characters that are excluded from composition.
1815 ///
1816 /// See <https://unicode.org/Public/UNIDATA/CompositionExclusions.txt>
1817
1818}
1819
1820make_binary_property! {
1821 name: "Changes_When_Casefolded";
1822 short_name: "CWCF";
1823 ident: ChangesWhenCasefolded;
1824 data_marker: crate::provider::PropertyBinaryChangesWhenCasefoldedV1;
1825 singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEFOLDED_V1;
1826 /// Characters whose normalized forms are not stable under case folding.
1827 ///
1828 /// # Example
1829 ///
1830 /// ```
1831 /// use icu::properties::CodePointSetData;
1832 /// use icu::properties::props::ChangesWhenCasefolded;
1833 ///
1834 /// let changes_when_casefolded = CodePointSetData::new::<ChangesWhenCasefolded>();
1835 ///
1836 /// assert!(changes_when_casefolded.contains('ß')); // U+00DF LATIN SMALL LETTER SHARP S
1837 /// assert!(!changes_when_casefolded.contains('ᜉ')); // U+1709 TAGALOG LETTER PA
1838 /// ```
1839
1840}
1841
1842make_binary_property! {
1843 name: "Changes_When_Casemapped";
1844 short_name: "CWCM";
1845 ident: ChangesWhenCasemapped;
1846 data_marker: crate::provider::PropertyBinaryChangesWhenCasemappedV1;
1847 singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_CASEMAPPED_V1;
1848 /// Characters which may change when they undergo case mapping.
1849
1850}
1851
1852make_binary_property! {
1853 name: "Changes_When_NFKC_Casefolded";
1854 short_name: "CWKCF";
1855 ident: ChangesWhenNfkcCasefolded;
1856 data_marker: crate::provider::PropertyBinaryChangesWhenNfkcCasefoldedV1;
1857 singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_NFKC_CASEFOLDED_V1;
1858 /// Characters which are not identical to their `NFKC_Casefold` mapping.
1859 ///
1860 /// # Example
1861 ///
1862 /// ```
1863 /// use icu::properties::CodePointSetData;
1864 /// use icu::properties::props::ChangesWhenNfkcCasefolded;
1865 ///
1866 /// let changes_when_nfkc_casefolded = CodePointSetData::new::<ChangesWhenNfkcCasefolded>();
1867 ///
1868 /// assert!(changes_when_nfkc_casefolded.contains('🄵')); // U+1F135 SQUARED LATIN CAPITAL LETTER F
1869 /// assert!(!changes_when_nfkc_casefolded.contains('f'));
1870 /// ```
1871
1872}
1873
1874make_binary_property! {
1875 name: "Changes_When_Lowercased";
1876 short_name: "CWL";
1877 ident: ChangesWhenLowercased;
1878 data_marker: crate::provider::PropertyBinaryChangesWhenLowercasedV1;
1879 singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_LOWERCASED_V1;
1880 /// Characters whose normalized forms are not stable under a `toLowercase` mapping.
1881 ///
1882 /// # Example
1883 ///
1884 /// ```
1885 /// use icu::properties::CodePointSetData;
1886 /// use icu::properties::props::ChangesWhenLowercased;
1887 ///
1888 /// let changes_when_lowercased = CodePointSetData::new::<ChangesWhenLowercased>();
1889 ///
1890 /// assert!(changes_when_lowercased.contains('Ⴔ')); // U+10B4 GEORGIAN CAPITAL LETTER PHAR
1891 /// assert!(!changes_when_lowercased.contains('ფ')); // U+10E4 GEORGIAN LETTER PHAR
1892 /// ```
1893
1894}
1895
1896make_binary_property! {
1897 name: "Changes_When_Titlecased";
1898 short_name: "CWT";
1899 ident: ChangesWhenTitlecased;
1900 data_marker: crate::provider::PropertyBinaryChangesWhenTitlecasedV1;
1901 singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_TITLECASED_V1;
1902 /// Characters whose normalized forms are not stable under a `toTitlecase` mapping.
1903 ///
1904 /// # Example
1905 ///
1906 /// ```
1907 /// use icu::properties::CodePointSetData;
1908 /// use icu::properties::props::ChangesWhenTitlecased;
1909 ///
1910 /// let changes_when_titlecased = CodePointSetData::new::<ChangesWhenTitlecased>();
1911 ///
1912 /// assert!(changes_when_titlecased.contains('æ')); // U+00E6 LATIN SMALL LETTER AE
1913 /// assert!(!changes_when_titlecased.contains('Æ')); // U+00E6 LATIN CAPITAL LETTER AE
1914 /// ```
1915
1916}
1917
1918make_binary_property! {
1919 name: "Changes_When_Uppercased";
1920 short_name: "CWU";
1921 ident: ChangesWhenUppercased;
1922 data_marker: crate::provider::PropertyBinaryChangesWhenUppercasedV1;
1923 singleton: SINGLETON_PROPERTY_BINARY_CHANGES_WHEN_UPPERCASED_V1;
1924 /// Characters whose normalized forms are not stable under a `toUppercase` mapping.
1925 ///
1926 /// # Example
1927 ///
1928 /// ```
1929 /// use icu::properties::CodePointSetData;
1930 /// use icu::properties::props::ChangesWhenUppercased;
1931 ///
1932 /// let changes_when_uppercased = CodePointSetData::new::<ChangesWhenUppercased>();
1933 ///
1934 /// assert!(changes_when_uppercased.contains('ւ')); // U+0582 ARMENIAN SMALL LETTER YIWN
1935 /// assert!(!changes_when_uppercased.contains('Ւ')); // U+0552 ARMENIAN CAPITAL LETTER YIWN
1936 /// ```
1937
1938}
1939
1940make_binary_property! {
1941 name: "Dash";
1942 short_name: "Dash";
1943 ident: Dash;
1944 data_marker: crate::provider::PropertyBinaryDashV1;
1945 singleton: SINGLETON_PROPERTY_BINARY_DASH_V1;
1946 /// Punctuation characters explicitly called out as dashes in the Unicode Standard, plus
1947 /// their compatibility equivalents.
1948 ///
1949 /// # Example
1950 ///
1951 /// ```
1952 /// use icu::properties::CodePointSetData;
1953 /// use icu::properties::props::Dash;
1954 ///
1955 /// let dash = CodePointSetData::new::<Dash>();
1956 ///
1957 /// assert!(dash.contains('⸺')); // U+2E3A TWO-EM DASH
1958 /// assert!(dash.contains('-')); // U+002D
1959 /// assert!(!dash.contains('=')); // U+003D
1960 /// ```
1961
1962}
1963
1964make_binary_property! {
1965 name: "Deprecated";
1966 short_name: "Dep";
1967 ident: Deprecated;
1968 data_marker: crate::provider::PropertyBinaryDeprecatedV1;
1969 singleton: SINGLETON_PROPERTY_BINARY_DEPRECATED_V1;
1970 /// Deprecated characters.
1971 ///
1972 /// No characters will ever be removed from the standard, but the
1973 /// usage of deprecated characters is strongly discouraged.
1974 ///
1975 /// # Example
1976 ///
1977 /// ```
1978 /// use icu::properties::CodePointSetData;
1979 /// use icu::properties::props::Deprecated;
1980 ///
1981 /// let deprecated = CodePointSetData::new::<Deprecated>();
1982 ///
1983 /// assert!(deprecated.contains('ឣ')); // U+17A3 KHMER INDEPENDENT VOWEL QAQ
1984 /// assert!(!deprecated.contains('A'));
1985 /// ```
1986
1987}
1988
1989make_binary_property! {
1990 name: "Default_Ignorable_Code_Point";
1991 short_name: "DI";
1992 ident: DefaultIgnorableCodePoint;
1993 data_marker: crate::provider::PropertyBinaryDefaultIgnorableCodePointV1;
1994 singleton: SINGLETON_PROPERTY_BINARY_DEFAULT_IGNORABLE_CODE_POINT_V1;
1995 /// For programmatic determination of default ignorable code points.
1996 ///
1997 /// New characters that
1998 /// should be ignored in rendering (unless explicitly supported) will be assigned in these
1999 /// ranges, permitting programs to correctly handle the default rendering of such
2000 /// characters when not otherwise supported.
2001 ///
2002 /// # Example
2003 ///
2004 /// ```
2005 /// use icu::properties::CodePointSetData;
2006 /// use icu::properties::props::DefaultIgnorableCodePoint;
2007 ///
2008 /// let default_ignorable_code_point = CodePointSetData::new::<DefaultIgnorableCodePoint>();
2009 ///
2010 /// assert!(default_ignorable_code_point.contains('\u{180B}')); // MONGOLIAN FREE VARIATION SELECTOR ONE
2011 /// assert!(!default_ignorable_code_point.contains('E'));
2012 /// ```
2013
2014}
2015
2016make_binary_property! {
2017 name: "Diacritic";
2018 short_name: "Dia";
2019 ident: Diacritic;
2020 data_marker: crate::provider::PropertyBinaryDiacriticV1;
2021 singleton: SINGLETON_PROPERTY_BINARY_DIACRITIC_V1;
2022 /// Characters that linguistically modify the meaning of another character to which they apply.
2023 ///
2024 /// # Example
2025 ///
2026 /// ```
2027 /// use icu::properties::CodePointSetData;
2028 /// use icu::properties::props::Diacritic;
2029 ///
2030 /// let diacritic = CodePointSetData::new::<Diacritic>();
2031 ///
2032 /// assert!(diacritic.contains('\u{05B3}')); // HEBREW POINT HATAF QAMATS
2033 /// assert!(!diacritic.contains('א')); // U+05D0 HEBREW LETTER ALEF
2034 /// ```
2035
2036}
2037
2038make_binary_property! {
2039 name: "Emoji_Modifier_Base";
2040 short_name: "EBase";
2041 ident: EmojiModifierBase;
2042 data_marker: crate::provider::PropertyBinaryEmojiModifierBaseV1;
2043 singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_BASE_V1;
2044 /// Characters that can serve as a base for emoji modifiers.
2045 ///
2046 /// # Example
2047 ///
2048 /// ```
2049 /// use icu::properties::CodePointSetData;
2050 /// use icu::properties::props::EmojiModifierBase;
2051 ///
2052 /// let emoji_modifier_base = CodePointSetData::new::<EmojiModifierBase>();
2053 ///
2054 /// assert!(emoji_modifier_base.contains('✊')); // U+270A RAISED FIST
2055 /// assert!(!emoji_modifier_base.contains('⛰')); // U+26F0 MOUNTAIN
2056 /// ```
2057
2058}
2059
2060make_binary_property! {
2061 name: "Emoji_Component";
2062 short_name: "EComp";
2063 ident: EmojiComponent;
2064 data_marker: crate::provider::PropertyBinaryEmojiComponentV1;
2065 singleton: SINGLETON_PROPERTY_BINARY_EMOJI_COMPONENT_V1;
2066 /// Characters used in emoji sequences that normally do not appear on emoji keyboards as
2067 /// separate choices, such as base characters for emoji keycaps.
2068 ///
2069 /// # Example
2070 ///
2071 /// ```
2072 /// use icu::properties::CodePointSetData;
2073 /// use icu::properties::props::EmojiComponent;
2074 ///
2075 /// let emoji_component = CodePointSetData::new::<EmojiComponent>();
2076 ///
2077 /// assert!(emoji_component.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2078 /// assert!(emoji_component.contains('\u{20E3}')); // COMBINING ENCLOSING KEYCAP
2079 /// assert!(emoji_component.contains('7'));
2080 /// assert!(!emoji_component.contains('T'));
2081 /// ```
2082
2083}
2084
2085make_binary_property! {
2086 name: "Emoji_Modifier";
2087 short_name: "EMod";
2088 ident: EmojiModifier;
2089 data_marker: crate::provider::PropertyBinaryEmojiModifierV1;
2090 singleton: SINGLETON_PROPERTY_BINARY_EMOJI_MODIFIER_V1;
2091 /// Characters that are emoji modifiers.
2092 ///
2093 /// # Example
2094 ///
2095 /// ```
2096 /// use icu::properties::CodePointSetData;
2097 /// use icu::properties::props::EmojiModifier;
2098 ///
2099 /// let emoji_modifier = CodePointSetData::new::<EmojiModifier>();
2100 ///
2101 /// assert!(emoji_modifier.contains('\u{1F3FD}')); // EMOJI MODIFIER FITZPATRICK TYPE-4
2102 /// assert!(!emoji_modifier.contains('\u{200C}')); // ZERO WIDTH NON-JOINER
2103 /// ```
2104
2105}
2106
2107make_binary_property! {
2108 name: "Emoji";
2109 short_name: "Emoji";
2110 ident: Emoji;
2111 data_marker: crate::provider::PropertyBinaryEmojiV1;
2112 singleton: SINGLETON_PROPERTY_BINARY_EMOJI_V1;
2113 /// Characters that are emoji.
2114 ///
2115 /// # Example
2116 ///
2117 /// ```
2118 /// use icu::properties::CodePointSetData;
2119 /// use icu::properties::props::Emoji;
2120 ///
2121 /// let emoji = CodePointSetData::new::<Emoji>();
2122 ///
2123 /// assert!(emoji.contains('🔥')); // U+1F525 FIRE
2124 /// assert!(!emoji.contains('V'));
2125 /// ```
2126
2127}
2128
2129make_binary_property! {
2130 name: "Emoji_Presentation";
2131 short_name: "EPres";
2132 ident: EmojiPresentation;
2133 data_marker: crate::provider::PropertyBinaryEmojiPresentationV1;
2134 singleton: SINGLETON_PROPERTY_BINARY_EMOJI_PRESENTATION_V1;
2135 /// Characters that have emoji presentation by default.
2136 ///
2137 /// # Example
2138 ///
2139 /// ```
2140 /// use icu::properties::CodePointSetData;
2141 /// use icu::properties::props::EmojiPresentation;
2142 ///
2143 /// let emoji_presentation = CodePointSetData::new::<EmojiPresentation>();
2144 ///
2145 /// assert!(emoji_presentation.contains('🦬')); // U+1F9AC BISON
2146 /// assert!(!emoji_presentation.contains('♻')); // U+267B BLACK UNIVERSAL RECYCLING SYMBOL
2147 /// ```
2148
2149}
2150
2151make_binary_property! {
2152 name: "Extender";
2153 short_name: "Ext";
2154 ident: Extender;
2155 data_marker: crate::provider::PropertyBinaryExtenderV1;
2156 singleton: SINGLETON_PROPERTY_BINARY_EXTENDER_V1;
2157 /// Characters whose principal function is to extend the value of a preceding alphabetic
2158 /// character or to extend the shape of adjacent characters.
2159 ///
2160 /// # Example
2161 ///
2162 /// ```
2163 /// use icu::properties::CodePointSetData;
2164 /// use icu::properties::props::Extender;
2165 ///
2166 /// let extender = CodePointSetData::new::<Extender>();
2167 ///
2168 /// assert!(extender.contains('ヾ')); // U+30FE KATAKANA VOICED ITERATION MARK
2169 /// assert!(extender.contains('ー')); // U+30FC KATAKANA-HIRAGANA PROLONGED SOUND MARK
2170 /// assert!(!extender.contains('・')); // U+30FB KATAKANA MIDDLE DOT
2171 /// ```
2172
2173}
2174
2175make_binary_property! {
2176 name: "Extended_Pictographic";
2177 short_name: "ExtPict";
2178 ident: ExtendedPictographic;
2179 data_marker: crate::provider::PropertyBinaryExtendedPictographicV1;
2180 singleton: SINGLETON_PROPERTY_BINARY_EXTENDED_PICTOGRAPHIC_V1;
2181 /// Pictographic symbols, as well as reserved ranges in blocks largely associated with
2182 /// emoji characters
2183 ///
2184 /// # Example
2185 ///
2186 /// ```
2187 /// use icu::properties::CodePointSetData;
2188 /// use icu::properties::props::ExtendedPictographic;
2189 ///
2190 /// let extended_pictographic = CodePointSetData::new::<ExtendedPictographic>();
2191 ///
2192 /// assert!(extended_pictographic.contains('🥳')); // U+1F973 FACE WITH PARTY HORN AND PARTY HAT
2193 /// assert!(!extended_pictographic.contains('🇪')); // U+1F1EA REGIONAL INDICATOR SYMBOL LETTER E
2194 /// ```
2195
2196}
2197
2198make_binary_property! {
2199 name: "Graph";
2200 short_name: "Graph";
2201 ident: Graph;
2202 data_marker: crate::provider::PropertyBinaryGraphV1;
2203 singleton: SINGLETON_PROPERTY_BINARY_GRAPH_V1;
2204 /// Invisible characters.
2205 ///
2206 /// This is defined for POSIX compatibility.
2207
2208}
2209
2210make_binary_property! {
2211 name: "Grapheme_Base";
2212 short_name: "Gr_Base";
2213 ident: GraphemeBase;
2214 data_marker: crate::provider::PropertyBinaryGraphemeBaseV1;
2215 singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_BASE_V1;
2216 /// Property used together with the definition of Standard Korean Syllable Block to define
2217 /// "Grapheme base".
2218 ///
2219 /// See D58 in Chapter 3, Conformance in the Unicode Standard.
2220 ///
2221 /// # Example
2222 ///
2223 /// ```
2224 /// use icu::properties::CodePointSetData;
2225 /// use icu::properties::props::GraphemeBase;
2226 ///
2227 /// let grapheme_base = CodePointSetData::new::<GraphemeBase>();
2228 ///
2229 /// assert!(grapheme_base.contains('ക')); // U+0D15 MALAYALAM LETTER KA
2230 /// assert!(grapheme_base.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
2231 /// assert!(!grapheme_base.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
2232 /// ```
2233
2234}
2235
2236make_binary_property! {
2237 name: "Grapheme_Extend";
2238 short_name: "Gr_Ext";
2239 ident: GraphemeExtend;
2240 data_marker: crate::provider::PropertyBinaryGraphemeExtendV1;
2241 singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_EXTEND_V1;
2242 /// Property used to define "Grapheme extender".
2243 ///
2244 /// See D59 in Chapter 3, Conformance in the
2245 /// Unicode Standard.
2246 ///
2247 /// # Example
2248 ///
2249 /// ```
2250 /// use icu::properties::CodePointSetData;
2251 /// use icu::properties::props::GraphemeExtend;
2252 ///
2253 /// let grapheme_extend = CodePointSetData::new::<GraphemeExtend>();
2254 ///
2255 /// assert!(!grapheme_extend.contains('ക')); // U+0D15 MALAYALAM LETTER KA
2256 /// assert!(!grapheme_extend.contains('\u{0D3F}')); // U+0D3F MALAYALAM VOWEL SIGN I
2257 /// assert!(grapheme_extend.contains('\u{0D3E}')); // U+0D3E MALAYALAM VOWEL SIGN AA
2258 /// ```
2259
2260}
2261
2262make_binary_property! {
2263 name: "Grapheme_Link";
2264 short_name: "Gr_Link";
2265 ident: GraphemeLink;
2266 data_marker: crate::provider::PropertyBinaryGraphemeLinkV1;
2267 singleton: SINGLETON_PROPERTY_BINARY_GRAPHEME_LINK_V1;
2268 /// Deprecated property.
2269 ///
2270 /// Formerly proposed for programmatic determination of grapheme
2271 /// cluster boundaries.
2272}
2273
2274make_binary_property! {
2275 name: "Hex_Digit";
2276 short_name: "Hex";
2277 ident: HexDigit;
2278 data_marker: crate::provider::PropertyBinaryHexDigitV1;
2279 singleton: SINGLETON_PROPERTY_BINARY_HEX_DIGIT_V1;
2280 /// Characters commonly used for the representation of hexadecimal numbers, plus their
2281 /// compatibility equivalents.
2282 ///
2283 /// # Example
2284 ///
2285 /// ```
2286 /// use icu::properties::CodePointSetData;
2287 /// use icu::properties::props::HexDigit;
2288 ///
2289 /// let hex_digit = CodePointSetData::new::<HexDigit>();
2290 ///
2291 /// assert!(hex_digit.contains('0'));
2292 /// assert!(!hex_digit.contains('੩')); // U+0A69 GURMUKHI DIGIT THREE
2293 /// assert!(hex_digit.contains('f'));
2294 /// assert!(hex_digit.contains('f')); // U+FF46 FULLWIDTH LATIN SMALL LETTER F
2295 /// assert!(hex_digit.contains('F')); // U+FF26 FULLWIDTH LATIN CAPITAL LETTER F
2296 /// assert!(!hex_digit.contains('Ä')); // U+00C4 LATIN CAPITAL LETTER A WITH DIAERESIS
2297 /// ```
2298}
2299
2300make_binary_property! {
2301 name: "Hyphen";
2302 short_name: "Hyphen";
2303 ident: Hyphen;
2304 data_marker: crate::provider::PropertyBinaryHyphenV1;
2305 singleton: SINGLETON_PROPERTY_BINARY_HYPHEN_V1;
2306 /// Deprecated property.
2307 ///
2308 /// Dashes which are used to mark connections between pieces of
2309 /// words, plus the Katakana middle dot.
2310}
2311
2312make_binary_property! {
2313 name: "Id_Continue";
2314 short_name: "IDC";
2315 ident: IdContinue;
2316 data_marker: crate::provider::PropertyBinaryIdContinueV1;
2317 singleton: SINGLETON_PROPERTY_BINARY_ID_CONTINUE_V1;
2318 /// Characters that can come after the first character in an identifier.
2319 ///
2320 /// If using NFKC to
2321 /// fold differences between characters, use [`XidContinue`] instead. See
2322 /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2323 /// more details.
2324 ///
2325 /// # Example
2326 ///
2327 /// ```
2328 /// use icu::properties::CodePointSetData;
2329 /// use icu::properties::props::IdContinue;
2330 ///
2331 /// let id_continue = CodePointSetData::new::<IdContinue>();
2332 ///
2333 /// assert!(id_continue.contains('x'));
2334 /// assert!(id_continue.contains('1'));
2335 /// assert!(id_continue.contains('_'));
2336 /// assert!(id_continue.contains('ߝ')); // U+07DD NKO LETTER FA
2337 /// assert!(!id_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
2338 /// assert!(id_continue.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2339 /// ```
2340}
2341
2342make_binary_property! {
2343 name: "Ideographic";
2344 short_name: "Ideo";
2345 ident: Ideographic;
2346 data_marker: crate::provider::PropertyBinaryIdeographicV1;
2347 singleton: SINGLETON_PROPERTY_BINARY_IDEOGRAPHIC_V1;
2348 /// Characters considered to be CJKV (Chinese, Japanese, Korean, and Vietnamese)
2349 /// ideographs, or related siniform ideographs
2350 ///
2351 /// # Example
2352 ///
2353 /// ```
2354 /// use icu::properties::CodePointSetData;
2355 /// use icu::properties::props::Ideographic;
2356 ///
2357 /// let ideographic = CodePointSetData::new::<Ideographic>();
2358 ///
2359 /// assert!(ideographic.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2360 /// assert!(!ideographic.contains('밥')); // U+BC25 HANGUL SYLLABLE BAB
2361 /// ```
2362}
2363
2364make_binary_property! {
2365 name: "Id_Start";
2366 short_name: "IDS";
2367 ident: IdStart;
2368 data_marker: crate::provider::PropertyBinaryIdStartV1;
2369 singleton: SINGLETON_PROPERTY_BINARY_ID_START_V1;
2370 /// Characters that can begin an identifier.
2371 ///
2372 /// If using NFKC to fold differences between
2373 /// characters, use [`XidStart`] instead. See [`Unicode Standard Annex
2374 /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
2375 ///
2376 /// # Example
2377 ///
2378 /// ```
2379 /// use icu::properties::CodePointSetData;
2380 /// use icu::properties::props::IdStart;
2381 ///
2382 /// let id_start = CodePointSetData::new::<IdStart>();
2383 ///
2384 /// assert!(id_start.contains('x'));
2385 /// assert!(!id_start.contains('1'));
2386 /// assert!(!id_start.contains('_'));
2387 /// assert!(id_start.contains('ߝ')); // U+07DD NKO LETTER FA
2388 /// assert!(!id_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
2389 /// assert!(id_start.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2390 /// ```
2391}
2392
2393make_binary_property! {
2394 name: "Ids_Binary_Operator";
2395 short_name: "IDSB";
2396 ident: IdsBinaryOperator;
2397 data_marker: crate::provider::PropertyBinaryIdsBinaryOperatorV1;
2398 singleton: SINGLETON_PROPERTY_BINARY_IDS_BINARY_OPERATOR_V1;
2399 /// Characters used in Ideographic Description Sequences.
2400 ///
2401 /// # Example
2402 ///
2403 /// ```
2404 /// use icu::properties::CodePointSetData;
2405 /// use icu::properties::props::IdsBinaryOperator;
2406 ///
2407 /// let ids_binary_operator = CodePointSetData::new::<IdsBinaryOperator>();
2408 ///
2409 /// assert!(ids_binary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2410 /// assert!(!ids_binary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
2411 /// ```
2412}
2413
2414make_binary_property! {
2415 name: "Ids_Trinary_Operator";
2416 short_name: "IDST";
2417 ident: IdsTrinaryOperator;
2418 data_marker: crate::provider::PropertyBinaryIdsTrinaryOperatorV1;
2419 singleton: SINGLETON_PROPERTY_BINARY_IDS_TRINARY_OPERATOR_V1;
2420 /// Characters used in Ideographic Description Sequences.
2421 ///
2422 /// # Example
2423 ///
2424 /// ```
2425 /// use icu::properties::CodePointSetData;
2426 /// use icu::properties::props::IdsTrinaryOperator;
2427 ///
2428 /// let ids_trinary_operator = CodePointSetData::new::<IdsTrinaryOperator>();
2429 ///
2430 /// assert!(ids_trinary_operator.contains('\u{2FF2}')); // IDEOGRAPHIC DESCRIPTION CHARACTER LEFT TO MIDDLE AND RIGHT
2431 /// assert!(ids_trinary_operator.contains('\u{2FF3}')); // IDEOGRAPHIC DESCRIPTION CHARACTER ABOVE TO MIDDLE AND BELOW
2432 /// assert!(!ids_trinary_operator.contains('\u{2FF4}'));
2433 /// assert!(!ids_trinary_operator.contains('\u{2FF5}')); // IDEOGRAPHIC DESCRIPTION CHARACTER SURROUND FROM ABOVE
2434 /// assert!(!ids_trinary_operator.contains('\u{3006}')); // IDEOGRAPHIC CLOSING MARK
2435 /// ```
2436}
2437
2438make_binary_property! {
2439 name: "Join_Control";
2440 short_name: "Join_C";
2441 ident: JoinControl;
2442 data_marker: crate::provider::PropertyBinaryJoinControlV1;
2443 singleton: SINGLETON_PROPERTY_BINARY_JOIN_CONTROL_V1;
2444 /// Format control characters which have specific functions for control of cursive joining
2445 /// and ligation.
2446 ///
2447 /// # Example
2448 ///
2449 /// ```
2450 /// use icu::properties::CodePointSetData;
2451 /// use icu::properties::props::JoinControl;
2452 ///
2453 /// let join_control = CodePointSetData::new::<JoinControl>();
2454 ///
2455 /// assert!(join_control.contains('\u{200C}')); // ZERO WIDTH NON-JOINER
2456 /// assert!(join_control.contains('\u{200D}')); // ZERO WIDTH JOINER
2457 /// assert!(!join_control.contains('\u{200E}'));
2458 /// ```
2459}
2460
2461make_binary_property! {
2462 name: "Logical_Order_Exception";
2463 short_name: "LOE";
2464 ident: LogicalOrderException;
2465 data_marker: crate::provider::PropertyBinaryLogicalOrderExceptionV1;
2466 singleton: SINGLETON_PROPERTY_BINARY_LOGICAL_ORDER_EXCEPTION_V1;
2467 /// A small number of spacing vowel letters occurring in certain Southeast Asian scripts such as Thai and Lao.
2468 ///
2469 /// # Example
2470 ///
2471 /// ```
2472 /// use icu::properties::CodePointSetData;
2473 /// use icu::properties::props::LogicalOrderException;
2474 ///
2475 /// let logical_order_exception = CodePointSetData::new::<LogicalOrderException>();
2476 ///
2477 /// assert!(logical_order_exception.contains('ແ')); // U+0EC1 LAO VOWEL SIGN EI
2478 /// assert!(!logical_order_exception.contains('ະ')); // U+0EB0 LAO VOWEL SIGN A
2479 /// ```
2480}
2481
2482make_binary_property! {
2483 name: "Lowercase";
2484 short_name: "Lower";
2485 ident: Lowercase;
2486 data_marker: crate::provider::PropertyBinaryLowercaseV1;
2487 singleton: SINGLETON_PROPERTY_BINARY_LOWERCASE_V1;
2488 /// Lowercase characters.
2489 ///
2490 /// # Example
2491 ///
2492 /// ```
2493 /// use icu::properties::CodePointSetData;
2494 /// use icu::properties::props::Lowercase;
2495 ///
2496 /// let lowercase = CodePointSetData::new::<Lowercase>();
2497 ///
2498 /// assert!(lowercase.contains('a'));
2499 /// assert!(!lowercase.contains('A'));
2500 /// ```
2501}
2502
2503make_binary_property! {
2504 name: "Math";
2505 short_name: "Math";
2506 ident: Math;
2507 data_marker: crate::provider::PropertyBinaryMathV1;
2508 singleton: SINGLETON_PROPERTY_BINARY_MATH_V1;
2509 /// Characters used in mathematical notation.
2510 ///
2511 /// # Example
2512 ///
2513 /// ```
2514 /// use icu::properties::CodePointSetData;
2515 /// use icu::properties::props::Math;
2516 ///
2517 /// let math = CodePointSetData::new::<Math>();
2518 ///
2519 /// assert!(math.contains('='));
2520 /// assert!(math.contains('+'));
2521 /// assert!(!math.contains('-'));
2522 /// assert!(math.contains('−')); // U+2212 MINUS SIGN
2523 /// assert!(!math.contains('/'));
2524 /// assert!(math.contains('∕')); // U+2215 DIVISION SLASH
2525 /// ```
2526}
2527
2528make_binary_property! {
2529 name: "Noncharacter_Code_Point";
2530 short_name: "NChar";
2531 ident: NoncharacterCodePoint;
2532 data_marker: crate::provider::PropertyBinaryNoncharacterCodePointV1;
2533 singleton: SINGLETON_PROPERTY_BINARY_NONCHARACTER_CODE_POINT_V1;
2534 /// Code points permanently reserved for internal use.
2535 ///
2536 /// # Example
2537 ///
2538 /// ```
2539 /// use icu::properties::CodePointSetData;
2540 /// use icu::properties::props::NoncharacterCodePoint;
2541 ///
2542 /// let noncharacter_code_point = CodePointSetData::new::<NoncharacterCodePoint>();
2543 ///
2544 /// assert!(noncharacter_code_point.contains('\u{FDD0}'));
2545 /// assert!(noncharacter_code_point.contains('\u{FFFF}'));
2546 /// assert!(!noncharacter_code_point.contains('\u{10000}'));
2547 /// ```
2548}
2549
2550make_binary_property! {
2551 name: "NFC_Inert";
2552 short_name: "NFC_Inert";
2553 ident: NfcInert;
2554 data_marker: crate::provider::PropertyBinaryNfcInertV1;
2555 singleton: SINGLETON_PROPERTY_BINARY_NFC_INERT_V1;
2556 /// Characters that are inert under NFC, i.e., they do not interact with adjacent characters.
2557}
2558
2559make_binary_property! {
2560 name: "NFD_Inert";
2561 short_name: "NFD_Inert";
2562 ident: NfdInert;
2563 data_marker: crate::provider::PropertyBinaryNfdInertV1;
2564 singleton: SINGLETON_PROPERTY_BINARY_NFD_INERT_V1;
2565 /// Characters that are inert under NFD, i.e., they do not interact with adjacent characters.
2566}
2567
2568make_binary_property! {
2569 name: "NFKC_Inert";
2570 short_name: "NFKC_Inert";
2571 ident: NfkcInert;
2572 data_marker: crate::provider::PropertyBinaryNfkcInertV1;
2573 singleton: SINGLETON_PROPERTY_BINARY_NFKC_INERT_V1;
2574 /// Characters that are inert under NFKC, i.e., they do not interact with adjacent characters.
2575}
2576
2577make_binary_property! {
2578 name: "NFKD_Inert";
2579 short_name: "NFKD_Inert";
2580 ident: NfkdInert;
2581 data_marker: crate::provider::PropertyBinaryNfkdInertV1;
2582 singleton: SINGLETON_PROPERTY_BINARY_NFKD_INERT_V1;
2583 /// Characters that are inert under NFKD, i.e., they do not interact with adjacent characters.
2584}
2585
2586make_binary_property! {
2587 name: "Pattern_Syntax";
2588 short_name: "Pat_Syn";
2589 ident: PatternSyntax;
2590 data_marker: crate::provider::PropertyBinaryPatternSyntaxV1;
2591 singleton: SINGLETON_PROPERTY_BINARY_PATTERN_SYNTAX_V1;
2592 /// Characters used as syntax in patterns (such as regular expressions).
2593 ///
2594 /// See [`Unicode
2595 /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2596 /// details.
2597 ///
2598 /// # Example
2599 ///
2600 /// ```
2601 /// use icu::properties::CodePointSetData;
2602 /// use icu::properties::props::PatternSyntax;
2603 ///
2604 /// let pattern_syntax = CodePointSetData::new::<PatternSyntax>();
2605 ///
2606 /// assert!(pattern_syntax.contains('{'));
2607 /// assert!(pattern_syntax.contains('⇒')); // U+21D2 RIGHTWARDS DOUBLE ARROW
2608 /// assert!(!pattern_syntax.contains('0'));
2609 /// ```
2610}
2611
2612make_binary_property! {
2613 name: "Pattern_White_Space";
2614 short_name: "Pat_WS";
2615 ident: PatternWhiteSpace;
2616 data_marker: crate::provider::PropertyBinaryPatternWhiteSpaceV1;
2617 singleton: SINGLETON_PROPERTY_BINARY_PATTERN_WHITE_SPACE_V1;
2618 /// Characters used as whitespace in patterns (such as regular expressions).
2619 ///
2620 /// See
2621 /// [`Unicode Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for
2622 /// more details.
2623 ///
2624 /// # Example
2625 ///
2626 /// ```
2627 /// use icu::properties::CodePointSetData;
2628 /// use icu::properties::props::PatternWhiteSpace;
2629 ///
2630 /// let pattern_white_space = CodePointSetData::new::<PatternWhiteSpace>();
2631 ///
2632 /// assert!(pattern_white_space.contains(' '));
2633 /// assert!(pattern_white_space.contains('\u{2029}')); // PARAGRAPH SEPARATOR
2634 /// assert!(pattern_white_space.contains('\u{000A}')); // NEW LINE
2635 /// assert!(!pattern_white_space.contains('\u{00A0}')); // NO-BREAK SPACE
2636 /// ```
2637}
2638
2639make_binary_property! {
2640 name: "Prepended_Concatenation_Mark";
2641 short_name: "PCM";
2642 ident: PrependedConcatenationMark;
2643 data_marker: crate::provider::PropertyBinaryPrependedConcatenationMarkV1;
2644 singleton: SINGLETON_PROPERTY_BINARY_PREPENDED_CONCATENATION_MARK_V1;
2645 /// A small class of visible format controls, which precede and then span a sequence of
2646 /// other characters, usually digits.
2647}
2648
2649make_binary_property! {
2650 name: "Print";
2651 short_name: "Print";
2652 ident: Print;
2653 data_marker: crate::provider::PropertyBinaryPrintV1;
2654 singleton: SINGLETON_PROPERTY_BINARY_PRINT_V1;
2655 /// Printable characters (visible characters and whitespace).
2656 ///
2657 /// This is defined for POSIX compatibility.
2658}
2659
2660make_binary_property! {
2661 name: "Quotation_Mark";
2662 short_name: "QMark";
2663 ident: QuotationMark;
2664 data_marker: crate::provider::PropertyBinaryQuotationMarkV1;
2665 singleton: SINGLETON_PROPERTY_BINARY_QUOTATION_MARK_V1;
2666 /// Punctuation characters that function as quotation marks.
2667 ///
2668 /// # Example
2669 ///
2670 /// ```
2671 /// use icu::properties::CodePointSetData;
2672 /// use icu::properties::props::QuotationMark;
2673 ///
2674 /// let quotation_mark = CodePointSetData::new::<QuotationMark>();
2675 ///
2676 /// assert!(quotation_mark.contains('\''));
2677 /// assert!(quotation_mark.contains('„')); // U+201E DOUBLE LOW-9 QUOTATION MARK
2678 /// assert!(!quotation_mark.contains('<'));
2679 /// ```
2680}
2681
2682make_binary_property! {
2683 name: "Radical";
2684 short_name: "Radical";
2685 ident: Radical;
2686 data_marker: crate::provider::PropertyBinaryRadicalV1;
2687 singleton: SINGLETON_PROPERTY_BINARY_RADICAL_V1;
2688 /// Characters used in the definition of Ideographic Description Sequences.
2689 ///
2690 /// # Example
2691 ///
2692 /// ```
2693 /// use icu::properties::CodePointSetData;
2694 /// use icu::properties::props::Radical;
2695 ///
2696 /// let radical = CodePointSetData::new::<Radical>();
2697 ///
2698 /// assert!(radical.contains('⺆')); // U+2E86 CJK RADICAL BOX
2699 /// assert!(!radical.contains('丹')); // U+F95E CJK COMPATIBILITY IDEOGRAPH-F95E
2700 /// ```
2701}
2702
2703make_binary_property! {
2704 name: "Regional_Indicator";
2705 short_name: "RI";
2706 ident: RegionalIndicator;
2707 data_marker: crate::provider::PropertyBinaryRegionalIndicatorV1;
2708 singleton: SINGLETON_PROPERTY_BINARY_REGIONAL_INDICATOR_V1;
2709 /// Regional indicator characters, `U+1F1E6..U+1F1FF`.
2710 ///
2711 /// # Example
2712 ///
2713 /// ```
2714 /// use icu::properties::CodePointSetData;
2715 /// use icu::properties::props::RegionalIndicator;
2716 ///
2717 /// let regional_indicator = CodePointSetData::new::<RegionalIndicator>();
2718 ///
2719 /// assert!(regional_indicator.contains('🇹')); // U+1F1F9 REGIONAL INDICATOR SYMBOL LETTER T
2720 /// assert!(!regional_indicator.contains('Ⓣ')); // U+24C9 CIRCLED LATIN CAPITAL LETTER T
2721 /// assert!(!regional_indicator.contains('T'));
2722 /// ```
2723}
2724
2725make_binary_property! {
2726 name: "Soft_Dotted";
2727 short_name: "SD";
2728 ident: SoftDotted;
2729 data_marker: crate::provider::PropertyBinarySoftDottedV1;
2730 singleton: SINGLETON_PROPERTY_BINARY_SOFT_DOTTED_V1;
2731 /// Characters with a "soft dot", like i or j.
2732 ///
2733 /// An accent placed on these characters causes
2734 /// the dot to disappear.
2735 ///
2736 /// # Example
2737 ///
2738 /// ```
2739 /// use icu::properties::CodePointSetData;
2740 /// use icu::properties::props::SoftDotted;
2741 ///
2742 /// let soft_dotted = CodePointSetData::new::<SoftDotted>();
2743 ///
2744 /// assert!(soft_dotted.contains('і')); //U+0456 CYRILLIC SMALL LETTER BYELORUSSIAN-UKRAINIAN I
2745 /// assert!(!soft_dotted.contains('ı')); // U+0131 LATIN SMALL LETTER DOTLESS I
2746 /// ```
2747}
2748
2749make_binary_property! {
2750 name: "Segment_Starter";
2751 short_name: "Segment_Starter";
2752 ident: SegmentStarter;
2753 data_marker: crate::provider::PropertyBinarySegmentStarterV1;
2754 singleton: SINGLETON_PROPERTY_BINARY_SEGMENT_STARTER_V1;
2755 /// Characters that are starters in terms of Unicode normalization and combining character
2756 /// sequences.
2757}
2758
2759make_binary_property! {
2760 name: "Case_Sensitive";
2761 short_name: "Case_Sensitive";
2762 ident: CaseSensitive;
2763 data_marker: crate::provider::PropertyBinaryCaseSensitiveV1;
2764 singleton: SINGLETON_PROPERTY_BINARY_CASE_SENSITIVE_V1;
2765 /// Characters that are either the source of a case mapping or in the target of a case
2766 /// mapping.
2767}
2768
2769make_binary_property! {
2770 name: "Sentence_Terminal";
2771 short_name: "STerm";
2772 ident: SentenceTerminal;
2773 data_marker: crate::provider::PropertyBinarySentenceTerminalV1;
2774 singleton: SINGLETON_PROPERTY_BINARY_SENTENCE_TERMINAL_V1;
2775 /// Punctuation characters that generally mark the end of sentences.
2776 ///
2777 /// # Example
2778 ///
2779 /// ```
2780 /// use icu::properties::CodePointSetData;
2781 /// use icu::properties::props::SentenceTerminal;
2782 ///
2783 /// let sentence_terminal = CodePointSetData::new::<SentenceTerminal>();
2784 ///
2785 /// assert!(sentence_terminal.contains('.'));
2786 /// assert!(sentence_terminal.contains('?'));
2787 /// assert!(sentence_terminal.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
2788 /// assert!(!sentence_terminal.contains(','));
2789 /// assert!(!sentence_terminal.contains('¿')); // U+00BF INVERTED QUESTION MARK
2790 /// ```
2791}
2792
2793make_binary_property! {
2794 name: "Terminal_Punctuation";
2795 short_name: "Term";
2796 ident: TerminalPunctuation;
2797 data_marker: crate::provider::PropertyBinaryTerminalPunctuationV1;
2798 singleton: SINGLETON_PROPERTY_BINARY_TERMINAL_PUNCTUATION_V1;
2799 /// Punctuation characters that generally mark the end of textual units.
2800 ///
2801 /// # Example
2802 ///
2803 /// ```
2804 /// use icu::properties::CodePointSetData;
2805 /// use icu::properties::props::TerminalPunctuation;
2806 ///
2807 /// let terminal_punctuation = CodePointSetData::new::<TerminalPunctuation>();
2808 ///
2809 /// assert!(terminal_punctuation.contains('.'));
2810 /// assert!(terminal_punctuation.contains('?'));
2811 /// assert!(terminal_punctuation.contains('᪨')); // U+1AA8 TAI THAM SIGN KAAN
2812 /// assert!(terminal_punctuation.contains(','));
2813 /// assert!(!terminal_punctuation.contains('¿')); // U+00BF INVERTED QUESTION MARK
2814 /// ```
2815}
2816
2817make_binary_property! {
2818 name: "Unified_Ideograph";
2819 short_name: "UIdeo";
2820 ident: UnifiedIdeograph;
2821 data_marker: crate::provider::PropertyBinaryUnifiedIdeographV1;
2822 singleton: SINGLETON_PROPERTY_BINARY_UNIFIED_IDEOGRAPH_V1;
2823 /// A property which specifies the exact set of Unified CJK Ideographs in the standard.
2824 ///
2825 /// # Example
2826 ///
2827 /// ```
2828 /// use icu::properties::CodePointSetData;
2829 /// use icu::properties::props::UnifiedIdeograph;
2830 ///
2831 /// let unified_ideograph = CodePointSetData::new::<UnifiedIdeograph>();
2832 ///
2833 /// assert!(unified_ideograph.contains('川')); // U+5DDD CJK UNIFIED IDEOGRAPH-5DDD
2834 /// assert!(unified_ideograph.contains('木')); // U+6728 CJK UNIFIED IDEOGRAPH-6728
2835 /// assert!(!unified_ideograph.contains('𛅸')); // U+1B178 NUSHU CHARACTER-1B178
2836 /// ```
2837}
2838
2839make_binary_property! {
2840 name: "Uppercase";
2841 short_name: "Upper";
2842 ident: Uppercase;
2843 data_marker: crate::provider::PropertyBinaryUppercaseV1;
2844 singleton: SINGLETON_PROPERTY_BINARY_UPPERCASE_V1;
2845 /// Uppercase characters.
2846 ///
2847 /// # Example
2848 ///
2849 /// ```
2850 /// use icu::properties::CodePointSetData;
2851 /// use icu::properties::props::Uppercase;
2852 ///
2853 /// let uppercase = CodePointSetData::new::<Uppercase>();
2854 ///
2855 /// assert!(uppercase.contains('U'));
2856 /// assert!(!uppercase.contains('u'));
2857 /// ```
2858}
2859
2860make_binary_property! {
2861 name: "Variation_Selector";
2862 short_name: "VS";
2863 ident: VariationSelector;
2864 data_marker: crate::provider::PropertyBinaryVariationSelectorV1;
2865 singleton: SINGLETON_PROPERTY_BINARY_VARIATION_SELECTOR_V1;
2866 /// Characters that are Variation Selectors.
2867 ///
2868 /// # Example
2869 ///
2870 /// ```
2871 /// use icu::properties::CodePointSetData;
2872 /// use icu::properties::props::VariationSelector;
2873 ///
2874 /// let variation_selector = CodePointSetData::new::<VariationSelector>();
2875 ///
2876 /// assert!(variation_selector.contains('\u{180D}')); // MONGOLIAN FREE VARIATION SELECTOR THREE
2877 /// assert!(!variation_selector.contains('\u{303E}')); // IDEOGRAPHIC VARIATION INDICATOR
2878 /// assert!(variation_selector.contains('\u{FE0F}')); // VARIATION SELECTOR-16
2879 /// assert!(!variation_selector.contains('\u{FE10}')); // PRESENTATION FORM FOR VERTICAL COMMA
2880 /// assert!(variation_selector.contains('\u{E01EF}')); // VARIATION SELECTOR-256
2881 /// ```
2882}
2883
2884make_binary_property! {
2885 name: "White_Space";
2886 short_name: "space";
2887 ident: WhiteSpace;
2888 data_marker: crate::provider::PropertyBinaryWhiteSpaceV1;
2889 singleton: SINGLETON_PROPERTY_BINARY_WHITE_SPACE_V1;
2890 /// Spaces, separator characters and other control characters which should be treated by
2891 /// programming languages as "white space" for the purpose of parsing elements.
2892 ///
2893 /// # Example
2894 ///
2895 /// ```
2896 /// use icu::properties::CodePointSetData;
2897 /// use icu::properties::props::WhiteSpace;
2898 ///
2899 /// let white_space = CodePointSetData::new::<WhiteSpace>();
2900 ///
2901 /// assert!(white_space.contains(' '));
2902 /// assert!(white_space.contains('\u{000A}')); // NEW LINE
2903 /// assert!(white_space.contains('\u{00A0}')); // NO-BREAK SPACE
2904 /// assert!(!white_space.contains('\u{200B}')); // ZERO WIDTH SPACE
2905 /// ```
2906}
2907
2908make_binary_property! {
2909 name: "Xdigit";
2910 short_name: "Xdigit";
2911 ident: Xdigit;
2912 data_marker: crate::provider::PropertyBinaryXdigitV1;
2913 singleton: SINGLETON_PROPERTY_BINARY_XDIGIT_V1;
2914 /// Hexadecimal digits
2915 ///
2916 /// This is defined for POSIX compatibility.
2917}
2918
2919make_binary_property! {
2920 name: "XID_Continue";
2921 short_name: "XIDC";
2922 ident: XidContinue;
2923 data_marker: crate::provider::PropertyBinaryXidContinueV1;
2924 singleton: SINGLETON_PROPERTY_BINARY_XID_CONTINUE_V1;
2925 /// Characters that can come after the first character in an identifier.
2926 ///
2927 /// See [`Unicode Standard Annex
2928 /// #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more details.
2929 ///
2930 /// # Example
2931 ///
2932 /// ```
2933 /// use icu::properties::CodePointSetData;
2934 /// use icu::properties::props::XidContinue;
2935 ///
2936 /// let xid_continue = CodePointSetData::new::<XidContinue>();
2937 ///
2938 /// assert!(xid_continue.contains('x'));
2939 /// assert!(xid_continue.contains('1'));
2940 /// assert!(xid_continue.contains('_'));
2941 /// assert!(xid_continue.contains('ߝ')); // U+07DD NKO LETTER FA
2942 /// assert!(!xid_continue.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
2943 /// assert!(!xid_continue.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2944 /// ```
2945}
2946
2947make_binary_property! {
2948 name: "XID_Start";
2949 short_name: "XIDS";
2950 ident: XidStart;
2951 data_marker: crate::provider::PropertyBinaryXidStartV1;
2952 singleton: SINGLETON_PROPERTY_BINARY_XID_START_V1;
2953 /// Characters that can begin an identifier.
2954 ///
2955 /// See [`Unicode
2956 /// Standard Annex #31`](https://www.unicode.org/reports/tr31/tr31-35.html) for more
2957 /// details.
2958 ///
2959 /// # Example
2960 ///
2961 /// ```
2962 /// use icu::properties::CodePointSetData;
2963 /// use icu::properties::props::XidStart;
2964 ///
2965 /// let xid_start = CodePointSetData::new::<XidStart>();
2966 ///
2967 /// assert!(xid_start.contains('x'));
2968 /// assert!(!xid_start.contains('1'));
2969 /// assert!(!xid_start.contains('_'));
2970 /// assert!(xid_start.contains('ߝ')); // U+07DD NKO LETTER FA
2971 /// assert!(!xid_start.contains('ⓧ')); // U+24E7 CIRCLED LATIN SMALL LETTER X
2972 /// assert!(!xid_start.contains('\u{FC5E}')); // ARABIC LIGATURE SHADDA WITH DAMMATAN ISOLATED FORM
2973 /// ```
2974}
2975
2976pub use crate::emoji::EmojiSet;
2977
/// Declares a unit marker struct for an emoji set property.
///
/// The generated struct derives `Debug`, is `#[non_exhaustive]`, is sealed via
/// `crate::private::Sealed`, and implements `EmojiSet` with the given data marker.
/// The `SINGLETON` constant, pointing at the named baked value, is only emitted when
/// the `compiled_data` feature is enabled.
///
/// At least one attribute (normally the doc comment) must follow the `singleton:`
/// entry; all of them are forwarded onto the struct.
macro_rules! make_emoji_set {
    (
        ident: $set:ident;
        data_marker: $marker:ty;
        singleton: $baked:ident;
        $(#[$attr:meta])+
    ) => {
        $(#[$attr])+
        #[derive(Debug)]
        #[non_exhaustive]
        pub struct $set;

        impl crate::private::Sealed for $set {}

        impl EmojiSet for $set {
            type DataMarker = $marker;
            #[cfg(feature = "compiled_data")]
            const SINGLETON: &'static crate::provider::PropertyUnicodeSet<'static> =
                &crate::provider::Baked::$baked;
        }
    }
}
3000
3001make_emoji_set! {
3002 ident: BasicEmoji;
3003 data_marker: crate::provider::PropertyBinaryBasicEmojiV1;
3004 singleton: SINGLETON_PROPERTY_BINARY_BASIC_EMOJI_V1;
3005 /// Characters and character sequences intended for general-purpose, independent, direct input.
3006 ///
3007 /// See [`Unicode Technical Standard #51`](https://unicode.org/reports/tr51/) for more
3008 /// details.
3009 ///
3010 /// # Example
3011 ///
3012 /// ```
3013 /// use icu::properties::EmojiSetData;
3014 /// use icu::properties::props::BasicEmoji;
3015 ///
3016 /// let basic_emoji = EmojiSetData::new::<BasicEmoji>();
3017 ///
3018 /// assert!(!basic_emoji.contains('\u{0020}'));
3019 /// assert!(!basic_emoji.contains('\n'));
3020 /// assert!(basic_emoji.contains('🦃')); // U+1F983 TURKEY
3021 /// assert!(basic_emoji.contains_str("\u{1F983}"));
3022 /// assert!(basic_emoji.contains_str("\u{1F6E4}\u{FE0F}")); // railway track
3023 /// assert!(!basic_emoji.contains_str("\u{0033}\u{FE0F}\u{20E3}")); // Emoji_Keycap_Sequence, keycap 3
3024 /// ```
3025}
3026
#[cfg(test)]
mod test_enumerated_property_completeness {
    use super::*;
    use std::collections::BTreeMap;

    /// Asserts that the values listed in `consts` are exactly the values found in the
    /// name-to-value `lookup` data.
    ///
    /// On mismatch the panic message lists every value that appears on only one side,
    /// tagged with the side it came from (`Consts` or `Data`).
    fn check_enum<'a, T: NamedEnumeratedProperty>(
        lookup: &crate::provider::names::PropertyValueNameToEnumMap<'static>,
        consts: impl IntoIterator<Item = &'a T>,
    ) where
        u16: From<T>,
    {
        // Index the data entries by value. Entries are removed as consts claim them,
        // so whatever remains afterwards has no matching const.
        let mut unclaimed_data: BTreeMap<_, _> = lookup
            .map
            .iter()
            .map(|(name, value)| (value, (name, "Data")))
            .collect();

        let long_names = crate::PropertyNamesLong::<T>::new();

        // Consts that have no data entry, in the order they were supplied.
        let mut mismatches: Vec<_> = consts
            .into_iter()
            .map(|&constant| {
                let label = long_names.get(constant).unwrap_or("<unknown>").to_string();
                (u16::from(constant) as usize, (label, "Consts"))
            })
            .filter(|(value, _)| unclaimed_data.remove(value).is_none())
            .collect();
        // Followed by the data entries that no const claimed.
        mismatches.extend(unclaimed_data);

        let report: String = mismatches
            .into_iter()
            .map(|(value, (name, source))| format!("{source}:\t{name} = {value:?}\n"))
            .collect();

        assert!(
            report.is_empty(),
            "Values defined in data do not match values defined in consts. Difference:\n{}",
            report
        );
    }

    /// `EastAsianWidth` consts vs. baked name data.
    #[test]
    fn test_ea() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_EAST_ASIAN_WIDTH_V1,
            EastAsianWidth::ALL_VALUES,
        );
    }

    /// `CanonicalCombiningClass` consts vs. baked name data.
    #[test]
    fn test_ccc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_CANONICAL_COMBINING_CLASS_V1,
            CanonicalCombiningClass::ALL_VALUES,
        );
    }

    /// `JoiningType` consts vs. baked name data.
    #[test]
    fn test_jt() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_JOINING_TYPE_V1,
            JoiningType::ALL_VALUES,
        );
    }

    /// `IndicSyllabicCategory` consts vs. baked name data.
    #[test]
    fn test_insc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_INDIC_SYLLABIC_CATEGORY_V1,
            IndicSyllabicCategory::ALL_VALUES,
        );
    }

    /// `SentenceBreak` consts vs. baked name data.
    #[test]
    fn test_sb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_SENTENCE_BREAK_V1,
            SentenceBreak::ALL_VALUES,
        );
    }

    /// `WordBreak` consts vs. baked name data.
    #[test]
    fn test_wb() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_WORD_BREAK_V1,
            WordBreak::ALL_VALUES,
        );
    }

    /// `BidiClass` consts vs. baked name data.
    #[test]
    fn test_bc() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_BIDI_CLASS_V1,
            BidiClass::ALL_VALUES,
        );
    }

    /// `HangulSyllableType` consts vs. baked name data.
    #[test]
    fn test_hst() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_HANGUL_SYLLABLE_TYPE_V1,
            HangulSyllableType::ALL_VALUES,
        );
    }

    /// `VerticalOrientation` consts vs. baked name data.
    #[test]
    fn test_vo() {
        check_enum(
            crate::provider::Baked::SINGLETON_PROPERTY_NAME_PARSE_VERTICAL_ORIENTATION_V1,
            VerticalOrientation::ALL_VALUES,
        );
    }
}