1#[cfg(feature = "datagen")]
8use alloc::collections::BTreeMap;
9use core::num::TryFromIntError;
10use icu_collections::codepointtrie::TrieValue;
11use zerovec::ule::{AsULE, RawBytesULE, UleError, ULE};
12
/// Case classification held in the two lowest bits of a packed case-mapping value.
///
/// Each discriminant is the bit pattern used in the packed form: encoding casts
/// the variant with `as u16`, decoding goes through `CaseType::from_masked_bits`.
/// The bit pattern `0` has no variant of its own and decodes to `None`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
pub enum CaseType {
    /// Encoded as bit pattern `1`.
    Lower = 1,
    /// Encoded as bit pattern `2`.
    Upper = 2,
    /// Encoded as bit pattern `3`.
    Title = 3,
}

impl CaseType {
    /// Mask covering the bits a case type occupies.
    pub(crate) const CASE_MASK: u16 = 0x3;

    /// Decodes a case type from bits that were already masked with `CASE_MASK`.
    ///
    /// Returns `None` for `0`, otherwise the variant whose discriminant equals `b`.
    /// Passing unmasked bits trips the debug assertion; without debug assertions
    /// every value other than `0`, `1` and `2` decodes to `Title`.
    #[inline]
    pub(crate) fn from_masked_bits(b: u16) -> Option<Self> {
        debug_assert!(b & Self::CASE_MASK == b);
        let decoded = match b {
            // Zero means "no case type", so there is nothing to wrap.
            0 => return None,
            1 => CaseType::Lower,
            2 => CaseType::Upper,
            _ => CaseType::Title,
        };
        Some(decoded)
    }
}
52
/// Two-bit dot/accent classification stored in a packed case-mapping value.
///
/// Each discriminant is the bit pattern used in the packed form: encoding casts
/// the variant with `as u16`, decoding goes through `DotType::from_masked_bits`.
/// `NoDot` is the `Default` value.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
#[derive(Default)]
pub enum DotType {
    /// Encoded as bit pattern `0`; the default.
    #[default]
    NoDot = 0,
    /// Encoded as bit pattern `1`.
    SoftDotted = 1,
    /// Encoded as bit pattern `2`.
    Above = 2,
    /// Encoded as bit pattern `3`.
    OtherAccent = 3,
}

impl DotType {
    /// Mask covering the bits a dot type occupies once shifted down to bit 0.
    pub(crate) const DOT_MASK: u16 = 0x3;

    /// Decodes a dot type from bits that were already shifted and masked with `DOT_MASK`.
    ///
    /// Passing unmasked bits trips the debug assertion; without debug assertions
    /// every value other than `0`, `1` and `2` decodes to `OtherAccent`.
    #[inline]
    pub(crate) fn from_masked_bits(b: u16) -> Self {
        // Variants for bit patterns 0, 1 and 2; anything past the end of the
        // table (including 3) falls back to `OtherAccent`.
        const EXPLICIT: [DotType; 3] = [DotType::NoDot, DotType::SoftDotted, DotType::Above];

        debug_assert!(b & Self::DOT_MASK == b);
        EXPLICIT
            .get(usize::from(b))
            .copied()
            .unwrap_or(DotType::OtherAccent)
    }
}
97
/// Selects which case-mapping operation a lookup is performed for.
///
/// `CaseMapData::is_relevant_to` splits these into two groups: `Lower` and `Fold`
/// are relevant to data whose case type is upper or title, while `Upper` and
/// `Title` are relevant to data whose case type is lower.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub(crate) enum MappingKind {
    /// Grouped with `Fold` by `CaseMapData::is_relevant_to`.
    Lower = 0,
    /// Grouped with `Lower` by `CaseMapData::is_relevant_to`.
    Fold = 1,
    /// Grouped with `Title` by `CaseMapData::is_relevant_to`.
    Upper = 2,
    /// Grouped with `Upper` by `CaseMapData::is_relevant_to`.
    Title = 3,
}
105
/// Decoded form of one packed 16-bit case-mapping value.
///
/// The `AsULE` impl in this file converts between this struct and its packed
/// representation, `CaseMapDataULE`; the `TrieValue` impl exposes the same
/// packed value as a `u32`.
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
pub struct CaseMapData {
    /// Flag stored in bit `0x4` of the packed value (note the field's spelling;
    /// the accessor is `is_ignorable`).
    pub ignoreable: bool,
    /// Variant-specific payload: either an exception index, or the
    /// non-exception flags with an optional case type and delta.
    pub kind: CaseMapDataKind,
}
123
/// The payload of a [`CaseMapData`], distinguishing the three shapes the packed
/// value can take.
///
/// Which variant is decoded depends on the exception bit and, for non-exception
/// values, on whether the case-type bits are zero.
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub enum CaseMapDataKind {
    /// The exception bit is set: holds the optional case type and the exception
    /// index (the packed value shifted right by four bits).
    /// NOTE(review): presumably an index into a separate exceptions table — confirm.
    Exception(Option<CaseType>, u16),
    /// Not an exception and the case-type bits are zero: only the
    /// non-exception flags are present.
    Uncased(NonExceptionData),
    /// Not an exception and the case-type bits are non-zero: the non-exception
    /// flags, the case type, and a signed delta taken from bits 15..7.
    /// NOTE(review): presumably the delta is an offset to the mapped code point — confirm.
    Delta(NonExceptionData, CaseType, i16),
}
147
/// Flags that are only stored for values without the exception bit.
///
/// For exception values the corresponding accessors on `CaseMapData` return
/// `false` (`is_sensitive`) and `DotType::NoDot` (`dot_type`).
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider::data))]
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
pub struct NonExceptionData {
    /// Flag stored in bit `0x10` of the packed value.
    pub sensitive: bool,
    /// Dot type stored in bits `0x60` of the packed value.
    pub dot_type: DotType,
}
165
166impl CaseMapData {
167 #[inline]
168 pub(crate) fn case_type(self) -> Option<CaseType> {
169 match self.kind {
170 CaseMapDataKind::Exception(case_type, ..) => case_type,
171 CaseMapDataKind::Delta(_, case_type, _) => Some(case_type),
172 CaseMapDataKind::Uncased(..) => None,
173 }
174 }
175
176 #[inline]
177 pub(crate) fn is_upper_or_title(self) -> bool {
178 match self.case_type() {
179 None | Some(CaseType::Lower) => false,
180 Some(CaseType::Upper) | Some(CaseType::Title) => true,
181 }
182 }
183
184 #[inline]
185 pub(crate) fn is_relevant_to(self, kind: MappingKind) -> bool {
186 match kind {
187 MappingKind::Lower | MappingKind::Fold => self.is_upper_or_title(),
188 MappingKind::Upper | MappingKind::Title => self.case_type() == Some(CaseType::Lower),
189 }
190 }
191
192 #[inline]
193 pub(crate) fn is_ignorable(self) -> bool {
194 self.ignoreable
195 }
196
197 #[inline]
198 pub(crate) fn has_exception(self) -> bool {
199 matches!(self.kind, CaseMapDataKind::Exception(..))
200 }
201
202 #[inline]
206 pub(crate) fn is_sensitive(self) -> bool {
207 match self.kind {
208 CaseMapDataKind::Exception(..) => false,
209 CaseMapDataKind::Delta(ned, ..) => ned.sensitive,
210 CaseMapDataKind::Uncased(ned) => ned.sensitive,
211 }
212 }
213
214 #[inline]
215 pub(crate) fn dot_type(self) -> DotType {
216 match self.kind {
217 CaseMapDataKind::Exception(..) => DotType::NoDot,
218 CaseMapDataKind::Delta(ned, ..) => ned.dot_type,
219 CaseMapDataKind::Uncased(ned) => ned.dot_type,
220 }
221 }
222
223 #[inline]
228 pub(crate) fn delta(self) -> i16 {
229 debug_assert!(!self.has_exception());
230 match self.kind {
231 CaseMapDataKind::Exception(..) => 0,
232 CaseMapDataKind::Delta(.., delta) => delta,
233 CaseMapDataKind::Uncased(..) => 0,
234 }
235 }
236
237 #[inline]
240 pub(crate) fn exception_index(self) -> u16 {
241 debug_assert!(self.has_exception());
242 if let CaseMapDataKind::Exception(_, i) = self.kind {
243 i
244 } else {
245 0
246 }
247 }
248
249 #[cfg(feature = "datagen")]
256 pub(crate) fn with_updated_exception(self, updates: &BTreeMap<u16, u16>) -> Self {
257 let kind = if let CaseMapDataKind::Exception(ty, index) = self.kind {
258 if let Some(updated_exception) = updates.get(&index) {
259 CaseMapDataKind::Exception(ty, *updated_exception)
260 } else {
261 self.kind
262 }
263 } else {
264 self.kind
265 };
266
267 Self { kind, ..self }
268 }
269
270 #[cfg(any(feature = "datagen", test))]
272 pub(crate) fn try_from_icu_integer(int: u16) -> Result<Self, UleError> {
273 let raw = int.to_unaligned();
274 CaseMapDataULE::validate_bytes(raw.as_bytes())?;
275
276 let this = Self::from_unaligned(CaseMapDataULE(raw));
277 Ok(this)
278 }
279}
280
281impl TrieValue for CaseMapData {
282 type TryFromU32Error = TryFromIntError;
283
284 fn try_from_u32(i: u32) -> Result<Self, Self::TryFromU32Error> {
285 u16::try_from(i).map(|u| AsULE::from_unaligned(CaseMapDataULE(u.to_unaligned())))
286 }
287
288 fn to_u32(self) -> u32 {
289 u32::from(self.to_unaligned().0.as_unsigned_int())
290 }
291}
292
/// Packed 16-bit representation of [`CaseMapData`].
///
/// Bit layout, as implemented by the `AsULE` impl for `CaseMapData` in this file:
///
/// ```text
/// bits 1..0   case type (0 = none, otherwise a `CaseType` discriminant)
/// bit  2      case-ignorable flag
/// bit  3      exception flag
///
/// if the exception flag is set:
///     bits 15..4  unsigned exception index
/// otherwise:
///     bit  4      case-sensitive flag
///     bits 6..5   `DotType` discriminant
///     bits 15..7  signed delta when the case type is non-zero;
///                 must be zero when the case type is zero (checked by
///                 `validate_bytes`)
/// ```
#[derive(Copy, Clone, Debug, Eq, PartialEq)]
#[repr(transparent)]
pub struct CaseMapDataULE(RawBytesULE<2>);

impl CaseMapDataULE {
    /// Mask for the case-type bits (bits 1..0).
    const CASE_TYPE_BITS: u16 = 0x3;
    /// Case-ignorable flag (bit 2).
    const CASE_IGNOREABLE_BIT: u16 = 0x4;
    /// Exception flag (bit 3).
    const EXCEPTION_BIT: u16 = 0x8;
    /// Case-sensitive flag (bit 4); only read for non-exception values.
    const CASE_SENSITIVE_BIT: u16 = 0x10;
    /// Right shift that extracts the exception index (bits 15..4).
    const EXCEPTION_SHIFT: u16 = 4;
    /// Right shift that extracts the delta (bits 15..7).
    const DELTA_SHIFT: u16 = 7;
    /// Mask for the dot-type bits (bits 6..5), before shifting.
    const DOT_TYPE_BITS: u16 = 0x60;
    /// Right shift that moves the masked dot-type bits down to bits 1..0.
    const DOT_SHIFT: u16 = 5;
}
352
353unsafe impl ULE for CaseMapDataULE {
369 fn validate_bytes(bytes: &[u8]) -> Result<(), UleError> {
370 let sixteens = RawBytesULE::<2>::parse_bytes_to_slice(bytes)?;
371
372 for sixteen in sixteens {
373 let sixteen = sixteen.as_unsigned_int();
374 if sixteen & Self::EXCEPTION_BIT == 0 {
377 if sixteen & Self::CASE_TYPE_BITS == 0 {
379 if sixteen >> Self::DELTA_SHIFT != 0 {
381 return Err(UleError::parse::<Self>());
383 }
384 }
385 }
386 }
387 Ok(())
388 }
389}
390
391impl AsULE for CaseMapData {
392 type ULE = CaseMapDataULE;
393
394 fn from_unaligned(ule: Self::ULE) -> Self {
395 let sixteen = ule.0.as_unsigned_int();
396
397 let ignoreable = (sixteen & CaseMapDataULE::CASE_IGNOREABLE_BIT) != 0;
398 let exception = (sixteen & CaseMapDataULE::EXCEPTION_BIT) != 0;
399
400 let case_type = sixteen & CaseMapDataULE::CASE_TYPE_BITS;
401 let case_type = CaseType::from_masked_bits(case_type);
402 let kind = if exception {
403 let exception = sixteen >> CaseMapDataULE::EXCEPTION_SHIFT;
405 CaseMapDataKind::Exception(case_type, exception)
406 } else {
407 let dot_type = (sixteen & CaseMapDataULE::DOT_TYPE_BITS) >> CaseMapDataULE::DOT_SHIFT;
408 let dot_type = DotType::from_masked_bits(dot_type);
409 let sensitive = (sixteen & CaseMapDataULE::CASE_SENSITIVE_BIT) != 0;
410 let ned = NonExceptionData {
411 dot_type,
412 sensitive,
413 };
414 if let Some(case_type) = case_type {
415 let delta = (sixteen as i16) >> CaseMapDataULE::DELTA_SHIFT;
419 CaseMapDataKind::Delta(ned, case_type, delta)
420 } else {
421 CaseMapDataKind::Uncased(ned)
422 }
423 };
424 CaseMapData { ignoreable, kind }
425 }
426
427 fn to_unaligned(self) -> Self::ULE {
428 let mut sixteen = 0;
429 if self.ignoreable {
430 sixteen |= CaseMapDataULE::CASE_IGNOREABLE_BIT;
431 }
432 match self.kind {
433 CaseMapDataKind::Exception(case_type, e) => {
434 sixteen |= CaseMapDataULE::EXCEPTION_BIT;
435 sixteen |= e << CaseMapDataULE::EXCEPTION_SHIFT;
436 sixteen |= case_type.map(|c| c as u16).unwrap_or(0);
437 }
438 CaseMapDataKind::Uncased(ned) => {
439 sixteen |= (ned.dot_type as u16) << CaseMapDataULE::DOT_SHIFT;
440 if ned.sensitive {
441 sixteen |= CaseMapDataULE::CASE_SENSITIVE_BIT;
442 }
443 }
446 CaseMapDataKind::Delta(ned, case_type, delta) => {
447 sixteen |= (delta << CaseMapDataULE::DELTA_SHIFT) as u16;
450 sixteen |= (ned.dot_type as u16) << CaseMapDataULE::DOT_SHIFT;
451 if ned.sensitive {
452 sixteen |= CaseMapDataULE::CASE_SENSITIVE_BIT;
453 }
454 sixteen |= case_type as u16;
455 }
456 }
457 CaseMapDataULE(sixteen.to_unaligned())
458 }
459}
460
#[cfg(test)]
mod tests {
    use super::*;

    /// Structured values must survive packing and unpacking, both directly
    /// through `AsULE` and through the validating integer constructor.
    #[test]
    fn test_roundtrip() {
        const TESTCASES: &[CaseMapData] = &[
            CaseMapData {
                ignoreable: true,
                kind: CaseMapDataKind::Exception(Some(CaseType::Title), 923),
            },
            CaseMapData {
                ignoreable: false,
                kind: CaseMapDataKind::Exception(None, 923),
            },
            CaseMapData {
                ignoreable: true,
                kind: CaseMapDataKind::Delta(
                    NonExceptionData {
                        sensitive: true,
                        dot_type: DotType::SoftDotted,
                    },
                    CaseType::Upper,
                    50,
                ),
            },
            CaseMapData {
                ignoreable: false,
                kind: CaseMapDataKind::Delta(
                    NonExceptionData {
                        sensitive: true,
                        dot_type: DotType::SoftDotted,
                    },
                    CaseType::Upper,
                    -50,
                ),
            },
            CaseMapData {
                ignoreable: false,
                kind: CaseMapDataKind::Uncased(NonExceptionData {
                    sensitive: false,
                    dot_type: DotType::SoftDotted,
                }),
            },
        ];

        for case in TESTCASES {
            let ule = case.to_unaligned();
            let roundtrip = CaseMapData::from_unaligned(ule);
            assert_eq!(*case, roundtrip);
            let integer = ule.0.as_unsigned_int();
            let roundtrip2 = CaseMapData::try_from_icu_integer(integer).unwrap();
            assert_eq!(*case, roundtrip2);
        }
    }

    /// Valid packed integers must survive unpacking and repacking unchanged.
    #[test]
    fn test_integer_roundtrip() {
        fn test_single_integer(int: u16) {
            let cmd = CaseMapData::try_from_icu_integer(int).unwrap();
            assert_eq!(int, cmd.to_unaligned().0.as_unsigned_int());
        }

        // 84 = 0x0054: uncased, ignorable, sensitive, dot-type bits = 2.
        test_single_integer(84);
        // 2503 = 0x09C7: case-type bits = 3, ignorable, dot-type bits = 2, delta = 19.
        test_single_integer(2503);
    }

    /// Uncased, non-exception words must keep bits 15..7 clear; the same high
    /// bits are legitimate when the word is an exception or has a case type.
    #[test]
    fn test_reserved_bits_rejected() {
        // Lowest reserved bit set, nothing else.
        assert!(CaseMapData::try_from_icu_integer(0x0080).is_err());
        // Every reserved bit set.
        assert!(CaseMapData::try_from_icu_integer(0xFF80).is_err());
        // Reserved bit set alongside otherwise valid low flag bits
        // (ignorable, sensitive, dot-type bits = 3).
        assert!(CaseMapData::try_from_icu_integer(0x00F4).is_err());

        // Same high bit, but the exception flag makes it part of the index.
        assert!(CaseMapData::try_from_icu_integer(0x0088).is_ok());
        // Same high bit, but a non-zero case type makes it part of the delta.
        assert!(CaseMapData::try_from_icu_integer(0x0081).is_ok());
    }
}