1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
45//! 🚧 \[Unstable\] Data provider struct definitions for this ICU4X component.
6//!
7//! <div class="stab unstable">
8//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
9//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
10//! to be stable, their Rust representation might not be. Use with caution.
11//! </div>
12//!
13//! Read more about data providers: [`icu_provider`]
1415// Provider structs must be stable
16#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
1718use icu_provider::prelude::*;
1920use crate::provider::data::CaseMapData;
21use crate::provider::exceptions::CaseMapExceptions;
22use icu_collections::codepointtrie::CodePointTrie;
23#[cfg(feature = "datagen")]
24use icu_collections::codepointtrie::CodePointTrieHeader;
2526pub mod data;
27pub mod exception_helpers;
28pub mod exceptions;
29#[cfg(feature = "datagen")]
30mod exceptions_builder;
31mod unfold;
#[cfg(feature = "compiled_data")]
#[derive(Debug)]
/// Baked data
///
/// Unit struct that the compiled-data macros in this module are applied to
/// (`make_provider!`, `impl_case_map_v1!` and `impl_case_map_unfold_v1!`).
/// It only exists when the `compiled_data` Cargo feature is enabled.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. In particular, the `DataProvider` implementations are only
/// guaranteed to match with this version's `*_unstable` providers. Use with caution.
/// </div>
pub struct Baked;
// Applies the baked-data macros to `Baked`. Everything lives inside an
// anonymous `const _` block, so the glob import and the helper `icu` module
// below are scoped to the block and do not leak into this module's namespace.
#[cfg(feature = "compiled_data")]
#[allow(unused_imports)]
const _: () = {
    // NOTE(review): presumably this glob import is what brings the macros
    // invoked below into scope — confirm against the `icu_casemap_data` crate.
    use icu_casemap_data::*;
    // Local `icu` module exposing this crate as `icu::casemap` and
    // `icu_collections` as `icu::collections`.
    // NOTE(review): presumably the macro-generated code names items through
    // these paths — confirm against the macro definitions.
    pub mod icu {
        pub use crate as casemap;
        pub use icu_collections as collections;
    }
    make_provider!(Baked);
    impl_case_map_v1!(Baked);
    impl_case_map_unfold_v1!(Baked);
};
// Declares the `CaseMapV1` marker through `icu_provider::data_marker!`.
// Arguments, in order: doc comment, marker name, the string "case/map/v1",
// the associated type `CaseMap<'static>`, and the `is_singleton` flag.
// See the `data_marker!` documentation for the exact meaning of each argument.
icu_provider::data_marker!(
    /// Marker for casemapping data.
    CaseMapV1,
    "case/map/v1",
    CaseMap<'static>,
    is_singleton = true
);
// Declares the `CaseMapUnfoldV1` marker through `icu_provider::data_marker!`.
// Arguments, in order: doc comment, marker name, the string
// "case/map/unfold/v1", the associated type `CaseMapUnfold<'static>`, and the
// `is_singleton` flag. See the `data_marker!` documentation for the exact
// meaning of each argument.
icu_provider::data_marker!(
    /// Reverse case mapping data.
    CaseMapUnfoldV1,
    "case/map/unfold/v1",
    CaseMapUnfold<'static>,
    is_singleton = true
);
#[cfg(feature = "datagen")]
/// The latest minimum set of markers required by this component.
///
/// Lists the `INFO` constants of [`CaseMapUnfoldV1`] and [`CaseMapV1`].
/// Only available when the `datagen` Cargo feature is enabled.
pub const MARKERS: &[DataMarkerInfo] = &[CaseMapUnfoldV1::INFO, CaseMapV1::INFO];
7677pub use self::unfold::CaseMapUnfold;
/// This type contains all of the casemapping data
///
/// It provides low-level access to the data necessary to convert characters
/// and strings to upper, lower, or title case.
///
/// The methods in the provider module are primarily about accessing its data,
/// however the full algorithms are also implemented as methods on this type in
/// the `internals` module of this crate.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, PartialEq, Clone, yoke::Yokeable, zerofrom::ZeroFrom)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_casemap::provider))]
#[yoke(prove_covariance_manually)]
pub struct CaseMap<'data> {
    /// Case mapping data, looked up by code point
    pub trie: CodePointTrie<'data, CaseMapData>,
    /// Exceptions to the case mapping data, addressed by the exception index
    /// carried by trie entries that have an exception
    pub exceptions: CaseMapExceptions<'data>,
}
// Registers `CaseMap` with `icu_provider` via the `data_struct!` macro. The
// trailing `#[cfg(feature = "datagen")]` is an argument to the macro.
// NOTE(review): presumably it gates the datagen-only part of the generated
// code — confirm against the `icu_provider::data_struct!` documentation.
icu_provider::data_struct!(
    CaseMap<'_>,
    #[cfg(feature = "datagen")]
);
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for CaseMap<'de> {
    /// Deserializes a `CaseMap` by way of a private mirror struct with the
    /// same two borrowed fields, then assembles the real type from it.
    ///
    /// The result is checked with `validate()` only inside a `debug_assert!`,
    /// so builds without debug assertions skip the consistency check.
    fn deserialize<D: serde::Deserializer<'de>>(deserializer: D) -> Result<Self, D::Error> {
        // Field-for-field mirror of `CaseMap`; it exists only so that the
        // derived `Deserialize` implementation can be reused here.
        #[derive(serde::Deserialize)]
        pub struct Raw<'data> {
            #[serde(borrow)]
            pub trie: CodePointTrie<'data, CaseMapData>,
            #[serde(borrow)]
            pub exceptions: CaseMapExceptions<'data>,
        }

        let raw = Raw::deserialize(deserializer)?;
        let case_map = Self {
            trie: raw.trie,
            exceptions: raw.exceptions,
        };
        debug_assert!(case_map.validate().is_ok());
        Ok(case_map)
    }
}
125126impl CaseMap<'_> {
127/// Creates a new CaseMap using data exported by the
128// `icuexportdata` tool in ICU4C. Validates that the data is
129 // consistent.
130#[cfg(feature = "datagen")]
131pub fn try_from_icu(
132 trie_header: CodePointTrieHeader,
133 trie_index: &[u16],
134 trie_data: &[u16],
135 exceptions: &[u16],
136 ) -> Result<Self, DataError> {
137use self::exceptions_builder::CaseMapExceptionsBuilder;
138use zerovec::ZeroVec;
139let exceptions_builder = CaseMapExceptionsBuilder::new(exceptions);
140let (exceptions, idx_map) = exceptions_builder.build()?;
141142let trie_index = ZeroVec::alloc_from_slice(trie_index);
143144#[allow(clippy::unwrap_used)] // datagen only
145let trie_data = trie_data
146 .iter()
147 .map(|&i| {
148 CaseMapData::try_from_icu_integer(i)
149 .unwrap()
150 .with_updated_exception(&idx_map)
151 })
152 .collect::<ZeroVec<_>>();
153154let trie = CodePointTrie::try_new(trie_header, trie_index, trie_data)
155 .map_err(|_| DataError::custom("Casemapping data does not form valid trie"))?;
156157let result = Self { trie, exceptions };
158 result.validate().map_err(DataError::custom)?;
159Ok(result)
160 }
161162/// Given an existing CaseMapper, validates that the data is
163 /// consistent. A CaseMapper created by the ICU transformer has
164 /// already been validated. Calling this function is only
165 /// necessary if you are concerned about data corruption after
166 /// deserializing.
167#[cfg(any(feature = "serde", feature = "datagen"))]
168 #[allow(unused)] // is only used in debug mode for serde
169pub(crate) fn validate(&self) -> Result<(), &'static str> {
170// First, validate that exception data is well-formed.
171let valid_exception_indices = self.exceptions.validate()?;
172173let validate_delta = |c: char, delta: i32| -> Result<(), &'static str> {
174let new_c =
175 u32::try_from(c as i32 + delta).map_err(|_| "Delta larger than character")?;
176 char::from_u32(new_c).ok_or("Invalid delta")?;
177Ok(())
178 };
179180for i in 0..char::MAX as u32 {
181if let Some(c) = char::from_u32(i) {
182let data = self.lookup_data(c);
183if data.has_exception() {
184let idx = data.exception_index();
185let exception = self.exceptions.get(idx);
186// Verify that the exception index points to a valid exception header.
187if !valid_exception_indices.contains(&idx) {
188return Err("Invalid exception index in trie data");
189 }
190 exception.validate()?;
191 } else {
192 validate_delta(c, data.delta() as i32)?;
193 }
194 }
195 }
196Ok(())
197 }
198199pub(crate) fn lookup_data(&self, c: char) -> CaseMapData {
200self.trie.get32(c as u32)
201 }
202}