// icu_provider_source/normalizer/mod.rs
use crate::SourceDataProvider;
use icu::collections::char16trie::Char16Trie;
use icu::collections::codepointtrie::CodePointTrie;
use icu::normalizer::provider::*;
use icu_provider::prelude::*;
use std::collections::HashSet;
use std::convert::TryFrom;
use zerovec::ZeroVec;
mod normalizer_serde;
/// Generates the provider plumbing shared by every normalizer marker in this module.
///
/// For the given `$marker` the expansion contains:
///
/// * a `DataProvider<$marker>` impl for `SourceDataProvider` whose `load`
///   validates the request with `check_req`, reads and parses
///   `norm/<trie type>/<$file_name>.toml` into `normalizer_serde::$serde_struct`,
///   binds the parsed value to `$toml_data`, and then evaluates `$conversion`
///   as the function's result;
/// * a `crate::IterableDataProviderCached<$marker>` impl whose id set holds
///   exactly one default-constructed identifier.
///
/// `$toml_data` has to be the very identifier that `$conversion` uses to refer
/// to the parsed TOML, which is why callers pass it in explicitly.
macro_rules! normalization_provider {
    (
        $marker:ident,
        $serde_struct:ident,
        $file_name:literal,
        $conversion:expr,
        $toml_data:ident
    ) => {
        use icu::normalizer::provider::$marker;

        impl DataProvider<$marker> for SourceDataProvider {
            fn load(&self, req: DataRequest) -> Result<DataResponse<$marker>, DataError> {
                self.check_req::<$marker>(req)?;

                // Resolve the export handle first, then build the path, so the
                // order of fallible steps is the same as a chained call.
                let export = self.icuexport()?;
                let toml_path = format!("norm/{}/{}.toml", self.trie_type(), $file_name);
                let $toml_data: &normalizer_serde::$serde_struct =
                    export.read_and_parse_toml(&toml_path)?;

                $conversion
            }
        }

        impl crate::IterableDataProviderCached<$marker> for SourceDataProvider {
            fn iter_ids_cached(&self) -> Result<HashSet<DataIdentifierCow<'static>>, DataError> {
                // Exactly one entry: the default identifier.
                let mut ids: HashSet<DataIdentifierCow<'static>> = HashSet::new();
                ids.insert(Default::default());
                Ok(ids)
            }
        }
    };
}
/// Declares the provider impls for a decomposition-data marker.
///
/// Delegates to `normalization_provider!` with the `DecompositionData` serde
/// struct. The conversion block turns the parsed `trie` field into a
/// `CodePointTrie<u32>` (failing the load with a "trie conversion" error that
/// carries the underlying error as display context) and copies `cap` into
/// `passthrough_cap` of a `DecompositionDataV2`.
macro_rules! normalization_data_provider {
    ($marker:ident, $file_name:literal) => {
        normalization_provider!(
            $marker,
            DecompositionData,
            $file_name,
            {
                let trie = match CodePointTrie::<u32>::try_from(&parsed.trie) {
                    Ok(trie) => trie,
                    Err(e) => {
                        return Err(
                            DataError::custom("trie conversion").with_display_context(&e)
                        );
                    }
                };
                let data = DecompositionDataV2 {
                    trie,
                    passthrough_cap: parsed.cap,
                };

                Ok(DataResponse {
                    metadata: Default::default(),
                    payload: DataPayload::from_owned(data),
                })
            },
            // Must be the same identifier the block above uses for the parsed TOML.
            parsed
        );
    };
}
/// Declares the provider impls for a decomposition-tables marker.
///
/// Delegates to `normalization_provider!` with the `DecompositionTables` serde
/// struct. Every entry of the parsed `scalars32` list is converted to a `char`;
/// the first entry that cannot be converted fails the load with a
/// "scalars24 conversion" error. The resulting `DecompositionTablesV1` holds
/// freshly allocated `ZeroVec`s for `scalars16` and the converted `scalars24`.
macro_rules! normalization_tables_provider {
    ($marker:ident, $file_name:literal) => {
        normalization_provider!(
            $marker,
            DecompositionTables,
            $file_name,
            {
                let mut scalars24: Vec<char> = Vec::new();
                for &scalar in tables.scalars32.iter() {
                    let ch: char = scalar
                        .try_into()
                        .map_err(|_| DataError::custom("scalars24 conversion"))?;
                    scalars24.push(ch);
                }

                let data = DecompositionTablesV1 {
                    scalars16: ZeroVec::alloc_from_slice(&tables.scalars16),
                    scalars24: ZeroVec::alloc_from_slice(&scalars24),
                };

                Ok(DataResponse {
                    metadata: Default::default(),
                    payload: DataPayload::from_owned(data),
                })
            },
            // Must be the same identifier the block above uses for the parsed TOML.
            tables
        );
    };
}
/// Declares the provider impls for the canonical-compositions marker.
///
/// Delegates to `normalization_provider!` with the `CanonicalCompositions`
/// serde struct. The parsed `compositions` slice is copied into a newly
/// allocated `ZeroVec`, wrapped in a `Char16Trie`, and returned inside a
/// `CanonicalCompositionsV1`. This conversion has no failure path of its own.
macro_rules! normalization_canonical_compositions_provider {
    ($marker:ident, $file_name:literal) => {
        normalization_provider!(
            $marker,
            CanonicalCompositions,
            $file_name,
            {
                let units = ZeroVec::alloc_from_slice(&parsed.compositions);
                let data = CanonicalCompositionsV1 {
                    canonical_compositions: Char16Trie::new(units),
                };

                Ok(DataResponse {
                    metadata: Default::default(),
                    payload: DataPayload::from_owned(data),
                })
            },
            // Must be the same identifier the block above uses for the parsed TOML.
            parsed
        );
    };
}
/// Declares the provider impls for the non-recursive decomposition supplement marker.
///
/// Delegates to `normalization_provider!` with the
/// `NonRecursiveDecompositionSupplement` serde struct. The conversion block
///
/// 1. turns the parsed `trie` field into a `CodePointTrie<u32>`, failing with a
///    "trie conversion" error that carries the underlying error as display
///    context, and
/// 2. converts every `scalars32` entry to a `char`, failing with a
///    "scalars24 conversion" error on the first entry that cannot be converted,
///
/// and then packs both into a `NonRecursiveDecompositionSupplementV1`.
macro_rules! normalization_non_recursive_decomposition_supplement_provider {
    ($marker:ident, $file_name:literal) => {
        normalization_provider!(
            $marker,
            NonRecursiveDecompositionSupplement,
            $file_name,
            {
                let trie = match CodePointTrie::<u32>::try_from(&supplement.trie) {
                    Ok(trie) => trie,
                    Err(e) => {
                        return Err(
                            DataError::custom("trie conversion").with_display_context(&e)
                        );
                    }
                };

                let mut scalars24: Vec<char> = Vec::new();
                for &scalar in supplement.scalars32.iter() {
                    let ch: char = scalar
                        .try_into()
                        .map_err(|_| DataError::custom("scalars24 conversion"))?;
                    scalars24.push(ch);
                }

                let data = NonRecursiveDecompositionSupplementV1 {
                    trie,
                    scalars24: ZeroVec::alloc_from_slice(&scalars24),
                };

                Ok(DataResponse {
                    metadata: Default::default(),
                    payload: DataPayload::from_owned(data),
                })
            },
            // Must be the same identifier the block above uses for the parsed TOML.
            supplement
        );
    };
}
// Provider instantiations. Each invocation pairs a data marker with the stem of
// the TOML file that is read from `norm/<trie type>/<stem>.toml`.

// Decomposition data (code point trie plus passthrough cap).
normalization_data_provider!(CanonicalDecompositionDataV2Marker, "nfd");
normalization_data_provider!(CompatibilityDecompositionDataV2Marker, "nfkd");
normalization_data_provider!(Uts46DecompositionDataV2Marker, "uts46d");
// Decomposition tables (`scalars16` / `scalars24`).
normalization_tables_provider!(CanonicalDecompositionTablesV1Marker, "nfdex");
normalization_tables_provider!(CompatibilityDecompositionTablesV1Marker, "nfkdex");
// Canonical compositions (`Char16Trie`).
normalization_canonical_compositions_provider!(CanonicalCompositionsV1Marker, "compositions");
// Non-recursive decomposition supplement (trie plus `scalars24`).
normalization_non_recursive_decomposition_supplement_provider!(
NonRecursiveDecompositionSupplementV1Marker,
"decompositionex"
);