// icu_provider_source/properties/script.rs
use crate::SourceDataProvider;
use icu::collections::codepointtrie::CodePointTrie;
use icu::properties::props::Script;
use icu::properties::provider::{
ScriptWithExtensionsPropertyV1, ScriptWithExtensionsPropertyV1Marker,
};
use icu::properties::script::ScriptWithExt;
use icu_provider::prelude::*;
use std::collections::HashSet;
use std::convert::TryFrom;
use zerovec::{VarZeroVec, ZeroSlice, ZeroVec};
/// Produces the `ScriptWithExtensionsPropertyV1` data struct from the exported
/// `uprops/<trie type>/scx.toml` file.
impl DataProvider<ScriptWithExtensionsPropertyV1Marker> for SourceDataProvider {
    /// Loads the Script / Script_Extensions data.
    ///
    /// # Errors
    ///
    /// Returns a `DataError` when the request is rejected by `check_req`, when the TOML
    /// source cannot be read or parsed, when it has no `script_extensions` entry, or when
    /// the code point trie cannot be built from the parsed data.
    fn load(
        &self,
        req: DataRequest,
    ) -> Result<DataResponse<ScriptWithExtensionsPropertyV1Marker>, DataError> {
        self.check_req::<ScriptWithExtensionsPropertyV1Marker>(req)?;

        // Resolve the export source first so a missing source is reported before anything else.
        let icuexport = self.icuexport()?;
        let toml_path = format!("uprops/{}/scx.toml", self.trie_type());
        let scx_table = icuexport
            .read_and_parse_toml::<super::uprops_serde::script_extensions::Main>(&toml_path)?
            .script_extensions
            .first()
            .ok_or_else(|| DataError::custom("Could not parse Script_Extensions data from TOML"))?;

        // Trie mapping each code point to its `ScriptWithExt` value.
        let trie = CodePointTrie::<ScriptWithExt>::try_from(&scx_table.code_point_trie).map_err(
            |err| DataError::custom("Could not parse CodePointTrie TOML").with_display_context(&err),
        )?;

        // Each inner list of raw script codes becomes one `ZeroVec<Script>`; the list of lists
        // is then packed into a single `VarZeroVec`.
        let script_lists: Vec<ZeroVec<Script>> = scx_table
            .script_code_array
            .iter()
            .map(|codes| {
                codes
                    .iter()
                    .map(|&code| Script(code))
                    .collect::<ZeroVec<Script>>()
            })
            .collect();
        let extensions: VarZeroVec<ZeroSlice<Script>> = VarZeroVec::from(script_lists.as_slice());

        let payload = DataPayload::from_owned(ScriptWithExtensionsPropertyV1 { trie, extensions });
        Ok(DataResponse {
            metadata: Default::default(),
            payload,
        })
    }
}
impl crate::IterableDataProviderCached<ScriptWithExtensionsPropertyV1Marker>
    for SourceDataProvider
{
    /// Returns the set of identifiers this marker supports: exactly one, the default
    /// identifier.
    fn iter_ids_cached(&self) -> Result<HashSet<DataIdentifierCow<'static>>, DataError> {
        let mut ids = HashSet::new();
        ids.insert(Default::default());
        Ok(ids)
    }
}
#[cfg(test)]
mod tests {
    use super::*;
    use icu::properties::script::ScriptWithExtensions;

    /// `get_script_val` / `get_script_val32` return the single Script value of a code point,
    /// including for code points whose Script_Extensions list holds several scripts
    /// (compare with `test_scx_array_from_script_extensions`).
    #[test]
    fn test_script_val_from_script_extensions() {
        let provider = SourceDataProvider::new_testing();
        let data = ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let scripts = data.as_borrowed();

        // Lookups through the `char` API.
        for (ch, expected) in [
            ('𐓐', Script::Osage),
            ('🥳', Script::Common),
            ('௫', Script::Tamil),
        ] {
            assert_eq!(scripts.get_script_val(ch), expected, "script of {:?}", ch);
        }

        // Lookups through the `u32` code point API.
        for (cp, expected) in [
            (0x200D, Script::Inherited),
            (0x11303, Script::Grantha),
            (0x30A0, Script::Common),
        ] {
            assert_eq!(
                scripts.get_script_val32(cp),
                expected,
                "script of U+{:04X}",
                cp
            );
        }
    }

    /// `get_script_extensions_val` / `get_script_extensions_val32` return the full list of
    /// scripts of a code point.
    #[test]
    fn test_scx_array_from_script_extensions() {
        let provider = SourceDataProvider::new_testing();
        let data = ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let scripts = data.as_borrowed();

        // Collect the script extensions of one character / code point into a `Vec`.
        let exts_of_char = |ch: char| {
            scripts
                .get_script_extensions_val(ch)
                .iter()
                .collect::<Vec<_>>()
        };
        let exts_of_code_point = |cp: u32| {
            scripts
                .get_script_extensions_val32(cp)
                .iter()
                .collect::<Vec<_>>()
        };

        // Code points with a single script.
        assert_eq!(exts_of_char('𐓐'), [Script::Osage]);
        assert_eq!(exts_of_char('🥳'), [Script::Common]);
        assert_eq!(exts_of_code_point(0x200D), [Script::Inherited]);

        // Code points with several scripts.
        assert_eq!(exts_of_char('௫'), [Script::Tamil, Script::Grantha]);
        assert_eq!(exts_of_code_point(0x11303), [Script::Tamil, Script::Grantha]);
        assert_eq!(
            exts_of_code_point(0x30A0),
            [Script::Hiragana, Script::Katakana]
        );

        // The returned set can be iterated lazily ...
        assert_eq!(
            scripts.get_script_extensions_val32(0x200D).iter().next(),
            Some(Script::Inherited)
        );

        // ... and queried for membership.
        let exts_11303 = scripts.get_script_extensions_val32(0x11303);
        assert!(exts_11303.contains(&Script::Grantha));
        assert!(!exts_11303.contains(&Script::Common));

        // A value above U+10FFFF is reported as `Unknown`.
        assert_eq!(exts_of_code_point(0x11_0000), [Script::Unknown]);
    }

    /// `has_script` / `has_script32` check membership of a script in the script extensions
    /// of a code point.
    #[test]
    fn test_has_script() {
        let provider = SourceDataProvider::new_testing();
        let data = ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let scripts = data.as_borrowed();

        // (character, script, expected result) through the `char` API.
        for (ch, script, expected) in [
            ('𐓐', Script::Osage, true),
            ('𐓐', Script::Common, false),
            ('𐓐', Script::Inherited, false),
            ('🥳', Script::Common, true),
            ('🥳', Script::Inherited, false),
            ('௫', Script::Tamil, true),
            ('௫', Script::Grantha, true),
            ('௫', Script::Common, false),
            ('௫', Script::Inherited, false),
        ] {
            assert_eq!(
                scripts.has_script(ch, script),
                expected,
                "has_script({:?}, {:?})",
                ch,
                script
            );
        }

        // (code point, script, expected result) through the `u32` API.
        for (cp, script, expected) in [
            (0x200D, Script::Common, false),
            (0x200D, Script::Inherited, true),
            (0x11303, Script::Tamil, true),
            (0x11303, Script::Grantha, true),
            (0x11303, Script::Common, false),
            (0x11303, Script::Inherited, false),
            (0x30A0, Script::Hiragana, true),
            (0x30A0, Script::Katakana, true),
            (0x30A0, Script::Common, false),
            (0x30A0, Script::Inherited, false),
            (0x0964, Script::Common, false),
            (0x0964, Script::Devanagari, true),
            (0x0964, Script::Bengali, true),
            (0x063F, Script::Common, false),
            (0x063F, Script::Arabic, true),
            (0x063F, Script::Syriac, false),
            (0x063F, Script::Thaana, false),
            (0x0640, Script::Common, false),
            (0x0640, Script::Arabic, true),
            (0x0640, Script::Syriac, true),
            (0x0640, Script::Thaana, false),
            (0x0650, Script::Inherited, false),
            (0x0650, Script::Arabic, true),
            (0x0650, Script::Syriac, true),
            (0x0650, Script::Thaana, false),
            (0x0660, Script::Common, false),
            (0x0660, Script::Arabic, true),
            (0x0660, Script::Syriac, false),
            (0x0660, Script::Thaana, true),
            (0xFDF2, Script::Common, false),
            (0xFDF2, Script::Arabic, true),
            (0xFDF2, Script::Syriac, false),
            (0xFDF2, Script::Thaana, true),
            // A script value with no named constant used anywhere in these tests.
            (0x0640, Script(0xAFFE), false),
        ] {
            assert_eq!(
                scripts.has_script32(cp, script),
                expected,
                "has_script32(U+{:04X}, {:?})",
                cp,
                script
            );
        }
    }

    /// `get_script_extensions_set` returns the set of code points whose script extensions
    /// include the given script.
    #[test]
    fn test_get_script_extensions_set() {
        let provider = SourceDataProvider::new_testing();
        let data = ScriptWithExtensions::try_new_unstable(&provider).unwrap();
        let scripts = data.as_borrowed();

        let grantha = scripts.get_script_extensions_set(Script::Grantha);
        for cp in [
            0x0BE6, 0x0BEB, 0x0BEF, 0x0BF2, 0x0BF3, 0x11300, 0x11301, 0x11302, 0x11303, 0x11305,
        ] {
            assert!(grantha.contains32(cp), "Grantha should contain U+{:04X}", cp);
        }
        for cp in [0x0BE5, 0x0BF4, 0x11304] {
            assert!(
                !grantha.contains32(cp),
                "Grantha should not contain U+{:04X}",
                cp
            );
        }

        let tamil = scripts.get_script_extensions_set(Script::Tamil);
        for cp in [
            0x0BE6, 0x0BEB, 0x0BEF, 0x0BF2, 0x0BF3, 0x0BF4, 0x11301, 0x11303,
        ] {
            assert!(tamil.contains32(cp), "Tamil should contain U+{:04X}", cp);
        }
        for cp in [0x0BE5, 0x11300, 0x11302, 0x11304, 0x11305] {
            assert!(
                !tamil.contains32(cp),
                "Tamil should not contain U+{:04X}",
                cp
            );
        }

        let hiragana = scripts.get_script_extensions_set(Script::Hiragana);
        for cp in [0x3046, 0x309F, 0x30A0, 0x30FB, 0x30FC] {
            assert!(
                hiragana.contains32(cp),
                "Hiragana should contain U+{:04X}",
                cp
            );
        }
        for cp in [0x30A1, 0x30FD] {
            assert!(
                !hiragana.contains32(cp),
                "Hiragana should not contain U+{:04X}",
                cp
            );
        }

        let katakana = scripts.get_script_extensions_set(Script::Katakana);
        for cp in [0x30A0, 0x30A1, 0x30FB, 0x30FC, 0x30FD] {
            assert!(
                katakana.contains32(cp),
                "Katakana should contain U+{:04X}",
                cp
            );
        }
        for cp in [0x3046, 0x309F] {
            assert!(
                !katakana.contains32(cp),
                "Katakana should not contain U+{:04X}",
                cp
            );
        }

        let common = scripts.get_script_extensions_set(Script::Common);
        assert!(common.contains('🥳'));
        for cp in [0x200D, 0x30A0] {
            assert!(
                !common.contains32(cp),
                "Common should not contain U+{:04X}",
                cp
            );
        }

        let inherited = scripts.get_script_extensions_set(Script::Inherited);
        assert!(!inherited.contains('🥳'));
        assert!(inherited.contains32(0x200D));
        assert!(!inherited.contains32(0x30A0));

        let bangla = scripts.get_script_extensions_set(Script::Bengali);
        for cp in [0x09E7, 0x0964, 0x0965] {
            assert!(bangla.contains32(cp), "Bengali should contain U+{:04X}", cp);
        }
        for cp in [0x0963, 0x0966] {
            assert!(
                !bangla.contains32(cp),
                "Bengali should not contain U+{:04X}",
                cp
            );
        }

        let devanagari = scripts.get_script_extensions_set(Script::Devanagari);
        for cp in [0x0963, 0x0964, 0x0965, 0x0966] {
            assert!(
                devanagari.contains32(cp),
                "Devanagari should contain U+{:04X}",
                cp
            );
        }
        assert!(!devanagari.contains32(0x09E7));

        // Code points shared by Bengali and Devanagari are not part of the Common set.
        for cp in [0x0964, 0x0965] {
            assert!(
                !common.contains32(cp),
                "Common should not contain U+{:04X}",
                cp
            );
        }
    }
}