// icu_provider_source/locale/aliases.rs
use crate::cldr_serde;
use crate::SourceDataProvider;
use icu::locale::provider::*;
use icu::locale::{
subtags::{self, language},
LanguageIdentifier,
};
use icu_provider::prelude::*;
use std::collections::{BTreeMap, HashSet};
use tinystr::TinyAsciiStr;
use zerovec::ZeroSlice;
impl DataProvider<AliasesV2Marker> for SourceDataProvider {
    /// Loads the CLDR alias data (`supplemental/aliases.json`) and converts
    /// it into the owned `AliasesV2` payload.
    fn load(&self, req: DataRequest) -> Result<DataResponse<AliasesV2Marker>, DataError> {
        self.check_req::<AliasesV2Marker>(req)?;
        // Parse (or fetch from cache) the raw CLDR JSON resource.
        let resource: &cldr_serde::aliases::Resource = self
            .cldr()?
            .core()
            .read_and_parse("supplemental/aliases.json")?;
        let payload = DataPayload::from_owned(AliasesV2::from(resource));
        Ok(DataResponse {
            metadata: Default::default(),
            payload,
        })
    }
}
impl crate::IterableDataProviderCached<AliasesV2Marker> for SourceDataProvider {
    /// The alias data has exactly one instance: the default (root) identifier.
    fn iter_ids_cached(&self) -> Result<HashSet<DataIdentifierCow<'static>>, DataError> {
        let mut ids = HashSet::new();
        ids.insert(Default::default());
        Ok(ids)
    }
}
/// Sort key implementing the UTS #35 Appendix C rule ordering: rules with
/// more fields set (language, script, region, and each variant count as one
/// field) sort first; ties break on the subtags themselves so the ordering
/// is total and deterministic.
fn appendix_c_cmp(langid: &LanguageIdentifier) -> impl Ord {
    // Count how many fields of the source language identifier are present.
    let mut field_count = langid.variants.len() as i8;
    field_count += (!langid.language.is_default()) as i8;
    field_count += langid.script.is_some() as i8;
    field_count += langid.region.is_some() as i8;
    // Negate so that "more fields" sorts before "fewer fields".
    (
        -field_count,
        langid.language,
        langid.script,
        langid.region,
        langid.variants.clone(),
    )
}
impl From<&cldr_serde::aliases::Resource> for AliasesV2<'_> {
    /// Builds the runtime alias tables from the parsed CLDR
    /// `supplemental/aliases.json` resource.
    ///
    /// Each alias rule is bucketed into the most specific lookup structure
    /// that can represent it, so the locale canonicalizer can do cheap keyed
    /// lookups instead of scanning one flat rule list.
    fn from(other: &cldr_serde::aliases::Resource) -> Self {
        // Language rules that carry variants (kept as a sorted list below).
        let mut language_variants = Vec::new();
        // `sgn-<region>` -> language rules, keyed by region.
        let mut sgn_region = BTreeMap::new();
        // Simple language -> replacement rules, split by source length so the
        // keys pack into fixed-size TinyAsciiStrs.
        let mut language_len2 = BTreeMap::new();
        let mut language_len3 = BTreeMap::new();
        // Fallback bucket for language rules matching none of the special
        // cases; expected to stay empty (enforced by the panic below).
        let mut language = Vec::new();
        let mut script = BTreeMap::new();
        // Region rules, split by alphabetic vs. numeric source code.
        let mut region_alpha = BTreeMap::new();
        let mut region_num = BTreeMap::new();
        // Region rules whose replacement is a list of regions.
        let mut complex_region = BTreeMap::new();
        let mut variant = BTreeMap::new();
        let mut subdivision = BTreeMap::new();
        for (from, to) in other.supplemental.metadata.alias.language_aliases.iter() {
            // Rules whose source or replacement fails to parse as a language
            // identifier are silently skipped.
            if let Ok(langid) = from.parse::<LanguageIdentifier>() {
                if let Ok(replacement) = to.replacement.parse::<LanguageIdentifier>() {
                    match (
                        langid.language,
                        langid.script,
                        langid.region,
                        !langid.variants.is_empty(),
                    ) {
                        // Any rule with variants goes into the list-based table.
                        (_, None, None, true) => language_variants.push((langid, replacement)),
                        // Plain language-only rule: key by the 2- or 3-letter code.
                        (lang, None, None, false) if !lang.is_default() => {
                            let lang = langid.language.to_tinystr();
                            if lang.len() == 2 {
                                language_len2.insert(lang.resize(), to.replacement.as_str());
                            } else {
                                language_len3.insert(lang, to.replacement.as_str());
                            }
                        }
                        // `sgn-RR` with a bare-language replacement: keyed by region.
                        (language, None, Some(region), false)
                            if language == language!("sgn")
                                && !replacement.language.is_default()
                                && replacement.script.is_none()
                                && replacement.region.is_none()
                                && replacement.variants.is_empty() =>
                        {
                            sgn_region.insert(region.to_tinystr(), replacement.language);
                        }
                        _ => language.push((langid, replacement)),
                    }
                }
            }
        }
        // All current CLDR language rules are covered by the special cases above;
        // a non-empty fallback bucket means the data changed shape.
        if !language.is_empty() {
            panic!("Aliases contain a non-special-cased rule. Remove this check if that is intended behaviour.")
        }
        for (from, to) in other.supplemental.metadata.alias.script_aliases.iter() {
            // Skip sources that are not valid script subtags.
            if from.parse::<subtags::Script>().is_err() {
                continue;
            }
            if let Ok(to) = to.replacement.parse::<subtags::Script>() {
                script.insert(from, to);
            }
        }
        for (from, to) in other.supplemental.metadata.alias.region_aliases.iter() {
            // Skip sources that are not valid region subtags.
            if from.parse::<subtags::Region>().is_err() {
                continue;
            }
            if let Ok(replacement) = to.replacement.parse::<subtags::Region>() {
                if from.is_ascii_alphabetic() {
                    // Alphabetic region codes get the shorter key type.
                    region_alpha.insert(from.resize(), replacement);
                } else {
                    region_num.insert(from, replacement);
                }
            } else {
                // Replacement didn't parse as a single region: treat it as a
                // space-separated list of candidate regions.
                complex_region.insert(
                    from,
                    to.replacement
                        .split(' ')
                        .filter_map(|r| r.parse::<subtags::Region>().ok())
                        .collect::<Box<[_]>>(),
                );
            }
        }
        for (from, to) in other.supplemental.metadata.alias.variant_aliases.iter() {
            if let Ok(to) = to.replacement.parse::<subtags::Variant>() {
                variant.insert(from, to);
            }
        }
        for (from, to) in other.supplemental.metadata.alias.subdivision_aliases.iter() {
            // Use the first replacement that fits in a TinyAsciiStr<7>; a bare
            // 2-letter region is lowercased and padded with "zzzz" to get
            // subdivision form.
            if let Some(replacement) = to.replacement.split(' ').find_map(|r| {
                if r.len() == 2 {
                    let replacement = r.to_string().to_ascii_lowercase() + "zzzz";
                    TinyAsciiStr::<7>::try_from_str(&replacement).ok()
                } else {
                    TinyAsciiStr::<7>::try_from_str(r).ok()
                }
            }) {
                subdivision.insert(from, replacement);
            }
        }
        // Appendix C: rules with more fields must be applied first.
        language_variants.sort_unstable_by_key(|(langid, _)| appendix_c_cmp(langid));
        language.sort_unstable_by_key(|(langid, _)| appendix_c_cmp(langid));
        // Flatten the sorted rule lists into the serialized pair types.
        let language_variants = language_variants
            .iter()
            .map(|(from, to)| {
                LanguageStrStrPair(
                    from.language,
                    from.variants.to_string().into(),
                    to.to_string().into(),
                )
            })
            .collect::<Vec<_>>();
        let language = language
            .iter()
            .map(|(from, to)| StrStrPair(from.to_string().into(), to.to_string().into()))
            .collect::<Vec<_>>();
        // Assemble the zero-copy struct; `to_unvalidated()` strips the subtag
        // wrapper from each key for storage.
        Self {
            language_variants: language_variants.as_slice().into(),
            sgn_region: sgn_region
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
            language_len2: language_len2
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
            language_len3: language_len3
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
            language: language.as_slice().into(),
            script: script
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
            region_alpha: region_alpha
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
            region_num: region_num
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
            complex_region: complex_region
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), ZeroSlice::from_boxed_slice(v)))
                .collect(),
            variant: variant
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
            subdivision: subdivision
                .into_iter()
                .map(|(k, v)| (k.to_unvalidated(), v))
                .collect(),
        }
    }
}
#[test]
fn test_appendix_c_cmp() {
    // Rules with more fields sort first; among equal field counts the subtags
    // themselves break the tie. "ca" (one field) must come last.
    let en = icu::locale::langid!("en-GB");
    let ca = icu::locale::langid!("ca");
    let und = "und-hepburn-heploc".parse::<LanguageIdentifier>().unwrap();
    let fr = icu::locale::langid!("fr-CA");

    let mut sorted = vec![&en, &ca, &und, &fr];
    sorted.sort_unstable_by_key(|&l| appendix_c_cmp(l));

    assert_eq!(sorted, &[&en, &fr, &und, &ca]);
}
#[test]
fn test_basic() {
    use icu::locale::subtags::{language, region, script};
    let provider = SourceDataProvider::new_testing();
    let data: DataResponse<AliasesV2Marker> = provider.load(Default::default()).unwrap();

    // The generic `language` bucket must be empty (everything is
    // special-cased), while every specialized table must be populated.
    assert!(data.payload.get().language.is_empty());
    assert!(!data.payload.get().language_variants.is_empty());
    assert!(!data.payload.get().sgn_region.is_empty());
    assert!(!data.payload.get().language_len2.is_empty());
    assert!(!data.payload.get().language_len3.is_empty());
    assert!(!data.payload.get().script.is_empty());
    assert!(!data.payload.get().region_alpha.is_empty());
    assert!(!data.payload.get().region_num.is_empty());
    assert!(!data.payload.get().complex_region.is_empty());
    assert!(!data.payload.get().variant.is_empty());
    assert!(!data.payload.get().subdivision.is_empty());

    // Two-letter rule iw -> he lives in the len-2 table and only there.
    assert_eq!(
        data.payload
            .get()
            .language_len2
            .get(&language!("iw").to_tinystr().resize().to_unvalidated())
            .unwrap(),
        "he"
    );
    assert!(data
        .payload
        .get()
        .language_len3
        .get(&language!("iw").to_tinystr().to_unvalidated())
        .is_none());

    assert_eq!(
        data.payload.get().script.iter().next().unwrap(),
        (
            &script!("Qaai").to_tinystr().to_unvalidated(),
            &script!("Zinh")
        )
    );

    // Numeric region 768 maps to TG. (Fix: `&region!` had been
    // mojibake-corrupted to `®ion!`, which is not valid Rust.)
    assert_eq!(
        data.payload
            .get()
            .region_num
            .get(&region!("768").to_tinystr().to_unvalidated())
            .unwrap(),
        &region!("TG")
    );
}