// icu_provider_source/time_zones/names.rs
use crate::SourceDataProvider;
use icu::timezone::provider::names::*;
use icu::timezone::TimeZoneBcp47Id;
use icu_provider::prelude::*;
use std::collections::BTreeMap;
use std::collections::BTreeSet;
use std::collections::HashSet;
use std::hash::Hasher;
use zerotrie::ZeroAsciiIgnoreCaseTrie;
use zerovec::{ZeroSlice, ZeroVec};
impl DataProvider<IanaToBcp47MapV3Marker> for SourceDataProvider {
    /// Builds the IANA → BCP-47 time zone lookup payload.
    ///
    /// The payload carries three things: a case-insensitive trie keyed by IANA name, the
    /// sorted list of distinct BCP-47 IDs, and a checksum of that list. Each trie value packs
    /// `(index into bcp47_ids) << 1`, with the lowest bit set when the IANA name equals the
    /// canonical name recorded for its BCP-47 ID.
    ///
    /// # Errors
    ///
    /// Propagates failures from `compute_bcp47_tzids_btreemap` and
    /// `compute_canonical_tzids_btreemap`, and returns a custom [`DataError`] when the trie
    /// cannot be constructed.
    fn load(&self, _: DataRequest) -> Result<DataResponse<IanaToBcp47MapV3Marker>, DataError> {
        let iana_to_bcp47 = &self.compute_bcp47_tzids_btreemap()?;

        // Passing through a `BTreeSet` sorts and deduplicates the IDs before they are stored;
        // the `binary_search` below looks IDs up in this same list.
        let distinct_ids: BTreeSet<TimeZoneBcp47Id> = iana_to_bcp47.values().copied().collect();
        let bcp47_ids: ZeroVec<TimeZoneBcp47Id> = distinct_ids.into_iter().collect();
        let bcp47_ids_checksum = compute_bcp47_ids_hash(&bcp47_ids);

        let canonical_names = self.compute_canonical_tzids_btreemap()?;

        // The unwraps rely on invariants of this function: every ID searched for was itself
        // collected into `bcp47_ids`, and the prefix is derived from a constant.
        #[allow(clippy::unwrap_used)]
        let trie_entries: BTreeMap<Vec<u8>, usize> = iana_to_bcp47
            .iter()
            .map(|(iana, bcp47)| {
                let canonical_bit = usize::from(canonical_names.get(bcp47) == Some(iana));
                let id_index = bcp47_ids.binary_search(bcp47).unwrap();
                // Names without a `/` are stored with the non-region-city prefix prepended.
                let key = if iana.contains('/') {
                    iana.to_owned()
                } else {
                    let prefix = char::from_u32(
                        icu::timezone::provider::names::NON_REGION_CITY_PREFIX as u32,
                    )
                    .unwrap();
                    format!("{prefix}{iana}")
                };
                (key.into_bytes(), (id_index << 1) | canonical_bit)
            })
            .collect();

        let trie = ZeroAsciiIgnoreCaseTrie::try_from(&trie_entries).map_err(|e| {
            DataError::custom("Could not create ZeroTrie from timezone.json data")
                .with_display_context(&e)
        })?;

        Ok(DataResponse {
            metadata: Default::default(),
            payload: DataPayload::from_owned(IanaToBcp47MapV3 {
                map: trie.convert_store(),
                bcp47_ids,
                bcp47_ids_checksum,
            }),
        })
    }
}
impl crate::IterableDataProviderCached<IanaToBcp47MapV3Marker> for SourceDataProvider {
fn iter_ids_cached(&self) -> Result<HashSet<DataIdentifierCow<'static>>, DataError> {
Ok(HashSet::from_iter([Default::default()]))
}
}
impl DataProvider<Bcp47ToIanaMapV1Marker> for SourceDataProvider {
    /// Builds the BCP-47 → canonical IANA name payload.
    ///
    /// The canonical names are emitted in the iteration order of the map returned by
    /// `compute_canonical_tzids_btreemap`; the checksum is computed over that map's keys in
    /// the same order.
    ///
    /// # Errors
    ///
    /// Propagates any failure from `compute_canonical_tzids_btreemap`.
    fn load(&self, _: DataRequest) -> Result<DataResponse<Bcp47ToIanaMapV1Marker>, DataError> {
        let canonical_by_bcp47 = &self.compute_canonical_tzids_btreemap()?;

        // Keys feed the checksum; values become the stored name list.
        let bcp47_ids: ZeroVec<TimeZoneBcp47Id> = canonical_by_bcp47.keys().copied().collect();
        let canonical_names: Vec<&String> = canonical_by_bcp47.values().collect();

        let payload = DataPayload::from_owned(Bcp47ToIanaMapV1 {
            bcp47_ids_checksum: compute_bcp47_ids_hash(&bcp47_ids),
            canonical_iana_ids: canonical_names.as_slice().into(),
        });

        Ok(DataResponse {
            metadata: Default::default(),
            payload,
        })
    }
}
impl crate::IterableDataProviderCached<Bcp47ToIanaMapV1Marker> for SourceDataProvider {
fn iter_ids_cached(&self) -> Result<HashSet<DataIdentifierCow<'static>>, DataError> {
Ok(HashSet::from_iter([Default::default()]))
}
}
/// Creates the hasher used for the BCP-47 ID checksums: an `XxHash64` seeded with zero.
///
/// The seed is fixed so that the same input bytes always yield the same checksum.
fn create_hasher() -> impl std::hash::Hasher {
    const SEED: u64 = 0;
    twox_hash::XxHash64::with_seed(SEED)
}
/// Computes the checksum of a list of BCP-47 time zone IDs.
///
/// The slice's byte representation is written to the hasher from `create_hasher` in a single
/// call, so the result depends on both the IDs and their order.
fn compute_bcp47_ids_hash(bcp47_ids: &ZeroSlice<TimeZoneBcp47Id>) -> u64 {
    let raw = bcp47_ids.as_bytes();
    let mut state = create_hasher();
    state.write(raw);
    state.finish()
}
#[test]
fn test_compute_bcp47_ids_hash() {
    // Builds a `ZeroVec<TimeZoneBcp47Id>` from string literals, keeping their order.
    macro_rules! bcp47_ids {
        ($($id:tt),+ $(,)?) => {
            [$(TimeZoneBcp47Id(tinystr::tinystr!(8, $id))),+]
                .into_iter()
                .collect::<ZeroVec<TimeZoneBcp47Id>>()
        };
    }

    let baseline = bcp47_ids!["aedxb", "brfor", "usinvev"];
    let baseline_hash = compute_bcp47_ids_hash(&baseline);
    // Pin the exact value so an accidental change of algorithm or seed is caught.
    assert_eq!(baseline_hash, 0x66FA043B31200DCB);

    // Hashing the IDs one at a time must agree with hashing the whole slice at once.
    let mut incremental = create_hasher();
    baseline
        .iter()
        .for_each(|id| incremental.write(id.0.all_bytes()));
    assert_eq!(baseline_hash, incremental.finish());

    // Same IDs in a different order.
    let reordered = bcp47_ids!["usinvev", "aedxb", "brfor"];
    // The `b` moved from the end of the first ID to the start of the second.
    let shifted = bcp47_ids!["aedx", "bbrfor", "usinvev"];
    // An extra empty ID at the end, and in the middle.
    let trailing_empty = bcp47_ids!["aedxb", "brfor", "usinvev", ""];
    let inner_empty = bcp47_ids!["aedxb", "", "brfor", "usinvev"];

    let labelled = [
        ("baseline", baseline_hash),
        ("reordered", compute_bcp47_ids_hash(&reordered)),
        ("shifted", compute_bcp47_ids_hash(&shifted)),
        ("trailing_empty", compute_bcp47_ids_hash(&trailing_empty)),
        ("inner_empty", compute_bcp47_ids_hash(&inner_empty)),
    ];

    // Every variant must hash differently from every other one.
    for (i, (left_name, left)) in labelled.iter().enumerate() {
        for (right_name, right) in &labelled[i + 1..] {
            assert_ne!(
                left, right,
                "{left_name} and {right_name} must hash differently"
            );
        }
    }
}
#[test]
fn test_normalize_canonicalize_iana_coverage() {
    let source = crate::SourceDataProvider::new_testing();
    let iana_to_bcp47 = &source.compute_bcp47_tzids_btreemap().unwrap();
    let owned_mapper = icu::timezone::TimeZoneIdMapper::try_new_unstable(&source).unwrap();
    let id_mapper = owned_mapper.as_borrowed();

    // Normalizing a name taken straight from the source map must return it unchanged.
    for known_name in iana_to_bcp47.keys() {
        let normalized = id_mapper.normalize_iana(known_name).unwrap().0;
        assert_eq!(&normalized, known_name);
    }

    // Canonicalizing any known name must yield the canonical name of its BCP-47 ID.
    let canonical_by_bcp47 = &source.compute_canonical_tzids_btreemap().unwrap();
    for (known_name, bcp47) in iana_to_bcp47.iter() {
        let canonicalized = id_mapper.canonicalize_iana(known_name).unwrap().0;
        let expected = canonical_by_bcp47.get(bcp47).unwrap();
        assert_eq!(&canonicalized, expected);
    }
}