icu_provider_blob/
blob_schema.rs
use core::fmt::Write;
use icu_provider::{marker::DataMarkerIdHash, prelude::*};
use serde::Deserialize;
use writeable::Writeable;
use zerotrie::ZeroTrieSimpleAscii;
use zerovec::vecs::{Index16, Index32, VarZeroSlice, VarZeroVecFormat, ZeroSlice};
/// Top-level, versioned layout of a deserialized data blob.
///
/// The `V001`, `V002` and `V002Bigger` variants wrap [`NeverSchema`], an empty
/// enum whose `Deserialize` impl always returns an error, so those variants can
/// never be constructed.
/// NOTE(review): they presumably remain only so the positions of the live
/// variants stay stable in the serialized form — confirm before reordering.
#[derive(serde::Deserialize, yoke::Yokeable)]
#[yoke(prove_covariance_manually)]
#[cfg_attr(feature = "export", derive(serde::Serialize))]
#[derive(Debug, Clone)]
pub(crate) enum BlobSchema<'data> {
/// Slot for a format that can no longer be read (payload is `NeverSchema`).
V001(NeverSchema),
/// Slot for a format that can no longer be read (payload is `NeverSchema`).
V002(NeverSchema),
/// Slot for a format that can no longer be read (payload is `NeverSchema`).
V002Bigger(NeverSchema),
/// Readable format whose `locales` slice uses the `Index16` format.
#[serde(borrow)]
V003(BlobSchemaV1<'data, Index16>),
/// Same layout as `V003`, but the `locales` slice uses the `Index32` format.
#[serde(borrow)]
V003Bigger(BlobSchemaV1<'data, Index32>),
}
/// Character written between the locale and the marker attributes when a
/// request is looked up in a marker's trie; `iter_ids` splits trie keys on the
/// same character to recover the two parts.
pub(crate) const REQUEST_SEPARATOR: char = '\x1E';
/// Trie key under which `get_checksum` looks up the index of the buffer holding
/// a marker's checksum; `iter_ids` skips this key.
pub(crate) const CHECKSUM_KEY: &[u8] = b"\0c";
impl<'data> BlobSchema<'data> {
    /// Deserializes a schema from `de`.
    ///
    /// When compiled with debug assertions, the freshly deserialized schema is
    /// also passed through `check_invariants` before being returned.
    ///
    /// # Errors
    ///
    /// Returns whatever error the deserializer reports.
    pub fn deserialize_and_check<D: serde::Deserializer<'data>>(
        de: D,
    ) -> Result<BlobSchema<'data>, D::Error> {
        Self::deserialize(de).map(|schema| {
            #[cfg(debug_assertions)]
            schema.check_invariants();
            schema
        })
    }

    /// Forwards a load request to the schema version held by `self`.
    ///
    /// Returns the buffer bytes and an optional checksum, exactly as produced
    /// by the versioned schema's own `load`.
    ///
    /// # Panics
    ///
    /// The arms for the `NeverSchema` variants panic, but those variants wrap
    /// an empty enum and therefore cannot be constructed.
    pub fn load(
        &self,
        marker: DataMarkerInfo,
        req: DataRequest,
    ) -> Result<(&'data [u8], Option<u64>), DataError> {
        match self {
            Self::V003(schema) => schema.load(marker, req),
            Self::V003Bigger(schema) => schema.load(marker, req),
            Self::V001(..) | Self::V002(..) | Self::V002Bigger(..) => {
                unreachable!("Unreachable blob schema")
            }
        }
    }

    /// Forwards an identifier listing for `marker` to the schema version held
    /// by `self`.
    ///
    /// # Panics
    ///
    /// The arms for the `NeverSchema` variants panic, but those variants wrap
    /// an empty enum and therefore cannot be constructed.
    #[cfg(feature = "alloc")]
    pub fn iter_ids(
        &self,
        marker: DataMarkerInfo,
    ) -> Result<alloc::collections::BTreeSet<DataIdentifierCow>, DataError> {
        match self {
            Self::V003(schema) => schema.iter_ids(marker),
            Self::V003Bigger(schema) => schema.iter_ids(marker),
            Self::V001(..) | Self::V002(..) | Self::V002Bigger(..) => {
                unreachable!("Unreachable blob schema")
            }
        }
    }

    /// Runs the debug-only consistency checks of the schema version held by
    /// `self`; the `NeverSchema` variants have nothing to check.
    #[cfg(debug_assertions)]
    fn check_invariants(&self) {
        match self {
            Self::V003(schema) => schema.check_invariants(),
            Self::V003Bigger(schema) => schema.check_invariants(),
            Self::V001(..) | Self::V002(..) | Self::V002Bigger(..) => (),
        }
    }
}
/// Payload type of the `BlobSchema` variants that can no longer be read.
///
/// This is an empty enum, so no value of it can exist.
#[cfg_attr(feature = "export", derive(serde::Serialize))]
#[derive(Debug, Clone, yoke::Yokeable)]
pub enum NeverSchema {}
impl<'de> serde::Deserialize<'de> for NeverSchema {
fn deserialize<D>(_: D) -> Result<Self, D::Error>
where
D: serde::Deserializer<'de>,
{
use serde::de::Error;
Err(D::Error::custom("Attempted to read 1.0 blob format from ICU4X 2.0: please run ICU4X 2.0 datagen to generate a new file."))
}
}
/// Borrowed blob layout used by the `V003` and `V003Bigger` schema variants.
///
/// `LocaleVecFormat` selects the index format of the `locales` slice.
#[derive(Clone, Copy, Debug, serde::Deserialize, yoke::Yokeable)]
#[yoke(prove_covariance_manually)]
#[cfg_attr(feature = "export", derive(serde::Serialize))]
// `bound = ""` replaces the trait bounds serde would otherwise infer for the
// generic parameter with an empty set.
#[serde(bound = "")] pub(crate) struct BlobSchemaV1<'data, LocaleVecFormat: VarZeroVecFormat> {
/// Hashes of the markers contained in the blob. Lookups use `binary_search`,
/// so the slice is presumably stored in sorted order — TODO confirm against
/// the exporter.
#[serde(borrow)]
pub markers: &'data ZeroSlice<DataMarkerIdHash>,
/// One entry per marker, at the same index as in `markers`. Each entry is read
/// as the backing store of a `ZeroTrieSimpleAscii` whose values are indices
/// into `buffers`.
#[serde(borrow)]
pub locales: &'data VarZeroSlice<[u8], LocaleVecFormat>,
/// Byte buffers addressed by the values stored in the `locales` tries.
#[serde(borrow)]
pub buffers: &'data VarZeroSlice<[u8], Index32>,
}
impl<LocaleVecFormat: VarZeroVecFormat> Default for BlobSchemaV1<'_, LocaleVecFormat> {
    /// Builds a schema in which `markers`, `locales` and `buffers` are all
    /// empty slices.
    fn default() -> Self {
        let markers = ZeroSlice::new_empty();
        let locales = VarZeroSlice::new_empty();
        let buffers = VarZeroSlice::new_empty();
        Self {
            markers,
            locales,
            buffers,
        }
    }
}
impl<'data, LocaleVecFormat: VarZeroVecFormat> BlobSchemaV1<'data, LocaleVecFormat> {
    /// Looks up the buffer stored for `req` under `marker`.
    ///
    /// On success returns the buffer bytes together with an optional checksum.
    /// The checksum is only looked up when `marker.has_checksum` is set; it is
    /// `None` otherwise, and also `None` when it cannot be read from the blob.
    ///
    /// # Errors
    ///
    /// - `InvalidRequest` when a singleton marker is requested with a
    ///   non-default locale.
    /// - `MarkerNotFound` when the marker's hash is not found in `markers`.
    /// - `IdentifierNotFound` when the marker attributes cannot be written to
    ///   the trie cursor, or when no value is found for the request.
    /// - A custom `"Invalid blob bytes"` error when `locales` has no entry for
    ///   the marker index or `buffers` has no entry for the value found.
    pub fn load(
        &self,
        marker: DataMarkerInfo,
        req: DataRequest,
    ) -> Result<(&'data [u8], Option<u64>), DataError> {
        if marker.is_singleton && !req.id.locale.is_default() {
            return Err(DataErrorKind::InvalidRequest.with_req(marker, req));
        }

        let marker_index = match self.markers.binary_search(&marker.id.hashed()) {
            Ok(index) => index,
            Err(_) => return Err(DataErrorKind::MarkerNotFound.with_req(marker, req)),
        };
        let zerotrie = match self.locales.get(marker_index) {
            Some(bytes) => bytes,
            None => return Err(DataError::custom("Invalid blob bytes").with_req(marker, req)),
        };

        // Walk the marker's trie: first the locale, then (if present) the
        // separator followed by the marker attributes.
        let mut cursor = ZeroTrieSimpleAscii::from_store(zerotrie).into_cursor();
        let _locale_written = req.id.locale.write_to(&mut cursor);

        let found = if req.id.marker_attributes.is_empty() {
            cursor.take_value()
        } else {
            let _separator_written = cursor.write_char(REQUEST_SEPARATOR);
            req.id
                .marker_attributes
                .write_to(&mut cursor)
                .map_err(|_| DataErrorKind::IdentifierNotFound.with_req(marker, req))?;
            loop {
                let value = cursor.take_value();
                if value.is_some() {
                    break value;
                }
                // Without a value at the current position, keep going only if
                // prefix matching was requested and `probe(0)` can still step
                // the cursor further.
                let stepped = req.metadata.attributes_prefix_match && cursor.probe(0).is_some();
                if !stepped {
                    break None;
                }
            }
        };
        let blob_index = match found {
            Some(index) => index,
            None => return Err(DataErrorKind::IdentifierNotFound.with_req(marker, req)),
        };

        let buffer = match self.buffers.get(blob_index) {
            Some(bytes) => bytes,
            None => return Err(DataError::custom("Invalid blob bytes").with_req(marker, req)),
        };
        let checksum = if marker.has_checksum {
            self.get_checksum(zerotrie)
        } else {
            None
        };
        Ok((buffer, checksum))
    }

    /// Reads the checksum referenced from the trie stored in `zerotrie`.
    ///
    /// Returns `None` when the trie has no `CHECKSUM_KEY` entry, when the
    /// referenced buffer does not exist, or when that buffer is not exactly
    /// eight bytes long. The bytes are decoded as a little-endian `u64`.
    fn get_checksum(&self, zerotrie: &[u8]) -> Option<u64> {
        let checksum_index = ZeroTrieSimpleAscii::from_store(zerotrie).get(CHECKSUM_KEY)?;
        let raw = self.buffers.get(checksum_index)?;
        let le_bytes: [u8; 8] = raw.try_into().ok()?;
        Some(u64::from_le_bytes(le_bytes))
    }

    /// Collects the data identifiers stored in the trie of `marker`.
    ///
    /// Keys containing `REQUEST_SEPARATOR` are split into a locale and marker
    /// attributes; the `CHECKSUM_KEY` entry is skipped; every other key is
    /// parsed as a locale. Keys whose parts fail to parse are silently dropped.
    ///
    /// # Errors
    ///
    /// - `MarkerNotFound` when the marker's hash is not found in `markers`.
    /// - A custom `"Invalid blob bytes"` error when `locales` has no entry for
    ///   the marker index.
    #[cfg(feature = "alloc")]
    pub fn iter_ids(
        &self,
        marker: DataMarkerInfo,
    ) -> Result<alloc::collections::BTreeSet<DataIdentifierCow>, DataError> {
        let marker_index = match self.markers.binary_search(&marker.id.hashed()) {
            Ok(index) => index,
            Err(_) => return Err(DataErrorKind::MarkerNotFound.with_marker(marker)),
        };
        let zerotrie = match self.locales.get(marker_index) {
            Some(bytes) => bytes,
            None => return Err(DataError::custom("Invalid blob bytes").with_marker(marker)),
        };

        let trie = ZeroTrieSimpleAscii::from_store(zerotrie);
        Ok(trie
            .iter()
            .filter_map(|(key, _)| {
                #[allow(unused_imports)]
                use alloc::borrow::ToOwned;
                match key.split_once(REQUEST_SEPARATOR) {
                    Some((locale, attrs)) => Some(DataIdentifierCow::from_owned(
                        DataMarkerAttributes::try_from_str(attrs).ok()?.to_owned(),
                        locale.parse().ok()?,
                    )),
                    None if key.as_bytes() == CHECKSUM_KEY => None,
                    None => Some(DataIdentifierCow::from_locale(key.parse().ok()?)),
                }
            })
            .collect())
    }

    /// Debug-only consistency checks on the deserialized schema.
    ///
    /// A completely empty schema is accepted as-is. Otherwise this asserts
    /// that `markers` and `locales` have the same length, that every trie
    /// value is a valid index into `buffers`, and that both the first and the
    /// last buffer index are referenced by at least one trie entry.
    #[cfg(debug_assertions)]
    fn check_invariants(&self) {
        let entirely_empty =
            self.markers.is_empty() && self.locales.is_empty() && self.buffers.is_empty();
        if entirely_empty {
            return;
        }
        debug_assert_eq!(self.markers.len(), self.locales.len());

        // Only the smallest and largest buffer indices are tracked; with no
        // buffers at all both conditions start out satisfied.
        let mut seen_min = self.buffers.is_empty();
        let mut seen_max = self.buffers.is_empty();
        for zerotrie in self.locales.iter() {
            let trie = ZeroTrieSimpleAscii::from_store(zerotrie);
            for (_locale, idx) in trie.iter() {
                debug_assert!(idx < self.buffers.len());
                seen_min |= idx == 0;
                seen_max |= idx + 1 == self.buffers.len();
            }
        }
        debug_assert!(seen_min);
        debug_assert!(seen_max);
    }
}