#![allow(clippy::exhaustive_structs, clippy::exhaustive_enums)]
use core::ops::RangeInclusive;
use alloc::borrow::Cow;
use icu_collections::{
codepointinvlist::{CodePointInversionList, CodePointInversionListULE},
codepointinvliststringlist::CodePointInversionListAndStringListULE,
};
use icu_provider::prelude::*;
use vecs::Index32;
use zerovec::*;
/// Data struct holding a compiled rule-based transliterator.
///
/// Registered with `icu_provider` through the `data_struct` attribute under the marker
/// `TransliteratorRulesV1Marker` and the string `"transliterator/rules@1"`.
///
/// `id_group_list` and `rule_group_list` are expected to contain the same number of groups:
/// the `serde::Deserialize` implementation for this type rejects data where the two lengths
/// differ.
#[icu_provider::data_struct(TransliteratorRulesV1Marker = "transliterator/rules@1")]
#[derive(Debug, Clone, PartialEq, Eq)]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_experimental::transliterate::provider))]
pub struct RuleBasedTransliterator<'a> {
/// NOTE(review): presumably whether this transliterator may be requested directly by
/// clients (as opposed to being internal-only) — confirm against the loading code.
pub visibility: bool,
/// Variable definitions used by this transliterator; see [`VarTable`].
pub variable_table: VarTable<'a>,
/// Code point set attached to the whole transliterator.
/// NOTE(review): presumably the global filter limiting which code points are
/// transliterated — confirm.
pub filter: CodePointInversionList<'a>,
/// Groups of [`SimpleId`]s (stored as `SimpleIdULE`). The `id` of every entry is reported
/// by [`RuleBasedTransliterator::deps`].
pub id_group_list: VarZeroVec<'a, VarZeroSlice<SimpleIdULE>>,
/// Groups of [`Rule`]s (stored as `RuleULE`), using 32-bit indices (`Index32`) at both
/// nesting levels. Must have the same length as `id_group_list` to deserialize.
pub rule_group_list: VarZeroVec<'a, VarZeroSlice<RuleULE, Index32>, Index32>,
}
// Hand-written `Deserialize` so that structurally invalid data is rejected up front:
// the derived implementation alone could not enforce that both group lists have the
// same number of entries.
#[cfg(feature = "serde")]
impl<'de> serde::Deserialize<'de> for RuleBasedTransliterator<'de> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        // Field-for-field mirror of `RuleBasedTransliterator`; the derived impl does the
        // actual decoding, the checks below do the validation.
        #[derive(serde::Deserialize)]
        pub struct Raw<'a> {
            pub visibility: bool,
            #[serde(borrow)]
            pub variable_table: VarTable<'a>,
            #[serde(borrow)]
            pub filter: CodePointInversionList<'a>,
            #[serde(borrow)]
            pub id_group_list: VarZeroVec<'a, VarZeroSlice<SimpleIdULE>>,
            #[serde(borrow)]
            pub rule_group_list: VarZeroVec<'a, VarZeroSlice<RuleULE, Index32>, Index32>,
        }

        let raw = Raw::deserialize(deserializer)?;

        let id_group_count = raw.id_group_list.len();
        let rule_group_count = raw.rule_group_list.len();

        if id_group_count == rule_group_count {
            // Validated: move every field over unchanged.
            Ok(Self {
                visibility: raw.visibility,
                variable_table: raw.variable_table,
                filter: raw.filter,
                id_group_list: raw.id_group_list,
                rule_group_list: raw.rule_group_list,
            })
        } else {
            Err(<D::Error as serde::de::Error>::custom(
                "invalid data: id_group_list and rule_group_list have different lengths",
            ))
        }
    }
}
impl RuleBasedTransliterator<'_> {
    /// Returns the IDs of all transliterators this one refers to.
    ///
    /// The iterator first yields the `id` of every [`SimpleId`] in `id_group_list`
    /// (group by group, in stored order), followed by the `translit.id` of every
    /// [`FunctionCall`] in `variable_table.function_calls`. IDs are yielded as stored;
    /// no de-duplication is performed.
    ///
    /// The returned strings borrow from `self`, which the explicit `'_` in the return
    /// type makes visible at the call site.
    pub fn deps(&self) -> impl Iterator<Item = Cow<'_, str>> {
        use zerofrom::ZeroFrom;

        // IDs listed directly in the ID groups.
        let group_ids = self
            .id_group_list
            .iter()
            .flat_map(|id_group| id_group.iter().map(|s| SimpleId::zero_from(s).id));

        // IDs referenced through function calls stored in the variable table.
        let function_call_ids = self
            .variable_table
            .function_calls
            .iter()
            .map(|s| FunctionCall::zero_from(s).translit.id);

        group_ids.chain(function_call_ids)
    }
}
/// A transliterator ID (`id`) paired with a code point set (`filter`).
///
/// `#[make_varule(SimpleIdULE)]` generates the variable-length `SimpleIdULE` form of this
/// struct; that form is the element type of `RuleBasedTransliterator::id_group_list` and
/// is embedded in [`FunctionCall`]. `Ord` is explicitly not derived for the ULE type.
#[derive(Debug, Clone)]
#[make_varule(SimpleIdULE)]
#[zerovec::skip_derive(Ord)]
#[zerovec::derive(Debug)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize),
zerovec::derive(Deserialize)
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize),
zerovec::derive(Serialize)
)]
pub struct SimpleId<'a> {
/// Code point set attached to this ID, stored as `CodePointInversionListULE`.
/// NOTE(review): presumably the filter applied to the referenced transliterator — confirm.
#[zerovec::varule(CodePointInversionListULE)]
#[cfg_attr(feature = "serde", serde(borrow))]
pub filter: CodePointInversionList<'a>,
/// The identifier string; this is the value yielded by `RuleBasedTransliterator::deps`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub id: Cow<'a, str>,
}
/// A single rule, stored as four strings.
///
/// `#[make_varule(RuleULE)]` generates the variable-length `RuleULE` form, which is the
/// element type of `RuleBasedTransliterator::rule_group_list`. `Ord` is explicitly not
/// derived for the ULE type.
///
/// NOTE(review): the field semantics below are inferred from the field names only —
/// confirm against the code that applies rules.
#[derive(Debug, Clone)]
#[make_varule(RuleULE)]
#[zerovec::skip_derive(Ord)]
#[zerovec::derive(Debug)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize),
zerovec::derive(Deserialize)
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize),
zerovec::derive(Serialize)
)]
pub struct Rule<'a> {
/// Presumably the context that must precede `key` for the rule to apply.
#[cfg_attr(feature = "serde", serde(borrow))]
pub ante: Cow<'a, str>,
/// Presumably the text that is matched and replaced.
#[cfg_attr(feature = "serde", serde(borrow))]
pub key: Cow<'a, str>,
/// Presumably the context that must follow `key` for the rule to apply.
#[cfg_attr(feature = "serde", serde(borrow))]
pub post: Cow<'a, str>,
/// Presumably the replacement written in place of `key`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub replacer: Cow<'a, str>,
}
/// Table of variable definitions belonging to a `RuleBasedTransliterator`, grouped by kind.
///
/// Every list is a `VarZeroVec` using 32-bit indices (`Index32`) and can borrow from the
/// deserialization input.
///
/// NOTE(review): presumably entries are referenced from rule strings through the
/// private-use code points described by the `VarTable` associated constants (`BASE`,
/// `ENCODE_RANGE`, ...) — confirm against the encoding/decoding code. Per-field
/// semantics below are inferred from names and element types only.
#[derive(Debug, Clone, zerofrom::ZeroFrom, PartialEq, Eq)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_experimental::transliterate::provider))]
pub struct VarTable<'a> {
/// String entries. Presumably compound variables (sequences of other elements).
#[cfg_attr(feature = "serde", serde(borrow))]
pub compounds: VarZeroVec<'a, str, Index32>,
/// String entries. Presumably the operands of zero-or-one (`?`) quantifiers.
#[cfg_attr(feature = "serde", serde(borrow))]
pub quantifiers_opt: VarZeroVec<'a, str, Index32>,
/// String entries. Presumably the operands of zero-or-more (`*`) quantifiers.
#[cfg_attr(feature = "serde", serde(borrow))]
pub quantifiers_kleene: VarZeroVec<'a, str, Index32>,
/// String entries. Presumably the operands of one-or-more (`+`) quantifiers.
#[cfg_attr(feature = "serde", serde(borrow))]
pub quantifiers_kleene_plus: VarZeroVec<'a, str, Index32>,
/// [`Segment`] entries, stored as `SegmentULE`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub segments: VarZeroVec<'a, SegmentULE, Index32>,
/// Set entries (code points plus strings), stored as
/// `CodePointInversionListAndStringListULE`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub unicode_sets: VarZeroVec<'a, CodePointInversionListAndStringListULE, Index32>,
/// [`FunctionCall`] entries, stored as `FunctionCallULE`. The `translit.id` of each entry
/// is reported by `RuleBasedTransliterator::deps`.
#[cfg_attr(feature = "serde", serde(borrow))]
pub function_calls: VarZeroVec<'a, FunctionCallULE, Index32>,
/// NOTE(review): presumably the largest number of cursor placeholders on the left side
/// of any replacement — confirm.
pub max_left_placeholder_count: u16,
/// NOTE(review): presumably the largest number of cursor placeholders on the right side
/// of any replacement — confirm.
pub max_right_placeholder_count: u16,
}
impl VarTable<'_> {
    /// Lowest code point of [`Self::ENCODE_RANGE`]: U+F0000, the first code point of
    /// Supplementary Private Use Area-A.
    pub const BASE: char = '\u{F0000}';

    /// Highest code point counted by [`Self::NUM_DYNAMIC`]: U+FFFF0.
    pub const MAX_DYNAMIC: char = '\u{FFFF0}';

    /// Fixed code point U+FFFFB; inside [`Self::ENCODE_RANGE`] but above
    /// [`Self::MAX_DYNAMIC`].
    /// NOTE(review): by its name, stands for a pure cursor — confirm.
    pub const RESERVED_PURE_CURSOR: char = '\u{FFFFB}';

    /// Fixed code point U+FFFFC; inside [`Self::ENCODE_RANGE`] but above
    /// [`Self::MAX_DYNAMIC`].
    /// NOTE(review): by its name, stands for the start anchor — confirm.
    pub const RESERVED_ANCHOR_START: char = '\u{FFFFC}';

    /// Fixed code point U+FFFFD; the highest code point of [`Self::ENCODE_RANGE`].
    /// NOTE(review): by its name, stands for the end anchor — confirm.
    pub const RESERVED_ANCHOR_END: char = '\u{FFFFD}';

    /// Inclusive range from [`Self::BASE`] to [`Self::RESERVED_ANCHOR_END`].
    pub const ENCODE_RANGE: RangeInclusive<char> =
        RangeInclusive::new(Self::BASE, Self::RESERVED_ANCHOR_END);

    /// Number of code points in `BASE..=MAX_DYNAMIC` (both ends included).
    pub const NUM_DYNAMIC: usize = (Self::MAX_DYNAMIC as usize + 1) - Self::BASE as usize;
}
/// An entry of `VarTable::segments`: a 16-bit index together with a string.
///
/// `#[make_varule(SegmentULE)]` generates the variable-length `SegmentULE` form that is
/// actually stored. `Ord` is explicitly not derived for the ULE type.
#[derive(Debug, Clone)]
#[make_varule(SegmentULE)]
#[zerovec::skip_derive(Ord)]
#[zerovec::derive(Debug)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize),
zerovec::derive(Deserialize)
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize),
zerovec::derive(Serialize)
)]
pub struct Segment<'a> {
/// NOTE(review): presumably the index identifying this segment (e.g. for
/// back-references); whether it is 0- or 1-based is not visible here — confirm.
pub idx: u16,
/// NOTE(review): presumably the (encoded) pattern matched by this segment — confirm.
#[cfg_attr(feature = "serde", serde(borrow))]
pub content: Cow<'a, str>,
}
/// An entry of `VarTable::function_calls`: a transliterator reference plus an argument
/// string.
///
/// `#[make_varule(FunctionCallULE)]` generates the variable-length `FunctionCallULE` form
/// that is actually stored. `Ord` is explicitly not derived for the ULE type.
#[derive(Debug, Clone)]
#[make_varule(FunctionCallULE)]
#[zerovec::skip_derive(Ord)]
#[zerovec::derive(Debug)]
#[cfg_attr(
feature = "serde",
derive(serde::Deserialize),
zerovec::derive(Deserialize)
)]
#[cfg_attr(
feature = "datagen",
derive(serde::Serialize),
zerovec::derive(Serialize)
)]
pub struct FunctionCall<'a> {
/// The referenced transliterator, stored as `SimpleIdULE`. Its `id` is reported by
/// `RuleBasedTransliterator::deps`.
#[zerovec::varule(SimpleIdULE)]
#[cfg_attr(feature = "serde", serde(borrow))]
pub translit: SimpleId<'a>,
/// NOTE(review): presumably the (encoded) argument that is passed to `translit` — confirm.
#[cfg_attr(feature = "serde", serde(borrow))]
pub arg: Cow<'a, str>,
}