1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
45//! This module contains the logic for aggregating rules into groups.
6//! Due to incompatible orders (see comment on `ReverseRuleGroupAggregator`), we need separate
7//! aggregators for forward and reverse directions.
8//!
9//! These aggregators both accept source-order (!) `parse::Rule`s and aggregate them into the rule
10//! groups that the data struct [`RuleBasedTransliterator`](icu::experimental::transliterate::provider::RuleBasedTransliterator)
11//! semantically expects. A later step converts these into the actual zero-copy format.
12//! They also transform the bidirectional `parse::Rule`s into a unidirectional version.
1314// note: the aggregators currently work in a one-`parse::Rule`-at-a-time fashion, but really, they
15// could also receive the full `&[parse::Rule]` slice. with some careful index handling, some
16// allocations could be avoided.
1718use super::*;
19use alloc::borrow::Cow;
20use alloc::collections::VecDeque;
21use alloc::vec;
2223// parse::Rule::Conversion but unidirectional
24#[derive(Debug, Clone)]
25pub(crate) struct UniConversionRule<'p> {
26pub(crate) ante: &'p [parse::Element],
27pub(crate) key: &'p [parse::Element],
28pub(crate) post: &'p [parse::Element],
29pub(crate) replacement: &'p [parse::Element],
30}
3132// transform + conversion rule groups for a single direction
33pub(crate) type RuleGroups<'p> = Vec<(Vec<Cow<'p, parse::SingleId>>, Vec<UniConversionRule<'p>>)>;
3435// an intermediate enum for use in the aggregators
36enum UniRule<'p> {
37 Conversion(UniConversionRule<'p>),
38 Transform(Cow<'p, parse::SingleId>),
39}
4041#[derive(Debug, Clone)]
42pub(crate) struct ForwardRuleGroupAggregator<'p> {
43 current: ForwardRuleGroup<'p>,
44// the forward aggregator can use the final type directly, as source-order is equivalent to
45 // forward-order.
46groups: RuleGroups<'p>,
47// the transform_group of a group pair appears first
48preceding_transform_group: Option<Vec<Cow<'p, parse::SingleId>>>,
49}
5051impl<'p> ForwardRuleGroupAggregator<'p> {
52pub(crate) fn new() -> Self {
53Self {
54// this is a somewhat important first group.
55 // we want &[(transform_group), (conversion_group)] in the end, and because we iterate
56 // in source-order, the first element of that is a transform_group.
57current: ForwardRuleGroup::Transform(Vec::new()),
58 groups: Vec::new(),
59 preceding_transform_group: None,
60 }
61 }
6263pub(crate) fn push(&mut self, rule: &'p parse::Rule) {
64match rule {
65 parse::Rule::Conversion(source_half, dir, target_half) => {
66if !dir.permits(Direction::Forward) {
67return;
68 }
6970let ante = &source_half.ante;
71let key = &source_half.key;
72let post = &source_half.post;
73let replacement = &target_half.key;
7475let rule = UniConversionRule {
76 ante,
77 key,
78 post,
79 replacement,
80 };
8182let finished_group = self.current.push(UniRule::Conversion(rule));
83if let Some(finished_group) = finished_group {
84self.push_rule_group(finished_group);
85 }
86 }
87 parse::Rule::Transform(fwd, _) => {
88let finished_group = self.current.push(UniRule::Transform(Cow::Borrowed(fwd)));
89if let Some(finished_group) = finished_group {
90self.push_rule_group(finished_group);
91 }
92 }
93 parse::Rule::VariableDefinition(..) => {
94// variable definitions are handled in a previous step
95}
96 parse::Rule::GlobalFilter(..) | parse::Rule::GlobalInverseFilter(..) => {
97// global filters are handled in a previous step
98}
99 }
100 }
101102fn push_rule_group(&mut self, group: ForwardRuleGroup<'p>) {
103match group {
104 ForwardRuleGroup::Transform(transform_group) => {
105// because ForwardRuleGroup returns a different kind of group every time,
106 // the previous group must have been a conversion group which pushed the
107 // finished group pair into self.groups.
108debug_assert!(self.preceding_transform_group.is_none());
109self.preceding_transform_group = Some(transform_group);
110 }
111 ForwardRuleGroup::Conversion(conversion_group) => {
112// unwrap is necessary if the first source-order rule group is a conversion group
113let associated_transform_group =
114self.preceding_transform_group.take().unwrap_or_default();
115self.groups
116 .push((associated_transform_group, conversion_group));
117 }
118 }
119 }
120121pub(crate) fn finalize(mut self) -> RuleGroups<'p> {
122// push the current group
123 // note: refactoring could get rid of clone
124self.push_rule_group(self.current.clone());
125// push any remaining group pairs
126if let Some(transform_group) = self.preceding_transform_group.take() {
127self.groups.push((transform_group, Vec::new()));
128 }
129130self.groups
131 }
132}
133134// Represents a non-empty rule group for the forward direction.
135#[derive(Debug, Clone)]
136enum ForwardRuleGroup<'p> {
137 Conversion(Vec<UniConversionRule<'p>>),
138 Transform(Vec<Cow<'p, parse::SingleId>>),
139}
140141impl<'p> ForwardRuleGroup<'p> {
142fn new_conversion(rule: UniConversionRule<'p>) -> Self {
143Self::Conversion(vec![rule])
144 }
145146fn new_transform(rule: Cow<'p, parse::SingleId>) -> Self {
147Self::Transform(vec![rule])
148 }
149150// if the group is full return self, and push the rule into a new group
151fn push(&mut self, rule: UniRule<'p>) -> Option<Self> {
152// necessary reborrow for mem::replace
153match (&mut *self, rule) {
154 (Self::Conversion(group), UniRule::Conversion(rule)) => {
155 group.push(rule);
156None
157}
158 (Self::Transform(group), UniRule::Transform(rule)) => {
159 group.push(rule);
160None
161}
162 (Self::Conversion(_), UniRule::Transform(new_rule)) => {
163Some(core::mem::replace(self, Self::new_transform(new_rule)))
164 }
165 (Self::Transform(_), UniRule::Conversion(new_rule)) => {
166Some(core::mem::replace(self, Self::new_conversion(new_rule)))
167 }
168 }
169 }
170}
171172// Rules will be pushed in source-order (i.e., forward order), which means we have to be careful
173// in which order we aggregate them. Example: (T = transform rule, C = conversion rule)
174// T1 T2 C1 C2 T3 C3 C4 T4 T5
175// should be aggregated as
176// (T5, T4), (C3, C4), (T3), (C1, C2), (T2, T1) (assuming all rules apply to the reverse direction)
177// note in particular the discrepancy between the order of contiguous T's and contiguous C's:
178// contiguous C's keep the source order, but contiguous T's are reversed. Also the overall order
179// is reversed, of course.
180//
181// We do this by using VecDeque, push_front, and make_contiguous in the end.
182#[derive(Debug, Clone)]
183pub(crate) struct ReverseRuleGroupAggregator<'p> {
184 current: ReverseRuleGroup<'p>,
185// VecDeque because we encounter groups in source-order, but we want to aggregate them in
186 // reverse-order.
187groups: VecDeque<(Vec<Cow<'p, parse::SingleId>>, Vec<UniConversionRule<'p>>)>,
188// the conversion_group of a group pair appears first due to the reverse order
189preceding_conversion_group: Option<Vec<UniConversionRule<'p>>>,
190}
191192impl<'p> ReverseRuleGroupAggregator<'p> {
193pub(crate) fn new() -> Self {
194Self {
195// this is a somewhat important first group.
196 // we want &[(transform_group), (conversion_group)] in the end, and because we iterate
197 // in opposite order, the last element of that slice is a conversion_group.
198current: ReverseRuleGroup::Conversion(Vec::new()),
199 groups: VecDeque::new(),
200 preceding_conversion_group: None,
201 }
202 }
203204pub(crate) fn push(&mut self, rule: &'p parse::Rule) {
205match rule {
206 parse::Rule::Conversion(target_half, dir, source_half) => {
207if !dir.permits(Direction::Reverse) {
208return;
209 }
210211let ante = &source_half.ante;
212let key = &source_half.key;
213let post = &source_half.post;
214let replacement = &target_half.key;
215216let rule = UniConversionRule {
217 ante,
218 key,
219 post,
220 replacement,
221 };
222223let finished_group = self.current.push(UniRule::Conversion(rule));
224if let Some(finished_group) = finished_group {
225self.push_rule_group(finished_group);
226 }
227 }
228 parse::Rule::Transform(fwd, rev) => {
229let rev = rev.clone().unwrap_or_else(|| fwd.clone().reverse());
230231let finished_group = self.current.push(UniRule::Transform(Cow::Owned(rev)));
232if let Some(finished_group) = finished_group {
233self.push_rule_group(finished_group);
234 }
235 }
236 parse::Rule::VariableDefinition(..) => {
237// variable definitions are handled in a previous step
238}
239 parse::Rule::GlobalFilter(..) | parse::Rule::GlobalInverseFilter(..) => {
240// global filters are handled in a previous step
241}
242 }
243 }
244245fn push_rule_group(&mut self, group: ReverseRuleGroup<'p>) {
246match group {
247 ReverseRuleGroup::Conversion(conv_group) => {
248// because ReverseRuleGroup returns a different kind of group every time,
249 // the previous group must have been a transform group which pushed the
250 // finished group pair into self.groups.
251debug_assert!(self.preceding_conversion_group.is_none());
252self.preceding_conversion_group = Some(conv_group);
253 }
254 ReverseRuleGroup::Transform(transform_group) => {
255// unwrap is necessary if the first source-order rule group is a transform group
256let associated_conv_group =
257self.preceding_conversion_group.take().unwrap_or_default();
258let vec_transform_group = transform_group.into(); // non-allocating conversion
259self.groups
260 .push_front((vec_transform_group, associated_conv_group));
261 }
262 }
263 }
264265pub(crate) fn finalize(mut self) -> RuleGroups<'p> {
266// push the current group
267 // note: refactoring could get rid of clone
268self.push_rule_group(self.current.clone());
269// push any remaining group pairs
270if let Some(conv_group) = self.preceding_conversion_group.take() {
271// a trailing conversion group in source order is the same as having a conversion
272 // group as the first in-order group. we can just prepend an empty transform group.
273self.groups.push_front((Vec::new(), conv_group));
274 }
275276self.groups.into() // non-allocating conversion
277}
278}
279280// Represents a non-empty rule group for the reverse direction.
281#[derive(Debug, Clone)]
282enum ReverseRuleGroup<'p> {
283// because contiguous C's are aggregated in source-order, we can just use a Vec
284Conversion(Vec<UniConversionRule<'p>>),
285// but contiguous T's are aggregated in reverse-order, so we need to use a VecDeque and push_front
286Transform(VecDeque<Cow<'p, parse::SingleId>>),
287}
288289impl Default for ReverseRuleGroup<'_> {
290fn default() -> Self {
291Self::Conversion(Vec::new())
292 }
293}
294295impl<'p> ReverseRuleGroup<'p> {
296fn new_conversion(rule: UniConversionRule<'p>) -> Self {
297Self::Conversion(vec![rule])
298 }
299300fn new_transform(rule: Cow<'p, parse::SingleId>) -> Self {
301let mut group = VecDeque::new();
302 group.push_front(rule);
303Self::Transform(group)
304 }
305306fn push(&mut self, rule: UniRule<'p>) -> Option<Self> {
307// necessary reborrow for mem::replace
308match (&mut *self, rule) {
309 (Self::Conversion(group), UniRule::Conversion(rule)) => {
310 group.push(rule);
311None
312}
313 (Self::Transform(group), UniRule::Transform(rule)) => {
314// we receive rules via `push` in source-order, which is the opposite order we want,
315 // so we push_front.
316group.push_front(rule);
317None
318}
319 (Self::Conversion(_), UniRule::Transform(new_rule)) => {
320Some(core::mem::replace(self, Self::new_transform(new_rule)))
321 }
322 (Self::Transform(_), UniRule::Conversion(new_rule)) => {
323Some(core::mem::replace(self, Self::new_conversion(new_rule)))
324 }
325 }
326 }
327}