icu_pattern/
multi_named.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Code for the [`MultiNamedPlaceholder`] pattern backend.
6
7#[cfg(feature = "alloc")]
8use alloc::{borrow::Cow, boxed::Box, collections::BTreeMap, str::FromStr, string::String};
9use core::fmt;
10#[cfg(feature = "litemap")]
11use litemap::LiteMap;
12use writeable::Writeable;
13
14use crate::common::*;
15use crate::Error;
16
/// Borrowed placeholder name used by the [`MultiNamedPlaceholder`] pattern backend.
///
/// The wrapped string slice is the name of the placeholder, i.e. the text that
/// appears between the braces in the pattern's string syntax.
///
/// # Examples
///
/// ```
/// use core::cmp::Ordering;
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholderKey;
/// use icu_pattern::MultiNamedPlaceholderPattern;
/// use icu_pattern::PatternItem;
///
/// // Parse the string syntax and check the resulting data store:
/// let pattern = MultiNamedPlaceholderPattern::try_from_str(
///     "Hello, {person0} and {person1}!",
///     Default::default(),
/// )
/// .unwrap();
///
/// assert_eq!(
///     pattern.iter().cmp(
///         [
///             PatternItem::Literal("Hello, "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey(
///                 "person0".into()
///             )),
///             PatternItem::Literal(" and "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey(
///                 "person1".into()
///             )),
///             PatternItem::Literal("!")
///         ]
///         .into_iter()
///     ),
///     Ordering::Equal
/// );
/// ```
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct MultiNamedPlaceholderKey<'a>(pub &'a str);
57
/// Clone-on-write counterpart of [`MultiNamedPlaceholderKey`], used during construction.
///
/// The placeholder name is held in a [`Cow`], so it may be either borrowed or owned.
#[cfg(feature = "alloc")]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[repr(transparent)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct MultiNamedPlaceholderKeyCow<'a>(pub Cow<'a, str>);
65
#[cfg(feature = "alloc")]
impl FromStr for MultiNamedPlaceholderKeyCow<'_> {
    type Err = Error;

    /// Wraps `s` as a placeholder key. This conversion always returns `Ok`.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // The signature of `FromStr::from_str` does not let the result borrow
        // from `s`, so the name has to be copied into an owned `String`.
        let owned_name = String::from(s);
        Ok(Self(Cow::Owned(owned_name)))
    }
}
74
/// Error identifying a named placeholder for which no value was found.
///
/// When written through its [`Writeable`] implementation, the error renders as
/// the placeholder name wrapped in braces, for example `{your_name}`.
#[derive(Debug, Clone, PartialEq, Eq)]
#[non_exhaustive]
pub struct MissingNamedPlaceholderError<'a> {
    /// Name of the placeholder that could not be resolved.
    pub name: &'a str,
}
80
81impl Writeable for MissingNamedPlaceholderError<'_> {
82    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
83        sink.write_char('{')?;
84        sink.write_str(self.name)?;
85        sink.write_char('}')?;
86        Ok(())
87    }
88}
89
#[cfg(feature = "alloc")]
impl<'k, K, W> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for BTreeMap<K, W>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
{
    /// Returned when the map has no entry for a placeholder name.
    type Error = MissingNamedPlaceholderError<'k>;

    /// Either a reference to the value stored in the map or the lookup error.
    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    /// Literals are handed back as plain string slices.
    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Looks up the value stored under the placeholder's name.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let MultiNamedPlaceholderKey(name) = key;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Returns the literal unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
120
#[cfg(feature = "litemap")]
impl<'k, K, W, S> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for LiteMap<K, W, S>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
    S: litemap::store::Store<K, W>,
{
    /// Returned when the map has no entry for a placeholder name.
    type Error = MissingNamedPlaceholderError<'k>;

    /// Either a reference to the value stored in the map or the lookup error.
    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    /// Literals are handed back as plain string slices.
    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Looks up the value stored under the placeholder's name.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let MultiNamedPlaceholderKey(name) = key;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Returns the literal unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
152
/// Backend for patterns containing zero or more named placeholders.
///
/// This empty type is not constructible.
///
/// # Placeholder Keys
///
/// The placeholder is [`MultiNamedPlaceholderKey`].
///
/// In [`Pattern::interpolate()`], pass a map-like structure. With the map implementations
/// in this module, a key that is missing from the map produces a
/// [`MissingNamedPlaceholderError`]; in best-effort output the placeholder is written as
/// its name in braces, such as `{your_name}`.
///
/// # Encoding Details
///
/// The literals and placeholders are stored in context. A placeholder is encoded as a name length
/// in octal code points followed by the placeholder name.
///
/// For example, consider the pattern: "Hello, {user} and {someone_else}!"
///
/// The encoding for this would be:
///
/// ```txt
/// Hello, \x00\x04user and \x01\x04someone_else!
/// ```
///
/// where `\x00\x04` and `\x01\x04` are big-endian octal numbers representing the lengths of
/// their respective placeholder names.
///
/// Consequences of this encoding:
///
/// 1. The maximum placeholder name length is 63 bytes (the largest two-digit octal number)
/// 2. Code points in the range `\x00` through `\x07` are reserved for the placeholder length
///    digits and are rejected in literals
///
/// # Examples
///
/// Example patterns supported by this backend:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::collections::BTreeMap;
///
/// let placeholder_value_map: BTreeMap<&str, &str> = [
///     ("num", "5"),
///     ("letter", "X"),
///     ("", "empty"),
///     ("unused", "unused"),
/// ]
/// .into_iter()
/// .collect();
///
/// // Single placeholder:
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "{num} days ago",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "5 days ago",
/// );
///
/// // No placeholder (note, the placeholder value is never accessed):
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "yesterday",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "yesterday",
/// );
///
/// // No literals, only placeholders:
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "{letter}{num}{}",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "X5empty",
/// );
/// ```
///
/// Use [`LiteMap`] for alloc-free formatting:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholderPattern;
/// use litemap::LiteMap;
/// use writeable::TryWriteable;
///
/// static PLACEHOLDER_VALUE_MAP: LiteMap<&str, usize, &[(&str, usize)]> =
///     LiteMap::from_sorted_store_unchecked(&[("seven", 11)]);
///
/// // Note: String allocates, but this could be a non-allocating sink
/// let mut sink = String::new();
///
/// MultiNamedPlaceholderPattern::try_from_str("{seven}", Default::default())
///     .unwrap()
///     .try_interpolate(&PLACEHOLDER_VALUE_MAP)
///     .try_write_to(&mut sink)
///     .unwrap()
///     .unwrap();
///
/// assert_eq!(sink, "11");
/// ```
///
/// Missing placeholder values cause an error result to be returned. However,
/// based on the design of [`TryWriteable`], the error can be discarded to get
/// a best-effort interpolation in which each missing placeholder appears as
/// its name in braces.
///
/// ```should_panic
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::collections::BTreeMap;
///
/// let placeholder_value_map: BTreeMap<&str, &str> =
///     [("num", "5"), ("letter", "X")].into_iter().collect();
///
/// Pattern::<MultiNamedPlaceholder>::try_from_str(
///     "Your name is {your_name}",
///     Default::default(),
/// )
/// .unwrap()
/// .try_interpolate_to_string(&placeholder_value_map)
/// .unwrap();
/// ```
///
/// Recover the best-effort lossy string by directly using [`Pattern::try_interpolate()`]:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MissingNamedPlaceholderError;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::borrow::Cow;
/// use std::collections::BTreeMap;
/// use writeable::TryWriteable;
///
/// let placeholder_value_map: BTreeMap<&str, &str> =
///     [("num", "5"), ("letter", "X")].into_iter().collect();
///
/// let pattern = Pattern::<MultiNamedPlaceholder>::try_from_str(
///     "Your name is {your_name}",
///     Default::default(),
/// )
/// .unwrap();
///
/// let mut buffer = String::new();
/// let result = pattern
///     .try_interpolate(&placeholder_value_map)
///     .try_write_to(&mut buffer)
///     .expect("infallible write to String");
///
/// assert!(matches!(result, Err(MissingNamedPlaceholderError { .. })));
/// assert_eq!(result.unwrap_err().name, "your_name");
/// assert_eq!(buffer, "Your name is {your_name}");
/// ```
///
/// [`Pattern::interpolate()`]: crate::Pattern::interpolate
/// [`Pattern::try_interpolate()`]: crate::Pattern::try_interpolate
/// [`TryWriteable`]: writeable::TryWriteable
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[allow(clippy::exhaustive_enums)] // Empty Enum
pub enum MultiNamedPlaceholder {}
324
325impl crate::private::Sealed for MultiNamedPlaceholder {}
326
327impl PatternBackend for MultiNamedPlaceholder {
328    type PlaceholderKey<'a> = MultiNamedPlaceholderKey<'a>;
329    #[cfg(feature = "alloc")]
330    type PlaceholderKeyCow<'a> = MultiNamedPlaceholderKeyCow<'a>;
331    type Error<'a> = MissingNamedPlaceholderError<'a>;
332    type Store = str;
333    type Iter<'a> = MultiNamedPlaceholderPatternIterator<'a>;
334
335    fn validate_store(store: &Self::Store) -> Result<(), Error> {
336        let mut iter = MultiNamedPlaceholderPatternIterator::new(store);
337        while iter
338            .try_next()
339            .map_err(|e| match e {
340                MultiNamedPlaceholderError::InvalidStore => Error::InvalidPattern,
341                MultiNamedPlaceholderError::Unreachable => {
342                    debug_assert!(false, "unreachable");
343                    Error::InvalidPattern
344                }
345            })?
346            .is_some()
347        {}
348        Ok(())
349    }
350
351    fn iter_items(store: &Self::Store) -> Self::Iter<'_> {
352        MultiNamedPlaceholderPatternIterator::new(store)
353    }
354
355    #[cfg(feature = "alloc")]
356    fn try_from_items<
357        'cow,
358        'ph,
359        I: Iterator<Item = Result<PatternItemCow<'cow, Self::PlaceholderKeyCow<'ph>>, Error>>,
360    >(
361        items: I,
362    ) -> Result<Box<str>, Error> {
363        let mut string = String::new();
364        for item in items {
365            match item? {
366                PatternItemCow::Literal(s) if s.contains(|x| (x as usize) <= 0x07) => {
367                    // TODO: Should this be a different error type?
368                    return Err(Error::InvalidPattern);
369                }
370                PatternItemCow::Literal(s) => string.push_str(&s),
371                PatternItemCow::Placeholder(ph_key) => {
372                    let name_length = ph_key.0.len();
373                    if name_length >= 64 {
374                        return Err(Error::InvalidPlaceholder);
375                    }
376                    let lead = (name_length >> 3) as u8;
377                    let trail = (name_length & 0x7) as u8;
378                    string.push(char::from(lead));
379                    string.push(char::from(trail));
380                    string.push_str(&ph_key.0);
381                }
382            }
383        }
384        Ok(string.into_boxed_str())
385    }
386
387    fn empty() -> &'static Self::Store {
388        ""
389    }
390}
391
/// Iterator over the literals and placeholders of a [`MultiNamedPlaceholder`] pattern store.
///
/// Each call to `next` decodes one item from the front of the remaining store.
#[derive(Debug)]
pub struct MultiNamedPlaceholderPatternIterator<'a> {
    /// The part of the encoded store that has not been consumed yet.
    store: &'a str,
}
396
397// Note: we don't implement ExactSizeIterator since we don't store that metadata in MultiNamed.
398
399impl<'a> Iterator for MultiNamedPlaceholderPatternIterator<'a> {
400    type Item = PatternItem<'a, MultiNamedPlaceholderKey<'a>>;
401    fn next(&mut self) -> Option<Self::Item> {
402        match self.try_next() {
403            Ok(next) => next,
404            Err(MultiNamedPlaceholderError::InvalidStore) => {
405                debug_assert!(
406                    false,
407                    "invalid store with {} bytes remaining",
408                    self.store.len()
409                );
410                None
411            }
412            Err(MultiNamedPlaceholderError::Unreachable) => {
413                debug_assert!(false, "unreachable");
414                None
415            }
416        }
417    }
418}
419
/// Internal failure modes of [`MultiNamedPlaceholderPatternIterator::try_next`].
enum MultiNamedPlaceholderError {
    /// The store is not a well-formed encoding (bad length digits or truncated name).
    InvalidStore,
    /// A split at an index returned by `find` failed, which is not expected to happen.
    Unreachable,
}
424
425impl<'a> MultiNamedPlaceholderPatternIterator<'a> {
426    fn new(store: &'a str) -> Self {
427        Self { store }
428    }
429
430    fn try_next(
431        &mut self,
432    ) -> Result<Option<PatternItem<'a, MultiNamedPlaceholderKey<'a>>>, MultiNamedPlaceholderError>
433    {
434        match self.store.find(|x| (x as usize) <= 0x07) {
435            Some(0) => {
436                // Placeholder
437                let Some((&[lead, trail], remainder)) = self
438                    .store
439                    .split_at_checked(2)
440                    .map(|(a, b)| (a.as_bytes(), b))
441                else {
442                    return Err(MultiNamedPlaceholderError::InvalidStore);
443                };
444                debug_assert!(lead <= 7);
445                if trail > 7 {
446                    return Err(MultiNamedPlaceholderError::InvalidStore);
447                }
448                let placeholder_len = (lead << 3) + trail;
449                let Some((placeholder_name, remainder)) =
450                    remainder.split_at_checked(placeholder_len as usize)
451                else {
452                    return Err(MultiNamedPlaceholderError::InvalidStore);
453                };
454                self.store = remainder;
455                Ok(Some(PatternItem::Placeholder(MultiNamedPlaceholderKey(
456                    placeholder_name,
457                ))))
458            }
459            Some(i) => {
460                // Literal
461                let Some((literal, remainder)) = self.store.split_at_checked(i) else {
462                    debug_assert!(false, "should be a perfect slice");
463                    return Err(MultiNamedPlaceholderError::Unreachable);
464                };
465                self.store = remainder;
466                Ok(Some(PatternItem::Literal(literal)))
467            }
468            None if self.store.is_empty() => {
469                // End of string
470                Ok(None)
471            }
472            None => {
473                // Closing literal
474                let literal = self.store;
475                self.store = "";
476                Ok(Some(PatternItem::Literal(literal)))
477            }
478        }
479    }
480}
481
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{MultiNamedPlaceholder, MultiNamedPlaceholderPattern};

    /// Malformed pattern strings and malformed stores must both be rejected.
    #[test]
    fn test_invalid() {
        // 64 bytes is the shortest name that no longer fits in two octal length digits.
        let name_at_limit = "a".repeat(64);
        let placeholder_at_limit = ["{", name_at_limit.as_str(), "}"].concat();
        // A name longer than 255 bytes also covers lengths that exceed the `u8` range.
        let name_over_u8 = "0123456789".repeat(30);
        let placeholder_over_u8 = ["{", name_over_u8.as_str(), "}"].concat();
        let strings = [
            "{",                           // invalid syntax
            placeholder_at_limit.as_str(), // placeholder name too long (boundary)
            placeholder_over_u8.as_str(),  // placeholder name too long
            "\x00",                        // invalid character
            "\x07",                        // invalid character
        ];
        for string in strings {
            assert!(
                MultiNamedPlaceholderPattern::try_from_str(string, Default::default()).is_err(),
                "{string:?}"
            );
        }
        let stores = [
            "\x00",      // too short
            "\x02",      // too short
            "\x00\x02",  // no placeholder name
            "\x00\x02a", // placeholder name too short
        ];
        for store in stores {
            assert!(
                MultiNamedPlaceholder::validate_store(store).is_err(),
                "{store:?}"
            );
        }
    }

    /// A store holding a placeholder name of the maximum encodable length (63 bytes) is valid.
    #[test]
    fn test_longest_name_store() {
        let name = "a".repeat(63);
        // Length digits 7 and 7 encode (7 << 3) + 7 == 63.
        let store = ["\x07\x07", name.as_str()].concat();
        assert!(MultiNamedPlaceholder::validate_store(&store).is_ok());
    }
}
516}