icu_pattern/multi_named.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5//! Code for the [`MultiNamedPlaceholder`] pattern backend.
6
7#[cfg(feature = "alloc")]
8use alloc::{borrow::Cow, boxed::Box, collections::BTreeMap, str::FromStr, string::String};
9use core::fmt;
10#[cfg(feature = "litemap")]
11use litemap::LiteMap;
12use writeable::Writeable;
13
14use crate::common::*;
15use crate::Error;
16
/// Borrowed placeholder key for the [`MultiNamedPlaceholder`] pattern backend.
///
/// This is a transparent newtype around the placeholder's name.
///
/// # Examples
///
/// ```
/// use core::cmp::Ordering;
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholderKey;
/// use icu_pattern::MultiNamedPlaceholderPattern;
/// use icu_pattern::PatternItem;
///
/// // Parse the string syntax and check the resulting data store:
/// let pattern = MultiNamedPlaceholderPattern::try_from_str(
///     "Hello, {person0} and {person1}!",
///     Default::default(),
/// )
/// .unwrap();
///
/// assert_eq!(
///     pattern.iter().cmp(
///         [
///             PatternItem::Literal("Hello, "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey(
///                 "person0".into()
///             )),
///             PatternItem::Literal(" and "),
///             PatternItem::Placeholder(MultiNamedPlaceholderKey(
///                 "person1".into()
///             )),
///             PatternItem::Literal("!")
///         ]
///         .into_iter()
///     ),
///     Ordering::Equal
/// );
/// ```
#[repr(transparent)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct MultiNamedPlaceholderKey<'a>(pub &'a str);
57
/// Owned-or-borrowed counterpart of [`MultiNamedPlaceholderKey`].
///
/// Wraps the placeholder name in a [`Cow`]; this is the key type used while a
/// pattern is being constructed.
#[cfg(feature = "alloc")]
#[repr(transparent)]
#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
#[cfg_attr(feature = "serde", derive(serde::Deserialize, serde::Serialize))]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct MultiNamedPlaceholderKeyCow<'a>(pub Cow<'a, str>);
65
#[cfg(feature = "alloc")]
impl FromStr for MultiNamedPlaceholderKeyCow<'_> {
    type Err = Error;

    /// Wraps a copy of `s` as an owned placeholder key. This conversion never fails.
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        // `FromStr` does not tie the output lifetime to the input, so the name
        // has to be copied rather than borrowed.
        let owned_name = String::from(s);
        Ok(Self(Cow::Owned(owned_name)))
    }
}
74
/// Error reported when a value provider has no entry for a placeholder name.
#[non_exhaustive]
#[derive(Debug, Clone, PartialEq, Eq)]
pub struct MissingNamedPlaceholderError<'a> {
    /// The placeholder name for which the lookup failed.
    pub name: &'a str,
}
80
81impl Writeable for MissingNamedPlaceholderError<'_> {
82 fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
83 sink.write_char('{')?;
84 sink.write_str(self.name)?;
85 sink.write_char('}')?;
86 Ok(())
87 }
88}
89
/// Lets a [`BTreeMap`] keyed by string-like values supply placeholder values by name.
#[cfg(feature = "alloc")]
impl<'k, K, W> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for BTreeMap<K, W>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
{
    type Error = MissingNamedPlaceholderError<'k>;

    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Returns the value stored under the placeholder's name, or a
    /// [`MissingNamedPlaceholderError`] carrying that name if the map has no such key.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let MultiNamedPlaceholderKey(name) = key;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Returns the literal unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
120
/// Lets a [`LiteMap`] keyed by string-like values supply placeholder values by name.
#[cfg(feature = "litemap")]
impl<'k, K, W, S> PlaceholderValueProvider<MultiNamedPlaceholderKey<'k>> for LiteMap<K, W, S>
where
    K: Ord + core::borrow::Borrow<str>,
    W: Writeable,
    S: litemap::store::Store<K, W>,
{
    type Error = MissingNamedPlaceholderError<'k>;

    type W<'a>
        = Result<&'a W, Self::Error>
    where
        Self: 'a;

    type L<'a, 'l>
        = &'l str
    where
        Self: 'a;

    /// Returns the value stored under the placeholder's name, or a
    /// [`MissingNamedPlaceholderError`] carrying that name if the map has no such key.
    #[inline]
    fn value_for<'a>(&'a self, key: MultiNamedPlaceholderKey<'k>) -> Self::W<'a> {
        let MultiNamedPlaceholderKey(name) = key;
        self.get(name).ok_or(MissingNamedPlaceholderError { name })
    }

    /// Returns the literal unchanged.
    #[inline]
    fn map_literal<'a, 'l>(&'a self, literal: &'l str) -> Self::L<'a, 'l> {
        literal
    }
}
152
/// Backend for patterns containing zero or more named placeholders.
///
/// This empty type is not constructible.
///
/// # Placeholder Keys
///
/// The placeholder is [`MultiNamedPlaceholderKey`].
///
/// In [`Pattern::interpolate()`], pass a map-like structure. A key that is missing from the
/// map produces a [`MissingNamedPlaceholderError`], which writes the placeholder name in
/// curly braces (for example `{your_name}`) in place of the value.
///
/// # Encoding Details
///
/// The literals and placeholders are stored in context. A placeholder is encoded as a name length
/// in octal code points followed by the placeholder name.
///
/// For example, consider the pattern: "Hello, {user} and {someone_else}!"
///
/// The encoding for this would be:
///
/// ```txt
/// Hello, \x00\x04user and \x01\x04someone_else!
/// ```
///
/// where `\x00\x04` and `\x01\x04` are big-endian octal numbers representing the lengths of
/// their respective placeholder names.
///
/// Consequences of this encoding:
///
/// 1. The maximum placeholder name length is 63 bytes (the largest two-digit octal
///    number, `0o77`)
/// 2. Code points in the range `\x00` through `\x07` are reserved for the placeholder name
///    length and are rejected in literals
///
/// # Examples
///
/// Example patterns supported by this backend:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::collections::BTreeMap;
///
/// let placeholder_value_map: BTreeMap<&str, &str> = [
///     ("num", "5"),
///     ("letter", "X"),
///     ("", "empty"),
///     ("unused", "unused"),
/// ]
/// .into_iter()
/// .collect();
///
/// // Single placeholder:
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "{num} days ago",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "5 days ago",
/// );
///
/// // No placeholder (note, the placeholder value is never accessed):
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "yesterday",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "yesterday",
/// );
///
/// // No literals, only placeholders:
/// assert_eq!(
///     Pattern::<MultiNamedPlaceholder>::try_from_str(
///         "{letter}{num}{}",
///         Default::default()
///     )
///     .unwrap()
///     .try_interpolate_to_string(&placeholder_value_map)
///     .unwrap(),
///     "X5empty",
/// );
/// ```
///
/// Use [`LiteMap`] for alloc-free formatting:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholderPattern;
/// use litemap::LiteMap;
/// use writeable::TryWriteable;
///
/// static placeholder_value_map: LiteMap<&str, usize, &[(&str, usize)]> =
///     LiteMap::from_sorted_store_unchecked(&[("seven", 11)]);
///
/// // Note: String allocates, but this could be a non-allocating sink
/// let mut sink = String::new();
///
/// MultiNamedPlaceholderPattern::try_from_str("{seven}", Default::default())
///     .unwrap()
///     .try_interpolate(&placeholder_value_map)
///     .try_write_to(&mut sink)
///     .unwrap()
///     .unwrap();
///
/// assert_eq!(sink, "11");
/// ```
///
/// Missing placeholder values cause an error result to be returned. However,
/// based on the design of [`TryWriteable`], the error can be discarded to get
/// a best-effort interpolation in which each missing placeholder appears as its
/// name in curly braces.
///
/// ```should_panic
/// use core::str::FromStr;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::collections::BTreeMap;
///
/// let placeholder_value_map: BTreeMap<&str, &str> =
///     [("num", "5"), ("letter", "X")].into_iter().collect();
///
/// Pattern::<MultiNamedPlaceholder>::try_from_str(
///     "Your name is {your_name}",
///     Default::default(),
/// )
/// .unwrap()
/// .try_interpolate_to_string(&placeholder_value_map)
/// .unwrap();
/// ```
///
/// Recover the best-effort lossy string by directly using [`Pattern::try_interpolate()`]:
///
/// ```
/// use core::str::FromStr;
/// use icu_pattern::MissingNamedPlaceholderError;
/// use icu_pattern::MultiNamedPlaceholder;
/// use icu_pattern::Pattern;
/// use std::borrow::Cow;
/// use std::collections::BTreeMap;
/// use writeable::TryWriteable;
///
/// let placeholder_value_map: BTreeMap<&str, &str> =
///     [("num", "5"), ("letter", "X")].into_iter().collect();
///
/// let pattern = Pattern::<MultiNamedPlaceholder>::try_from_str(
///     "Your name is {your_name}",
///     Default::default(),
/// )
/// .unwrap();
///
/// let mut buffer = String::new();
/// let result = pattern
///     .try_interpolate(&placeholder_value_map)
///     .try_write_to(&mut buffer)
///     .expect("infallible write to String");
///
/// assert!(matches!(result, Err(MissingNamedPlaceholderError { .. })));
/// assert_eq!(result.unwrap_err().name, "your_name");
/// assert_eq!(buffer, "Your name is {your_name}");
/// ```
///
/// [`Pattern::interpolate()`]: crate::Pattern::interpolate
/// [`Pattern::try_interpolate()`]: crate::Pattern::try_interpolate
/// [`TryWriteable`]: writeable::TryWriteable
#[derive(Debug, Copy, Clone, PartialEq, Eq)]
#[allow(clippy::exhaustive_enums)] // Empty Enum
pub enum MultiNamedPlaceholder {}
324
// Marker impl of the crate-private `Sealed` trait for this backend.
// NOTE(review): presumably `PatternBackend` requires `Sealed` so that it cannot be
// implemented outside this crate — confirm against `crate::private`.
impl crate::private::Sealed for MultiNamedPlaceholder {}
326
327impl PatternBackend for MultiNamedPlaceholder {
328 type PlaceholderKey<'a> = MultiNamedPlaceholderKey<'a>;
329 #[cfg(feature = "alloc")]
330 type PlaceholderKeyCow<'a> = MultiNamedPlaceholderKeyCow<'a>;
331 type Error<'a> = MissingNamedPlaceholderError<'a>;
332 type Store = str;
333 type Iter<'a> = MultiNamedPlaceholderPatternIterator<'a>;
334
335 fn validate_store(store: &Self::Store) -> Result<(), Error> {
336 let mut iter = MultiNamedPlaceholderPatternIterator::new(store);
337 while iter
338 .try_next()
339 .map_err(|e| match e {
340 MultiNamedPlaceholderError::InvalidStore => Error::InvalidPattern,
341 MultiNamedPlaceholderError::Unreachable => {
342 debug_assert!(false, "unreachable");
343 Error::InvalidPattern
344 }
345 })?
346 .is_some()
347 {}
348 Ok(())
349 }
350
351 fn iter_items(store: &Self::Store) -> Self::Iter<'_> {
352 MultiNamedPlaceholderPatternIterator::new(store)
353 }
354
355 #[cfg(feature = "alloc")]
356 fn try_from_items<
357 'cow,
358 'ph,
359 I: Iterator<Item = Result<PatternItemCow<'cow, Self::PlaceholderKeyCow<'ph>>, Error>>,
360 >(
361 items: I,
362 ) -> Result<Box<str>, Error> {
363 let mut string = String::new();
364 for item in items {
365 match item? {
366 PatternItemCow::Literal(s) if s.contains(|x| (x as usize) <= 0x07) => {
367 // TODO: Should this be a different error type?
368 return Err(Error::InvalidPattern);
369 }
370 PatternItemCow::Literal(s) => string.push_str(&s),
371 PatternItemCow::Placeholder(ph_key) => {
372 let name_length = ph_key.0.len();
373 if name_length >= 64 {
374 return Err(Error::InvalidPlaceholder);
375 }
376 let lead = (name_length >> 3) as u8;
377 let trail = (name_length & 0x7) as u8;
378 string.push(char::from(lead));
379 string.push(char::from(trail));
380 string.push_str(&ph_key.0);
381 }
382 }
383 }
384 Ok(string.into_boxed_str())
385 }
386
387 fn empty() -> &'static Self::Store {
388 ""
389 }
390}
391
/// Iterator over the pattern items encoded in a [`MultiNamedPlaceholder`] store.
#[derive(Debug)]
pub struct MultiNamedPlaceholderPatternIterator<'a> {
    /// The part of the encoded store that has not been yielded yet.
    store: &'a str,
}
396
397// Note: we don't implement ExactSizeIterator since we don't store that metadata in MultiNamed.
398
399impl<'a> Iterator for MultiNamedPlaceholderPatternIterator<'a> {
400 type Item = PatternItem<'a, MultiNamedPlaceholderKey<'a>>;
401 fn next(&mut self) -> Option<Self::Item> {
402 match self.try_next() {
403 Ok(next) => next,
404 Err(MultiNamedPlaceholderError::InvalidStore) => {
405 debug_assert!(
406 false,
407 "invalid store with {} bytes remaining",
408 self.store.len()
409 );
410 None
411 }
412 Err(MultiNamedPlaceholderError::Unreachable) => {
413 debug_assert!(false, "unreachable");
414 None
415 }
416 }
417 }
418}
419
/// Internal failure modes of decoding a `MultiNamedPlaceholder` store.
enum MultiNamedPlaceholderError {
    /// The store is malformed: a placeholder header or name is truncated, or the
    /// second length digit is out of range.
    InvalidStore,
    /// Splitting the store at an index reported by `find` failed, which should
    /// not be possible.
    Unreachable,
}
424
425impl<'a> MultiNamedPlaceholderPatternIterator<'a> {
426 fn new(store: &'a str) -> Self {
427 Self { store }
428 }
429
430 fn try_next(
431 &mut self,
432 ) -> Result<Option<PatternItem<'a, MultiNamedPlaceholderKey<'a>>>, MultiNamedPlaceholderError>
433 {
434 match self.store.find(|x| (x as usize) <= 0x07) {
435 Some(0) => {
436 // Placeholder
437 let Some((&[lead, trail], remainder)) = self
438 .store
439 .split_at_checked(2)
440 .map(|(a, b)| (a.as_bytes(), b))
441 else {
442 return Err(MultiNamedPlaceholderError::InvalidStore);
443 };
444 debug_assert!(lead <= 7);
445 if trail > 7 {
446 return Err(MultiNamedPlaceholderError::InvalidStore);
447 }
448 let placeholder_len = (lead << 3) + trail;
449 let Some((placeholder_name, remainder)) =
450 remainder.split_at_checked(placeholder_len as usize)
451 else {
452 return Err(MultiNamedPlaceholderError::InvalidStore);
453 };
454 self.store = remainder;
455 Ok(Some(PatternItem::Placeholder(MultiNamedPlaceholderKey(
456 placeholder_name,
457 ))))
458 }
459 Some(i) => {
460 // Literal
461 let Some((literal, remainder)) = self.store.split_at_checked(i) else {
462 debug_assert!(false, "should be a perfect slice");
463 return Err(MultiNamedPlaceholderError::Unreachable);
464 };
465 self.store = remainder;
466 Ok(Some(PatternItem::Literal(literal)))
467 }
468 None if self.store.is_empty() => {
469 // End of string
470 Ok(None)
471 }
472 None => {
473 // Closing literal
474 let literal = self.store;
475 self.store = "";
476 Ok(Some(PatternItem::Literal(literal)))
477 }
478 }
479 }
480}
481
#[cfg(test)]
mod tests {
    use super::*;
    use crate::{MultiNamedPlaceholder, MultiNamedPlaceholderPattern};

    /// Malformed pattern strings and malformed encoded stores must both be rejected.
    #[test]
    fn test_invalid() {
        // Each `@` in a template below is expanded to this very long name.
        let oversized_name = "0123456789".repeat(1000000);
        for template in [
            "{",    // invalid syntax
            "{@}",  // placeholder name too long
            "\x00", // invalid character
            "\x07", // invalid character
        ] {
            let pattern = template.replace('@', &oversized_name);
            assert!(
                MultiNamedPlaceholderPattern::try_from_str(&pattern, Default::default()).is_err(),
                "{pattern:?}"
            );
        }

        for encoded in [
            "\x00",      // too short
            "\x02",      // too short
            "\x00\x02",  // no placeholder name
            "\x00\x02a", // placeholder name too short
        ] {
            assert!(
                MultiNamedPlaceholder::validate_store(encoded).is_err(),
                "{encoded:?}"
            );
        }
    }
}
516}