1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
//! Reference `Skeleton` implementation for parsing.
use super::error::SkeletonError;
use crate::fields::{self, Field, FieldLength, FieldSymbol};
#[cfg(feature = "datagen")]
use crate::provider::pattern::reference::Pattern;
use alloc::vec::Vec;
use core::convert::TryFrom;
use smallvec::SmallVec;
/// A [`Skeleton`] is used to represent what types of `Field`s are present in a [`Pattern`]. The
/// ordering of the [`Skeleton`]'s `Field`s have no bearing on the ordering of the `Field`s and
/// `Literal`s in the [`Pattern`].
///
/// A [`Skeleton`] is a [`Vec`]`<Field>`, but with the invariant that it is sorted according to the canonical
/// sort order. This order is sorted according to the most significant `Field` to the least significant.
/// For example, a field with a `Minute` symbol would precede a field with a `Second` symbol.
/// This order is documented as the order of fields as presented in the
/// [UTS 35 Date Field Symbol Table](https://unicode.org/reports/tr35/tr35-dates.html#Date_Field_Symbol_Table)
///
/// The `Field`s are only sorted in the [`Skeleton`] in order to provide a deterministic
/// serialization strategy, and to provide a faster [`Skeleton`] matching operation.
#[derive(Debug, Eq, PartialEq, Clone, Ord, PartialOrd)]
// TODO(#876): Use ZeroVec instead of SmallVec
pub struct Skeleton(pub(crate) SmallVec<[fields::Field; 5]>);
impl Skeleton {
pub(crate) fn fields_iter(&self) -> impl Iterator<Item = &Field> {
self.0.iter()
}
pub(crate) fn fields_len(&self) -> usize {
self.0.len()
}
/// Return the underlying fields as a slice.
pub fn as_slice(&self) -> &[fields::Field] {
self.0.as_slice()
}
}
impl From<SmallVec<[fields::Field; 5]>> for Skeleton {
fn from(fields: SmallVec<[fields::Field; 5]>) -> Self {
Self(fields)
}
}
impl From<Vec<fields::Field>> for Skeleton {
fn from(fields: Vec<fields::Field>) -> Self {
Self(fields.into())
}
}
impl From<&[fields::Field]> for Skeleton {
fn from(fields: &[fields::Field]) -> Self {
Self(fields.into())
}
}
/// Convert a Pattern into a Skeleton. This will remove all of the string literals, and sort
/// the fields into the canonical sort order. Not all fields are supported by Skeletons, so map
/// fields into skeleton-appropriate ones. For instance, in the "ja" locale the pattern "aK:mm"
/// gets transformed into the skeleton "hmm".
///
/// At the time of this writing, it's being used for applying hour cycle preferences and should not
/// be exposed as a public API for end users.
#[doc(hidden)]
#[cfg(feature = "datagen")]
impl From<&Pattern> for Skeleton {
fn from(pattern: &Pattern) -> Self {
let mut fields: SmallVec<[fields::Field; 5]> = SmallVec::new();
for item in pattern.items() {
if let crate::provider::pattern::PatternItem::Field(field) = item {
let mut field = *field;
// Skeletons only have a subset of available fields, these are then mapped to more
// specific fields for the patterns they expand to.
field.symbol = match field.symbol {
// Only the format varieties are used in the skeletons, the matched patterns
// will be more specific.
FieldSymbol::Month(_) => FieldSymbol::Month(fields::Month::Format),
FieldSymbol::Weekday(_) => FieldSymbol::Weekday(fields::Weekday::Format),
// Only flexible day periods are used in skeletons, ignore all others.
FieldSymbol::DayPeriod(fields::DayPeriod::AmPm)
| FieldSymbol::DayPeriod(fields::DayPeriod::NoonMidnight) => continue,
// TODO(#487) - Flexible day periods should be included here.
// FieldSymbol::DayPeriod(fields::DayPeriod::Flexible) => {
// FieldSymbol::DayPeriod(fields::DayPeriod::Flexible)
// }
// Only the H12 and H23 symbols are used in skeletons, while the patterns may
// contain H11 or H23 depending on the localization.
FieldSymbol::Hour(fields::Hour::H11) | FieldSymbol::Hour(fields::Hour::H12) => {
FieldSymbol::Hour(fields::Hour::H12)
}
FieldSymbol::Hour(fields::Hour::H23) | FieldSymbol::Hour(fields::Hour::H24) => {
FieldSymbol::Hour(fields::Hour::H23)
}
// Pass through all of the following preferences unchanged.
FieldSymbol::Minute
| FieldSymbol::Second(_)
| FieldSymbol::TimeZone(_)
| FieldSymbol::DecimalSecond(_)
| FieldSymbol::Era
| FieldSymbol::Year(_)
| FieldSymbol::Week(_)
| FieldSymbol::Day(_) => field.symbol,
};
// Only insert if it's a unique field.
if let Err(pos) = fields.binary_search(&field) {
fields.insert(pos, field)
}
}
}
Self(fields)
}
}
/// Parse a string into a list of fields. This trait implementation validates the input string to
/// verify that fields are correct. If the fields are out of order, this returns an error that
/// contains the fields, which gives the callee a chance to sort the fields with the
/// `From<SmallVec<[fields::Field; 5]>> for Skeleton` trait.
impl TryFrom<&str> for Skeleton {
type Error = SkeletonError;
fn try_from(skeleton_string: &str) -> Result<Self, Self::Error> {
let mut fields: SmallVec<[fields::Field; 5]> = SmallVec::new();
let mut iter = skeleton_string.chars().peekable();
while let Some(ch) = iter.next() {
// Go through the chars to count how often it's repeated.
let mut field_length: u8 = 1;
while let Some(next_ch) = iter.peek() {
if *next_ch != ch {
break;
}
field_length += 1;
iter.next();
}
// Convert the byte to a valid field symbol.
let field_symbol = if ch == 'Z' {
match field_length {
1..=3 => {
field_length = 4;
FieldSymbol::try_from('x')?
}
4 => FieldSymbol::try_from('O')?,
5 => {
field_length = 4;
FieldSymbol::try_from('X')?
}
_ => FieldSymbol::try_from(ch)?,
}
} else {
FieldSymbol::try_from(ch)?
};
let field = Field::from((field_symbol, FieldLength::from_idx(field_length)?));
match fields.binary_search(&field) {
Ok(_) => return Err(SkeletonError::DuplicateField),
Err(pos) => fields.insert(pos, field),
}
}
Ok(Self::from(fields))
}
}
#[cfg(feature = "datagen")]
impl core::fmt::Display for Skeleton {
fn fmt(&self, formatter: &mut core::fmt::Formatter) -> core::fmt::Result {
use core::fmt::Write;
for field in self.fields_iter() {
let ch: char = field.symbol.into();
for _ in 0..field.length.to_len() {
formatter.write_char(ch)?;
}
}
Ok(())
}
}