// icu_locale_core/parser/mod.rs
pub mod errors;
mod langid;
mod locale;
pub use errors::ParseError;
pub use langid::{
parse_language_identifier, parse_language_identifier_from_iter,
parse_language_identifier_with_single_variant,
parse_locale_with_single_variant_single_keyword_unicode_extension_from_iter, ParserMode,
};
pub use locale::{
parse_locale, parse_locale_with_single_variant_single_keyword_unicode_keyword_extension,
};
/// Returns the leading part of `slice` that precedes its first separator
/// byte (`-` or `_`).
///
/// The whole slice is returned when it contains no separator. The result is
/// empty when `slice` is empty or begins with a separator.
const fn skip_before_separator(slice: &[u8]) -> &[u8] {
    let total = slice.len();
    let mut prefix_len = 0;
    loop {
        if prefix_len >= total {
            // Ran off the end without meeting a separator.
            break;
        }
        // In bounds: the guard above guarantees `prefix_len < total`.
        #[allow(clippy::indexing_slicing)]
        let byte = slice[prefix_len];
        if matches!(byte, b'-' | b'_') {
            break;
        }
        prefix_len += 1;
    }
    // SAFETY: `prefix_len` starts at 0 and is only incremented while it is
    // strictly less than `slice.len()`, so `prefix_len <= slice.len()` and the
    // pointer/length pair describes a prefix of `slice`.
    unsafe { core::slice::from_raw_parts(slice.as_ptr(), prefix_len) }
}
/// Iterator over the subtags of a byte string, where subtags are delimited by
/// `-` or `_`.
///
/// The type is `Copy`, so a copy can be advanced independently of the
/// original.
#[derive(Copy, Clone, Debug)]
pub struct SubtagIterator<'a> {
// Input starting at the pending subtag: while `current` is `Some`, it is a
// prefix of this slice. Not updated once the final subtag has been yielded.
remaining: &'a [u8],
// Subtag the next call will yield; `None` once the iterator is exhausted.
current: Option<&'a [u8]>,
}
impl<'a> SubtagIterator<'a> {
    /// Creates an iterator over the subtags of `rest`.
    ///
    /// The first subtag is located immediately, so every input (including an
    /// empty one) yields at least one subtag, which may be empty.
    pub const fn new(rest: &'a [u8]) -> Self {
        let first = skip_before_separator(rest);
        Self {
            current: Some(first),
            remaining: rest,
        }
    }

    /// `const`-compatible counterpart of [`Iterator::next`].
    ///
    /// Consumes the iterator and returns the advanced iterator together with
    /// the subtag that was pending, or `None` if the iterator was already
    /// exhausted.
    pub const fn next_const(mut self) -> (Self, Option<&'a [u8]>) {
        let subtag = match self.current {
            Some(subtag) => subtag,
            None => return (self, None),
        };

        let input_len = self.remaining.len();
        if subtag.len() >= input_len {
            // The pending subtag runs to the end of the input, so nothing
            // follows it.
            self.current = None;
            return (self, Some(subtag));
        }

        // A separator byte follows the subtag; step over both.
        let consumed = subtag.len() + 1;
        // SAFETY: `subtag` is the prefix of `self.remaining` produced by
        // `skip_before_separator` and is strictly shorter than it, so
        // `consumed <= input_len`. The offset pointer therefore stays inside
        // (or one past the end of) the same slice, and
        // `input_len - consumed` is exactly the number of bytes left after
        // the separator.
        let tail = unsafe {
            core::slice::from_raw_parts(
                self.remaining.as_ptr().add(consumed),
                input_len - consumed,
            )
        };
        self.remaining = tail;
        self.current = Some(skip_before_separator(tail));
        (self, Some(subtag))
    }

    /// Returns the subtag the next call would yield, without advancing.
    pub const fn peek(&self) -> Option<&'a [u8]> {
        self.current
    }
}
impl<'a> Iterator for SubtagIterator<'a> {
    type Item = &'a [u8];

    /// Yields the pending subtag and advances past it, delegating to
    /// `next_const` on a copy of the iterator and storing the advanced state
    /// back into `self`.
    fn next(&mut self) -> Option<Self::Item> {
        let (advanced, subtag) = Self::next_const(*self);
        *self = advanced;
        subtag
    }
}
#[cfg(test)]
mod test {
    use super::*;

    /// Views a subtag's bytes as `&str`; panics if they are not valid UTF-8.
    fn slice_to_str(input: &[u8]) -> &str {
        std::str::from_utf8(input).unwrap()
    }

    /// `peek` must report the pending subtag without consuming it.
    #[test]
    fn subtag_iterator_peek_test() {
        let mut subtags = SubtagIterator::new("de_at-u-ca-foobar".as_bytes());
        for expected in ["de", "at"] {
            // Two peeks in a row observe the same subtag ...
            assert_eq!(subtags.peek().map(slice_to_str), Some(expected));
            assert_eq!(subtags.peek().map(slice_to_str), Some(expected));
            // ... and `next` then yields that very subtag.
            assert_eq!(subtags.next().map(slice_to_str), Some(expected));
        }
    }

    /// Subtags are split on `-` and `_`; leading, trailing and doubled
    /// separators produce empty subtags.
    #[test]
    fn subtag_iterator_test() {
        // For these inputs only the first yielded subtag is checked.
        for input in ["", "-"] {
            let mut subtags = SubtagIterator::new(input.as_bytes());
            assert_eq!(subtags.next().map(slice_to_str), Some(""));
        }

        // Each input paired with the complete sequence it must yield.
        let cases: [(&str, &[&str]); 6] = [
            ("-en", &["", "en"]),
            ("en", &["en"]),
            ("en-", &["en", ""]),
            ("--", &["", "", ""]),
            ("-en-", &["", "en", ""]),
            ("de_at-u-ca-foobar", &["de", "at", "u", "ca", "foobar"]),
        ];
        for (input, expected) in cases {
            let actual: Vec<&str> = SubtagIterator::new(input.as_bytes())
                .map(slice_to_str)
                .collect();
            assert_eq!(actual, expected);
        }
    }

    /// The helper returns everything before the first separator.
    #[test]
    fn skip_before_separator_test() {
        let cases: [(&[u8], &[u8]); 7] = [
            (b"", b""),
            (b"en", b"en"),
            (b"en-", b"en"),
            (b"en--US", b"en"),
            (b"-US", b""),
            (b"US", b"US"),
            (b"-", b""),
        ];
        for (input, expected) in cases {
            assert_eq!(skip_before_separator(input), expected);
        }
    }
}