Struct icu::experimental::transliterate::Transliterator

source ·
pub struct Transliterator { /* private fields */ }
Expand description

A Transliterator allows transliteration based on UTS #35 transform rules, including overrides with custom implementations.

§Examples

A transliterator with a custom alias referenced by another:

use icu::experimental::transliterate::{Transliterator, CustomTransliterator, RuleCollection};
use icu::locale::Locale;

// Build a rule collection out of three cooperating pieces.
// Note: These rules are for demonstration purposes only! Do not use.
let mut collection = RuleCollection::default();

// 1. Main entrypoint: a chain of several transliterators. The chain refers to
//    `FlattenLowerUmlaut` and `AsciiUpper` by name.
let chain_rules =
    String::from("::NFD; ::FlattenLowerUmlaut; ::[:Nonspacing_Mark:] Remove; ::AsciiUpper; ::NFC;");
collection.register_source(
    &"und-t-und-x0-custom".parse().unwrap(),
    chain_rules,
    [],
    false,
    true,
);

// 2. A custom ruleset that expands lowercase umlauts, registered with the
//    alias "Any-FlattenLowerUmlaut". Each rule matches both the precomposed
//    letter and the base letter followed by U+0308.
let umlaut_rules = String::from(
    r#"
      [ä {a \u0308}] → ae;
      [ö {o \u0308}] → oe;
      [ü {u \u0308}] → ue;
    "#,
);
collection.register_source(
    &"und-t-und-x0-dep1".parse().unwrap(),
    umlaut_rules,
    ["Any-FlattenLowerUmlaut"],
    false,
    true,
);

// 3. A custom transliterator that uppercases all ASCII characters
#[derive(Debug)]
struct AsciiUpperTransliterator;
impl CustomTransliterator for AsciiUpperTransliterator {
    // Returns the ASCII-uppercased form of `input[range]`. Non-ASCII characters
    // (such as 'ß') are left unchanged by `to_ascii_uppercase`.
    fn transliterate(&self, input: &str, range: std::ops::Range<usize>) -> String {
        // Transliterate only the slice selected by `range`, not the whole input.
        // NOTE(review): the text outside `range` is presumably context only, so
        // returning it would duplicate it in the output — confirm against the
        // `CustomTransliterator` trait documentation.
        input[range].to_ascii_uppercase()
    }
}
// Make "Any-AsciiUpper" (used by the main chain) resolve to the
// "und-t-und-x0-dep2" ID, which the override closure below recognizes.
collection.register_aliases(
    &"und-t-und-x0-dep2".parse().unwrap(),
    ["Any-AsciiUpper"],
);

// Create a transliterator from the main entrypoint. The same provider is
// passed for both provider arguments.
let provider = collection.as_provider();
let transliterator = Transliterator::try_new_with_override_unstable(
    &provider,
    &provider,
    &"und-t-und-x0-custom".parse().unwrap(),
    |nested| {
        nested
            .normalizing_eq("und-t-und-x0-dep2")
            .then_some(Ok(Box::new(AsciiUpperTransliterator)))
    },
)
.unwrap();

// Test the behavior:
// - The uppercase 'Ü' is stripped of its umlaut
// - The lowercase 'ä' is expanded to "ae"
// - All ASCII characters are uppercased: not 'ß', which is not ASCII
let output = transliterator.transliterate(String::from("Übermäßig"));
assert_eq!(output, "UBERMAEßIG");

Implementations§

source§

impl Transliterator

source

pub fn try_new(locale: &Locale) -> Result<Transliterator, DataError>

Construct a Transliterator from the given Locale.

§Examples
use icu::experimental::transliterate::Transliterator;
// BCP-47-T ID for Bengali to Arabic transliteration
let bengali_to_arabic = "und-Arab-t-und-beng".parse().unwrap();
let transliterator = Transliterator::try_new(&bengali_to_arabic).unwrap();

// Feed in Bengali text and compare the result with the expected Arabic-script string.
let input = String::from("অকার্যতানাযা");
assert_eq!(transliterator.transliterate(input), "اكاريتانايا");
source

pub fn try_new_with_any_provider( provider: &(impl AnyProvider + ?Sized), locale: &Locale, ) -> Result<Transliterator, DataError>

A version of Self::try_new that uses custom data provided by an AnyProvider.

📚 Help choosing a constructor

source

pub fn try_new_with_buffer_provider( provider: &(impl BufferProvider + ?Sized), locale: &Locale, ) -> Result<Transliterator, DataError>

A version of Self::try_new that uses custom data provided by a BufferProvider.

Enabled with the serde feature.

📚 Help choosing a constructor

source

pub fn try_new_unstable<PT, PN>( transliterator_provider: &PT, normalizer_provider: &PN, locale: &Locale, ) -> Result<Transliterator, DataError>

A version of Self::try_new that uses custom data provided by a DataProvider.

📚 Help choosing a constructor

⚠️ The bounds on the provider type parameters (PT and PN) may change over time, including in SemVer minor releases.
source

pub fn try_new_with_override<F>( locale: &Locale, lookup: F, ) -> Result<Transliterator, DataError>

Construct a Transliterator from the given Locale using overrides provided by lookup.

This allows clients to override the nested transliterators used by this transliterator. Each nested transliterator is first requested from lookup; only if lookup returns None is the nested transliterator defined by the data loaded instead. See CustomTransliterator.

§Example

Overriding "de-t-de-d0-ascii"’s dependency on "und-t-und-Latn-d0-ascii":

use icu::experimental::transliterate::{Transliterator, CustomTransliterator};
use icu::locale::Locale;
use core::ops::Range;

// A custom transliterator that rewrites "oeverride" to "overridden" within
// the allowed range of the input.
#[derive(Debug)]
struct FunkyGermanToAscii;
impl CustomTransliterator for FunkyGermanToAscii {
    fn transliterate(&self, input: &str, allowed_range: Range<usize>) -> String {
        // Only the text inside `allowed_range` is examined and returned.
        let modifiable = &input[allowed_range];
        modifiable.replace("oeverride", "overridden")
    }
}

// The nested transliterator to replace, and the transliterator that is constructed.
let overridden_dependency: Locale = "und-t-und-Latn-d0-ascii".parse().unwrap();
let german_ascii = "de-t-de-d0-ascii".parse().unwrap();

// The lookup closure yields the custom implementation only when asked for the
// dependency being overridden; for any other locale it yields `None`.
let transliterator = Transliterator::try_new_with_override(
    &german_ascii,
    |nested| {
        overridden_dependency
            .eq(nested)
            .then_some(Ok(Box::new(FunkyGermanToAscii)))
    },
)
.unwrap();

let output = transliterator.transliterate(String::from("This is an överride example"));
assert_eq!(output, "This is an overridden example");
source

pub fn try_new_with_override_with_any_provider<F>( provider: &(impl AnyProvider + ?Sized), locale: &Locale, lookup: F, ) -> Result<Transliterator, DataError>

A version of Self::try_new_with_override that uses custom data provided by an AnyProvider.

📚 Help choosing a constructor

source

pub fn try_new_with_override_with_buffer_provider<F>( provider: &(impl BufferProvider + ?Sized), locale: &Locale, lookup: F, ) -> Result<Transliterator, DataError>

A version of Self::try_new_with_override that uses custom data provided by a BufferProvider.

Enabled with the serde feature.

📚 Help choosing a constructor

source

pub fn try_new_with_override_unstable<PT, PN, F>( transliterator_provider: &PT, normalizer_provider: &PN, locale: &Locale, lookup: F, ) -> Result<Transliterator, DataError>

A version of Self::try_new_with_override that uses custom data provided by a DataProvider.

📚 Help choosing a constructor

⚠️ The bounds on the provider type parameters (PT and PN) may change over time, including in SemVer minor releases.
source

pub fn transliterate(&self, input: String) -> String

Transliterates input and returns its transliteration.

Trait Implementations§

source§

impl Debug for Transliterator

source§

fn fmt(&self, f: &mut Formatter<'_>) -> Result<(), Error>

Formats the value using the given formatter. Read more

Auto Trait Implementations§

Blanket Implementations§

source§

impl<T> Any for T
where T: 'static + ?Sized,

source§

fn type_id(&self) -> TypeId

Gets the TypeId of self. Read more
source§

impl<T> Borrow<T> for T
where T: ?Sized,

source§

fn borrow(&self) -> &T

Immutably borrows from an owned value. Read more
source§

impl<T> BorrowMut<T> for T
where T: ?Sized,

source§

fn borrow_mut(&mut self) -> &mut T

Mutably borrows from an owned value. Read more
source§

impl<T> From<T> for T

source§

fn from(t: T) -> T

Returns the argument unchanged.

source§

impl<T, U> Into<U> for T
where U: From<T>,

source§

fn into(self) -> U

Calls U::from(self).

That is, this conversion is whatever the implementation of From<T> for U chooses to do.

source§

impl<T> IntoEither for T

source§

fn into_either(self, into_left: bool) -> Either<Self, Self>

Converts self into a Left variant of Either<Self, Self> if into_left is true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
where F: FnOnce(&Self) -> bool,

Converts self into a Left variant of Either<Self, Self> if into_left(&self) returns true. Converts self into a Right variant of Either<Self, Self> otherwise. Read more
source§

impl<T, U> TryFrom<U> for T
where U: Into<T>,

source§

type Error = Infallible

The type returned in the event of a conversion error.
source§

fn try_from(value: U) -> Result<T, <T as TryFrom<U>>::Error>

Performs the conversion.
source§

impl<T, U> TryInto<U> for T
where U: TryFrom<T>,

source§

type Error = <U as TryFrom<T>>::Error

The type returned in the event of a conversion error.
source§

fn try_into(self) -> Result<U, <U as TryFrom<T>>::Error>

Performs the conversion.
source§

impl<T> ErasedDestructor for T
where T: 'static,