Struct icu::experimental::transliterate::Transliterator
source · pub struct Transliterator { /* private fields */ }
Expand description
A `Transliterator` allows transliteration based on UTS #35 transform rules, including overrides with custom implementations.
§Examples
A transliterator with a custom alias referenced by another:
use icu::experimental::transliterate::{Transliterator, CustomTransliterator, RuleCollection};
use icu::locale::Locale;
// Set up a transliterator with 3 custom rules.
// Note: These rules are for demonstration purposes only! Do not use.
// 1. Main entrypoint: a chain of several transliterators
let mut collection = RuleCollection::default();
collection.register_source(
&"und-t-und-x0-custom".parse().unwrap(),
"::NFD; ::FlattenLowerUmlaut; ::[:Nonspacing_Mark:] Remove; ::AsciiUpper; ::NFC;".to_string(),
[],
false,
true,
);
// 2. A custom ruleset that expands lowercase umlauts
collection.register_source(
&"und-t-und-x0-dep1".parse().unwrap(),
r#"
[ä {a \u0308}] → ae;
[ö {o \u0308}] → oe;
[ü {u \u0308}] → ue;
"#.to_string(),
["Any-FlattenLowerUmlaut"],
false,
true,
);
// 3. A custom transliterator that uppercases all ASCII characters
#[derive(Debug)]
struct AsciiUpperTransliterator;
impl CustomTransliterator for AsciiUpperTransliterator {
fn transliterate(&self, input: &str, range: std::ops::Range<usize>) -> String {
input.to_ascii_uppercase()
}
}
collection.register_aliases(
&"und-t-und-x0-dep2".parse().unwrap(),
["Any-AsciiUpper"],
);
// Create a transliterator from the main entrypoint:
let provider = collection.as_provider();
let t = Transliterator::try_new_with_override_unstable(
&provider,
&provider,
&"und-t-und-x0-custom".parse().unwrap(),
|locale| locale.normalizing_eq("und-t-und-x0-dep2").then_some(Ok(Box::new(AsciiUpperTransliterator))),
)
.unwrap();
// Test the behavior:
// - The uppercase 'Ü' is stripped of its umlaut
// - The lowercase 'ä' is expanded to "ae"
// - All ASCII characters are uppercased: not 'ß', which is not ASCII
let r = t.transliterate("Übermäßig".to_string());
assert_eq!(r, "UBERMAEßIG");
Implementations§
source§impl Transliterator
impl Transliterator
sourcepub fn try_new(locale: &Locale) -> Result<Transliterator, DataError>
pub fn try_new(locale: &Locale) -> Result<Transliterator, DataError>
Construct a `Transliterator` from the given `Locale`.
§Examples
use icu::experimental::transliterate::Transliterator;
// BCP-47-T ID for Bengali to Arabic transliteration
let locale = "und-Arab-t-und-beng".parse().unwrap();
let t = Transliterator::try_new(&locale).unwrap();
let output = t.transliterate("অকার্যতানাযা".to_string());
assert_eq!(output, "اكاريتانايا");
sourcepub fn try_new_with_any_provider(
provider: &(impl AnyProvider + ?Sized),
locale: &Locale,
) -> Result<Transliterator, DataError>
pub fn try_new_with_any_provider( provider: &(impl AnyProvider + ?Sized), locale: &Locale, ) -> Result<Transliterator, DataError>
A version of `Self::try_new` that uses custom data provided by an `AnyProvider`.
sourcepub fn try_new_with_buffer_provider(
provider: &(impl BufferProvider + ?Sized),
locale: &Locale,
) -> Result<Transliterator, DataError>
pub fn try_new_with_buffer_provider( provider: &(impl BufferProvider + ?Sized), locale: &Locale, ) -> Result<Transliterator, DataError>
A version of `Self::try_new` that uses custom data provided by a `BufferProvider`.
✨ Enabled with the `serde` feature.
sourcepub fn try_new_unstable<PT, PN>(
transliterator_provider: &PT,
normalizer_provider: &PN,
locale: &Locale,
) -> Result<Transliterator, DataError> where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV2Marker> + DataProvider<CompatibilityDecompositionDataV2Marker> + DataProvider<CanonicalDecompositionTablesV1Marker> + DataProvider<CompatibilityDecompositionTablesV1Marker> + DataProvider<CanonicalCompositionsV1Marker> + ?Sized,
pub fn try_new_unstable<PT, PN>(
transliterator_provider: &PT,
normalizer_provider: &PN,
locale: &Locale,
) -> Result<Transliterator, DataError> where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV2Marker> + DataProvider<CompatibilityDecompositionDataV2Marker> + DataProvider<CanonicalDecompositionTablesV1Marker> + DataProvider<CompatibilityDecompositionTablesV1Marker> + DataProvider<CanonicalCompositionsV1Marker> + ?Sized,
A version of `Self::try_new` that uses custom data provided by a `DataProvider`.
sourcepub fn try_new_with_override<F>(
locale: &Locale,
lookup: F,
) -> Result<Transliterator, DataError>
pub fn try_new_with_override<F>( locale: &Locale, lookup: F, ) -> Result<Transliterator, DataError>
Construct a `Transliterator` from the given `Locale` using overrides provided by `lookup`.
This allows clients to override the nested transliterators used by this transliterator.
Any nested transliterator will first try to be loaded with `lookup`, and only fall back to the nested transliterator defined by the data if it returns `None`.
See `CustomTransliterator`.
§Example
Overriding `"de-t-de-d0-ascii"`’s dependency on `"und-t-und-Latn-d0-ascii"`:
use core::ops::Range;
use icu::experimental::transliterate::{
CustomTransliterator, Transliterator,
};
use icu::locale::Locale;
#[derive(Debug)]
struct FunkyGermanToAscii;
impl CustomTransliterator for FunkyGermanToAscii {
fn transliterate(
&self,
input: &str,
allowed_range: Range<usize>,
) -> String {
input[allowed_range].replace("oeverride", "overridden")
}
}
let override_locale: Locale = "und-t-und-Latn-d0-ascii".parse().unwrap();
let locale = "de-t-de-d0-ascii".parse().unwrap();
let t = Transliterator::try_new_with_override(&locale, |locale| {
override_locale
.eq(locale)
.then_some(Ok(Box::new(FunkyGermanToAscii)))
})
.unwrap();
let output = t.transliterate("This is an överride example".to_string());
assert_eq!(output, "This is an overridden example");
sourcepub fn try_new_with_override_with_any_provider<F>(
provider: &(impl AnyProvider + ?Sized),
locale: &Locale,
lookup: F,
) -> Result<Transliterator, DataError>
pub fn try_new_with_override_with_any_provider<F>( provider: &(impl AnyProvider + ?Sized), locale: &Locale, lookup: F, ) -> Result<Transliterator, DataError>
A version of `Self::try_new_with_override` that uses custom data provided by an `AnyProvider`.
sourcepub fn try_new_with_override_with_buffer_provider<F>(
provider: &(impl BufferProvider + ?Sized),
locale: &Locale,
lookup: F,
) -> Result<Transliterator, DataError>
pub fn try_new_with_override_with_buffer_provider<F>( provider: &(impl BufferProvider + ?Sized), locale: &Locale, lookup: F, ) -> Result<Transliterator, DataError>
A version of `Self::try_new_with_override` that uses custom data provided by a `BufferProvider`.
✨ Enabled with the `serde` feature.
sourcepub fn try_new_with_override_unstable<PT, PN, F>(
transliterator_provider: &PT,
normalizer_provider: &PN,
locale: &Locale,
lookup: F,
) -> Result<Transliterator, DataError> where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV2Marker> + DataProvider<CompatibilityDecompositionDataV2Marker> + DataProvider<CanonicalDecompositionTablesV1Marker> + DataProvider<CompatibilityDecompositionTablesV1Marker> + DataProvider<CanonicalCompositionsV1Marker> + ?Sized,
F: Fn(&Locale) -> Option<Result<Box<dyn CustomTransliterator>, DataError>>,
pub fn try_new_with_override_unstable<PT, PN, F>(
transliterator_provider: &PT,
normalizer_provider: &PN,
locale: &Locale,
lookup: F,
) -> Result<Transliterator, DataError> where
PT: DataProvider<TransliteratorRulesV1Marker> + ?Sized,
PN: DataProvider<CanonicalDecompositionDataV2Marker> + DataProvider<CompatibilityDecompositionDataV2Marker> + DataProvider<CanonicalDecompositionTablesV1Marker> + DataProvider<CompatibilityDecompositionTablesV1Marker> + DataProvider<CanonicalCompositionsV1Marker> + ?Sized,
F: Fn(&Locale) -> Option<Result<Box<dyn CustomTransliterator>, DataError>>,
A version of `Self::try_new_with_override` that uses custom data provided by a `DataProvider`.
sourcepub fn transliterate(&self, input: String) -> String
pub fn transliterate(&self, input: String) -> String
Transliterates `input` and returns its transliteration.
Trait Implementations§
Auto Trait Implementations§
impl Freeze for Transliterator
impl !RefUnwindSafe for Transliterator
impl !Send for Transliterator
impl !Sync for Transliterator
impl Unpin for Transliterator
impl !UnwindSafe for Transliterator
Blanket Implementations§
source§impl<T> BorrowMut<T> for T where
T: ?Sized,
impl<T> BorrowMut<T> for T where
T: ?Sized,
source§fn borrow_mut(&mut self) -> &mut T
fn borrow_mut(&mut self) -> &mut T
source§impl<T> IntoEither for T
impl<T> IntoEither for T
source§fn into_either(self, into_left: bool) -> Either<Self, Self>
fn into_either(self, into_left: bool) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left` is `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more
source§fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
fn into_either_with<F>(self, into_left: F) -> Either<Self, Self>
Converts `self` into a `Left` variant of `Either<Self, Self>` if `into_left(&self)` returns `true`.
Converts `self` into a `Right` variant of `Either<Self, Self>` otherwise. Read more