icu_casemap/
closer.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::{CaseMapUnfold, CaseMapUnfoldV1, CaseMapV1};
6use crate::set::ClosureSink;
7use crate::{CaseMapper, CaseMapperBorrowed};
8
9use icu_provider::prelude::*;
10
11/// A wrapper around [`CaseMapper`] that can produce case mapping closures
12/// over a character or string. This wrapper can be constructed directly, or
13/// by wrapping a reference to an existing [`CaseMapper`].
14///
15/// Most methods for this type live on [`CaseMapCloserBorrowed`], which you can obtain via
16/// [`CaseMapCloser::new()`] or [`CaseMapCloser::as_borrowed()`].
17///
18/// # Examples
19///
20/// ```rust
21/// use icu::casemap::CaseMapCloser;
22/// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
23///
24/// let cm = CaseMapCloser::new();
25/// let mut builder = CodePointInversionListBuilder::new();
26/// let found = cm.add_string_case_closure_to("ffi", &mut builder);
27/// assert!(found);
28/// let set = builder.build();
29///
30/// assert!(set.contains('ffi'));
31///
32/// let mut builder = CodePointInversionListBuilder::new();
33/// let found = cm.add_string_case_closure_to("ss", &mut builder);
34/// assert!(found);
35/// let set = builder.build();
36///
37/// assert!(set.contains('ß'));
38/// assert!(set.contains('ẞ'));
39/// ```
40#[derive(Clone, Debug)]
41pub struct CaseMapCloser<CM> {
42    cm: CM,
43    unfold: DataPayload<CaseMapUnfoldV1>,
44}
45
46impl CaseMapCloser<CaseMapper> {
47    icu_provider::gen_buffer_data_constructors!(() -> error: DataError,
48    functions: [
49        new: skip,
50        try_new_with_buffer_provider,
51        try_new_unstable,
52        Self,
53    ]);
54
55    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
56    pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
57    where
58        P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,
59    {
60        let cm = CaseMapper::try_new_unstable(provider)?;
61        let unfold = provider.load(Default::default())?.payload;
62        Ok(Self { cm, unfold })
63    }
64}
65
66impl CaseMapCloser<CaseMapper> {
67    /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data.
68    ///
69    /// # Examples
70    ///
71    /// ```rust
72    /// use icu::casemap::CaseMapCloser;
73    /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
74    ///
75    /// let cm = CaseMapCloser::new();
76    /// let mut builder = CodePointInversionListBuilder::new();
77    /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
78    /// assert!(found);
79    /// let set = builder.build();
80    ///
81    /// assert!(set.contains('ffi'));
82    ///
83    /// let mut builder = CodePointInversionListBuilder::new();
84    /// let found = cm.add_string_case_closure_to("ss", &mut builder);
85    /// assert!(found);
86    /// let set = builder.build();
87    ///
88    /// assert!(set.contains('ß'));
89    /// assert!(set.contains('ẞ'));
90    /// ```
91    ///
92    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
93    ///
94    /// [📚 Help choosing a constructor](icu_provider::constructors)
95    #[cfg(feature = "compiled_data")]
96    #[allow(clippy::new_ret_no_self)] // Intentional
97    pub const fn new() -> CaseMapCloserBorrowed<'static> {
98        CaseMapCloserBorrowed::new()
99    }
100}
101
// `CM` is bounded by `AsRef<CaseMapper>` so that it can be either an owned `CaseMapper` or a reference to one.
103impl<CM: AsRef<CaseMapper>> CaseMapCloser<CM> {
104    icu_provider::gen_buffer_data_constructors!((casemapper: CM) -> error: DataError,
105    functions: [
106        new_with_mapper: skip,
107        try_new_with_mapper_with_buffer_provider,
108        try_new_with_mapper_unstable,
109        Self,
110    ]);
111
112    /// A constructor which creates a [`CaseMapCloser`] from an existing [`CaseMapper`]
113    /// (either owned or as a reference)
114    ///
115    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
116    ///
117    /// [📚 Help choosing a constructor](icu_provider::constructors)
118    #[cfg(feature = "compiled_data")]
119    pub const fn new_with_mapper(casemapper: CM) -> Self {
120        Self {
121            cm: casemapper,
122            unfold: DataPayload::from_static_ref(
123                crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1,
124            ),
125        }
126    }
127
128    /// Construct this object to wrap an existing CaseMapper (or a reference to one), loading additional data as needed.
129    #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_with_mapper)]
130    pub fn try_new_with_mapper_unstable<P>(provider: &P, casemapper: CM) -> Result<Self, DataError>
131    where
132        P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,
133    {
134        let unfold = provider.load(Default::default())?.payload;
135        Ok(Self {
136            cm: casemapper,
137            unfold,
138        })
139    }
140
141    /// Constructs a borrowed version of this type for more efficient querying.
142    pub fn as_borrowed(&self) -> CaseMapCloserBorrowed<'_> {
143        CaseMapCloserBorrowed {
144            cm: self.cm.as_ref().as_borrowed(),
145            unfold: self.unfold.get(),
146        }
147    }
148}
149
150/// A borrowed [`CaseMapCloser`].
151///
152/// See methods or [`CaseMapCloser`] for examples.
153#[derive(Clone, Debug, Copy)]
154pub struct CaseMapCloserBorrowed<'a> {
155    cm: CaseMapperBorrowed<'a>,
156    unfold: &'a CaseMapUnfold<'a>,
157}
158
159impl CaseMapCloserBorrowed<'static> {
160    /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data.
161    ///
162    /// # Examples
163    ///
164    /// ```rust
165    /// use icu::casemap::CaseMapCloser;
166    /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
167    ///
168    /// let cm = CaseMapCloser::new();
169    /// let mut builder = CodePointInversionListBuilder::new();
170    /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
171    /// assert!(found);
172    /// let set = builder.build();
173    ///
174    /// assert!(set.contains('ffi'));
175    ///
176    /// let mut builder = CodePointInversionListBuilder::new();
177    /// let found = cm.add_string_case_closure_to("ss", &mut builder);
178    /// assert!(found);
179    /// let set = builder.build();
180    ///
181    /// assert!(set.contains('ß'));
182    /// assert!(set.contains('ẞ'));
183    /// ```
184    ///
185    /// ✨ *Enabled with the `compiled_data` Cargo feature.*
186    ///
187    /// [📚 Help choosing a constructor](icu_provider::constructors)
188    #[cfg(feature = "compiled_data")]
189    pub const fn new() -> CaseMapCloserBorrowed<'static> {
190        CaseMapCloserBorrowed {
191            cm: CaseMapper::new(),
192            unfold: crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1,
193        }
194    }
195    /// Cheaply converts a [`CaseMapCloserBorrowed<'static>`] into a [`CaseMapCloser`].
196    ///
197    /// Note: Due to branching and indirection, using [`CaseMapCloser`] might inhibit some
198    /// compile-time optimizations that are possible with [`CaseMapCloserBorrowed`].
199    pub const fn static_to_owned(self) -> CaseMapCloser<CaseMapper> {
200        CaseMapCloser {
201            cm: self.cm.static_to_owned(),
202            unfold: DataPayload::from_static_ref(self.unfold),
203        }
204    }
205}
206
// The default closer is the one backed by compiled data.
#[cfg(feature = "compiled_data")]
impl Default for CaseMapCloserBorrowed<'static> {
    fn default() -> Self {
        CaseMapCloserBorrowed::new()
    }
}
213
214impl CaseMapCloserBorrowed<'_> {
215    /// Adds all simple case mappings and the full case folding for `c` to `set`.
216    /// Also adds special case closure mappings.
217    ///
218    /// In other words, this adds all strings/characters that this casemaps to, as
219    /// well as all characters that may casemap to this one.
220    ///
221    /// The character itself is not added.
222    ///
223    /// For example, the mappings
224    /// - for s include long s
225    /// - for sharp s include ss
226    /// - for k include the Kelvin sign
227    ///
228    /// This function is identical to [`CaseMapperBorrowed::add_case_closure_to()`]; if you don't
229    /// need [`Self::add_string_case_closure_to()`] consider using a [`CaseMapper`] to avoid
230    /// loading additional data.
231    ///
232    /// # Examples
233    ///
234    /// ```rust
235    /// use icu::casemap::CaseMapCloser;
236    /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
237    ///
238    /// let cm = CaseMapCloser::new();
239    /// let mut builder = CodePointInversionListBuilder::new();
240    /// cm.add_case_closure_to('s', &mut builder);
241    ///
242    /// let set = builder.build();
243    ///
244    /// assert!(set.contains('S'));
245    /// assert!(set.contains('ſ'));
246    /// assert!(!set.contains('s')); // does not contain itself
247    /// ```
248    pub fn add_case_closure_to<S: ClosureSink>(self, c: char, set: &mut S) {
249        self.cm.add_case_closure_to(c, set);
250    }
251
252    /// Finds all characters and strings which may casemap to `s` as their full case folding string
253    /// and adds them to the set. Includes the full case closure of each character mapping.
254    ///
255    /// In other words, this performs a reverse full case folding and then
256    /// adds the case closure items of the resulting code points.
257    ///
258    /// The string itself is not added to the set.
259    ///
260    /// Returns true if the string was found
261    ///
262    /// # Examples
263    ///
264    /// ```rust
265    /// use icu::casemap::CaseMapCloser;
266    /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
267    ///
268    /// let cm = CaseMapCloser::new();
269    /// let mut builder = CodePointInversionListBuilder::new();
270    /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
271    /// assert!(found);
272    /// let set = builder.build();
273    ///
274    /// assert!(set.contains('ffi'));
275    ///
276    /// let mut builder = CodePointInversionListBuilder::new();
277    /// let found = cm.add_string_case_closure_to("ss", &mut builder);
278    /// assert!(found);
279    /// let set = builder.build();
280    ///
281    /// assert!(set.contains('ß'));
282    /// assert!(set.contains('ẞ'));
283    /// ```
284    pub fn add_string_case_closure_to<S: ClosureSink>(self, s: &str, set: &mut S) -> bool {
285        self.cm.data.add_string_case_closure_to(s, set, self.unfold)
286    }
287}