icu_casemap/closer.rs
1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use crate::provider::{CaseMapUnfold, CaseMapUnfoldV1, CaseMapV1};
6use crate::set::ClosureSink;
7use crate::{CaseMapper, CaseMapperBorrowed};
8
9use icu_provider::prelude::*;
10
11/// A wrapper around [`CaseMapper`] that can produce case mapping closures
12/// over a character or string. This wrapper can be constructed directly, or
13/// by wrapping a reference to an existing [`CaseMapper`].
14///
15/// Most methods for this type live on [`CaseMapCloserBorrowed`], which you can obtain via
16/// [`CaseMapCloser::new()`] or [`CaseMapCloser::as_borrowed()`].
17///
18/// # Examples
19///
20/// ```rust
21/// use icu::casemap::CaseMapCloser;
22/// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
23///
24/// let cm = CaseMapCloser::new();
25/// let mut builder = CodePointInversionListBuilder::new();
26/// let found = cm.add_string_case_closure_to("ffi", &mut builder);
27/// assert!(found);
28/// let set = builder.build();
29///
30/// assert!(set.contains('ffi'));
31///
32/// let mut builder = CodePointInversionListBuilder::new();
33/// let found = cm.add_string_case_closure_to("ss", &mut builder);
34/// assert!(found);
35/// let set = builder.build();
36///
37/// assert!(set.contains('ß'));
38/// assert!(set.contains('ẞ'));
39/// ```
40#[derive(Clone, Debug)]
41pub struct CaseMapCloser<CM> {
42 cm: CM,
43 unfold: DataPayload<CaseMapUnfoldV1>,
44}
45
46impl CaseMapCloser<CaseMapper> {
47 icu_provider::gen_buffer_data_constructors!(() -> error: DataError,
48 functions: [
49 new: skip,
50 try_new_with_buffer_provider,
51 try_new_unstable,
52 Self,
53 ]);
54
55 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new)]
56 pub fn try_new_unstable<P>(provider: &P) -> Result<Self, DataError>
57 where
58 P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,
59 {
60 let cm = CaseMapper::try_new_unstable(provider)?;
61 let unfold = provider.load(Default::default())?.payload;
62 Ok(Self { cm, unfold })
63 }
64}
65
66impl CaseMapCloser<CaseMapper> {
67 /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data.
68 ///
69 /// # Examples
70 ///
71 /// ```rust
72 /// use icu::casemap::CaseMapCloser;
73 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
74 ///
75 /// let cm = CaseMapCloser::new();
76 /// let mut builder = CodePointInversionListBuilder::new();
77 /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
78 /// assert!(found);
79 /// let set = builder.build();
80 ///
81 /// assert!(set.contains('ffi'));
82 ///
83 /// let mut builder = CodePointInversionListBuilder::new();
84 /// let found = cm.add_string_case_closure_to("ss", &mut builder);
85 /// assert!(found);
86 /// let set = builder.build();
87 ///
88 /// assert!(set.contains('ß'));
89 /// assert!(set.contains('ẞ'));
90 /// ```
91 ///
92 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
93 ///
94 /// [📚 Help choosing a constructor](icu_provider::constructors)
95 #[cfg(feature = "compiled_data")]
96 #[allow(clippy::new_ret_no_self)] // Intentional
97 pub const fn new() -> CaseMapCloserBorrowed<'static> {
98 CaseMapCloserBorrowed::new()
99 }
100}
101
// `CM` is bounded by `AsRef<CaseMapper>` so that either an owned `CaseMapper` or a reference to one can be wrapped.
103impl<CM: AsRef<CaseMapper>> CaseMapCloser<CM> {
104 icu_provider::gen_buffer_data_constructors!((casemapper: CM) -> error: DataError,
105 functions: [
106 new_with_mapper: skip,
107 try_new_with_mapper_with_buffer_provider,
108 try_new_with_mapper_unstable,
109 Self,
110 ]);
111
112 /// A constructor which creates a [`CaseMapCloser`] from an existing [`CaseMapper`]
113 /// (either owned or as a reference)
114 ///
115 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
116 ///
117 /// [📚 Help choosing a constructor](icu_provider::constructors)
118 #[cfg(feature = "compiled_data")]
119 pub const fn new_with_mapper(casemapper: CM) -> Self {
120 Self {
121 cm: casemapper,
122 unfold: DataPayload::from_static_ref(
123 crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1,
124 ),
125 }
126 }
127
128 /// Construct this object to wrap an existing CaseMapper (or a reference to one), loading additional data as needed.
129 #[doc = icu_provider::gen_buffer_unstable_docs!(UNSTABLE, Self::new_with_mapper)]
130 pub fn try_new_with_mapper_unstable<P>(provider: &P, casemapper: CM) -> Result<Self, DataError>
131 where
132 P: DataProvider<CaseMapV1> + DataProvider<CaseMapUnfoldV1> + ?Sized,
133 {
134 let unfold = provider.load(Default::default())?.payload;
135 Ok(Self {
136 cm: casemapper,
137 unfold,
138 })
139 }
140
141 /// Constructs a borrowed version of this type for more efficient querying.
142 pub fn as_borrowed(&self) -> CaseMapCloserBorrowed<'_> {
143 CaseMapCloserBorrowed {
144 cm: self.cm.as_ref().as_borrowed(),
145 unfold: self.unfold.get(),
146 }
147 }
148}
149
150/// A borrowed [`CaseMapCloser`].
151///
152/// See methods or [`CaseMapCloser`] for examples.
153#[derive(Clone, Debug, Copy)]
154pub struct CaseMapCloserBorrowed<'a> {
155 cm: CaseMapperBorrowed<'a>,
156 unfold: &'a CaseMapUnfold<'a>,
157}
158
159impl CaseMapCloserBorrowed<'static> {
160 /// A constructor which creates a [`CaseMapCloserBorrowed`] using compiled data.
161 ///
162 /// # Examples
163 ///
164 /// ```rust
165 /// use icu::casemap::CaseMapCloser;
166 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
167 ///
168 /// let cm = CaseMapCloser::new();
169 /// let mut builder = CodePointInversionListBuilder::new();
170 /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
171 /// assert!(found);
172 /// let set = builder.build();
173 ///
174 /// assert!(set.contains('ffi'));
175 ///
176 /// let mut builder = CodePointInversionListBuilder::new();
177 /// let found = cm.add_string_case_closure_to("ss", &mut builder);
178 /// assert!(found);
179 /// let set = builder.build();
180 ///
181 /// assert!(set.contains('ß'));
182 /// assert!(set.contains('ẞ'));
183 /// ```
184 ///
185 /// ✨ *Enabled with the `compiled_data` Cargo feature.*
186 ///
187 /// [📚 Help choosing a constructor](icu_provider::constructors)
188 #[cfg(feature = "compiled_data")]
189 pub const fn new() -> CaseMapCloserBorrowed<'static> {
190 CaseMapCloserBorrowed {
191 cm: CaseMapper::new(),
192 unfold: crate::provider::Baked::SINGLETON_CASE_MAP_UNFOLD_V1,
193 }
194 }
195 /// Cheaply converts a [`CaseMapCloserBorrowed<'static>`] into a [`CaseMapCloser`].
196 ///
197 /// Note: Due to branching and indirection, using [`CaseMapCloser`] might inhibit some
198 /// compile-time optimizations that are possible with [`CaseMapCloserBorrowed`].
199 pub const fn static_to_owned(self) -> CaseMapCloser<CaseMapper> {
200 CaseMapCloser {
201 cm: self.cm.static_to_owned(),
202 unfold: DataPayload::from_static_ref(self.unfold),
203 }
204 }
205}
206
#[cfg(feature = "compiled_data")]
impl Default for CaseMapCloserBorrowed<'static> {
    /// Equivalent to [`CaseMapCloserBorrowed::new()`].
    fn default() -> Self {
        CaseMapCloserBorrowed::new()
    }
}
213
214impl CaseMapCloserBorrowed<'_> {
215 /// Adds all simple case mappings and the full case folding for `c` to `set`.
216 /// Also adds special case closure mappings.
217 ///
218 /// In other words, this adds all strings/characters that this casemaps to, as
219 /// well as all characters that may casemap to this one.
220 ///
221 /// The character itself is not added.
222 ///
223 /// For example, the mappings
224 /// - for s include long s
225 /// - for sharp s include ss
226 /// - for k include the Kelvin sign
227 ///
228 /// This function is identical to [`CaseMapperBorrowed::add_case_closure_to()`]; if you don't
229 /// need [`Self::add_string_case_closure_to()`] consider using a [`CaseMapper`] to avoid
230 /// loading additional data.
231 ///
232 /// # Examples
233 ///
234 /// ```rust
235 /// use icu::casemap::CaseMapCloser;
236 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
237 ///
238 /// let cm = CaseMapCloser::new();
239 /// let mut builder = CodePointInversionListBuilder::new();
240 /// cm.add_case_closure_to('s', &mut builder);
241 ///
242 /// let set = builder.build();
243 ///
244 /// assert!(set.contains('S'));
245 /// assert!(set.contains('ſ'));
246 /// assert!(!set.contains('s')); // does not contain itself
247 /// ```
248 pub fn add_case_closure_to<S: ClosureSink>(self, c: char, set: &mut S) {
249 self.cm.add_case_closure_to(c, set);
250 }
251
252 /// Finds all characters and strings which may casemap to `s` as their full case folding string
253 /// and adds them to the set. Includes the full case closure of each character mapping.
254 ///
255 /// In other words, this performs a reverse full case folding and then
256 /// adds the case closure items of the resulting code points.
257 ///
258 /// The string itself is not added to the set.
259 ///
260 /// Returns true if the string was found
261 ///
262 /// # Examples
263 ///
264 /// ```rust
265 /// use icu::casemap::CaseMapCloser;
266 /// use icu::collections::codepointinvlist::CodePointInversionListBuilder;
267 ///
268 /// let cm = CaseMapCloser::new();
269 /// let mut builder = CodePointInversionListBuilder::new();
270 /// let found = cm.add_string_case_closure_to("ffi", &mut builder);
271 /// assert!(found);
272 /// let set = builder.build();
273 ///
274 /// assert!(set.contains('ffi'));
275 ///
276 /// let mut builder = CodePointInversionListBuilder::new();
277 /// let found = cm.add_string_case_closure_to("ss", &mut builder);
278 /// assert!(found);
279 /// let set = builder.build();
280 ///
281 /// assert!(set.contains('ß'));
282 /// assert!(set.contains('ẞ'));
283 /// ```
284 pub fn add_string_case_closure_to<S: ClosureSink>(self, s: &str, set: &mut S) -> bool {
285 self.cm.data.add_string_case_closure_to(s, set, self.unfold)
286 }
287}