1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

#[cfg(feature = "alloc")]
use alloc::boxed::Box;
use core::fmt;
use core::ops::Deref;

/// A byte slice that is expected to hold a UTF-8 string, without that invariant being enforced.
///
/// Reach for this type instead of `str` when UTF-8 does not need to be validated during
/// deserialization. For example, strings that only serve as keys of a map never have to be
/// reified as `str`s.
///
/// [`PotentialUtf8`] derefs to `[u8]`. To obtain a `str`, use [`Self::try_as_str()`].
///
/// The main advantage of this type over `[u8]` is that it serializes as a string in
/// human-readable formats like JSON.
///
/// # Examples
///
/// Using a [`PotentialUtf8`] as the key of a [`ZeroMap`]:
///
/// ```
/// use potential_utf::PotentialUtf8;
/// use zerovec::ZeroMap;
///
/// // This map is cheap to deserialize, as we don't need to perform UTF-8 validation.
/// let map: ZeroMap<PotentialUtf8, u8> = [
///     (PotentialUtf8::from_bytes(b"abc"), 11),
///     (PotentialUtf8::from_bytes(b"def"), 22),
///     (PotentialUtf8::from_bytes(b"ghi"), 33),
/// ]
/// .into_iter()
/// .collect();
///
/// let key = "abc";
/// let value = map.get_copied(PotentialUtf8::from_str(key));
/// assert_eq!(Some(11), value);
/// ```
///
/// [`ZeroMap`]: zerovec::ZeroMap
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf8(pub [u8]);

impl fmt::Debug for PotentialUtf8 {
    /// Formats the content like a `str` when it is valid UTF-8, and like a `[u8]` otherwise.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        if let Ok(text) = self.try_as_str() {
            return fmt::Debug::fmt(text, f);
        }
        // Not valid UTF-8: fall back to the raw byte representation.
        fmt::Debug::fmt(&self.0, f)
    }
}

impl PotentialUtf8 {
    /// Create a [`PotentialUtf8`] from a byte slice.
    ///
    /// The bytes are reinterpreted in place; no validation takes place.
    #[inline]
    pub const fn from_bytes(other: &[u8]) -> &Self {
        // Safety: PotentialUtf8 is transparent over [u8]
        unsafe { core::mem::transmute::<&[u8], &Self>(other) }
    }

    /// Create a [`PotentialUtf8`] from a string slice.
    #[inline]
    pub const fn from_str(s: &str) -> &Self {
        let bytes = s.as_bytes();
        Self::from_bytes(bytes)
    }

    /// Create a [`PotentialUtf8`] from boxed bytes.
    ///
    /// The existing allocation is reused; no validation takes place.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_bytes(other: Box<[u8]>) -> Box<Self> {
        let raw = Box::into_raw(other) as *mut Self;
        // Safety: PotentialUtf8 is transparent over [u8], so `raw` (which came out of a
        // `Box<[u8]>`) points to a valid `Self` with the layout `Box<Self>` expects.
        unsafe { Box::from_raw(raw) }
    }

    /// Create a [`PotentialUtf8`] from a boxed `str`.
    #[inline]
    #[cfg(feature = "alloc")]
    pub fn from_boxed_str(other: Box<str>) -> Box<Self> {
        let bytes: Box<[u8]> = other.into_boxed_bytes();
        Self::from_boxed_bytes(bytes)
    }

    /// Get the bytes from a [`PotentialUtf8`].
    #[inline]
    pub const fn as_bytes(&self) -> &[u8] {
        let Self(bytes) = self;
        bytes
    }

    /// Attempt to convert a [`PotentialUtf8`] to a `str`.
    ///
    /// # Errors
    ///
    /// Returns the [`Utf8Error`](core::str::Utf8Error) reported by
    /// [`core::str::from_utf8`] when the bytes are not valid UTF-8.
    ///
    /// # Examples
    ///
    /// ```
    /// use potential_utf::PotentialUtf8;
    ///
    /// static A: &PotentialUtf8 = PotentialUtf8::from_bytes(b"abc");
    ///
    /// let b = A.try_as_str().unwrap();
    /// assert_eq!(b, "abc");
    /// ```
    // Note: the underlying `core::str::from_utf8` is const starting in Rust 1.63
    #[inline]
    pub fn try_as_str(&self) -> Result<&str, core::str::Utf8Error> {
        core::str::from_utf8(self.as_bytes())
    }
}

/// Borrows a string slice as a [`PotentialUtf8`] with the same lifetime.
impl<'a> From<&'a str> for &'a PotentialUtf8 {
    #[inline]
    fn from(other: &'a str) -> Self {
        // A `str` is always valid UTF-8, so its bytes can be wrapped directly.
        PotentialUtf8::from_bytes(other.as_bytes())
    }
}

/// Converts an owned boxed `str` into a boxed [`PotentialUtf8`].
#[cfg(feature = "alloc")]
impl From<Box<str>> for Box<PotentialUtf8> {
    #[inline]
    fn from(other: Box<str>) -> Self {
        let bytes = other.into_boxed_bytes();
        PotentialUtf8::from_boxed_bytes(bytes)
    }
}

/// Exposes the wrapped bytes, so `[u8]` methods can be called on a [`PotentialUtf8`].
impl Deref for PotentialUtf8 {
    type Target = [u8];
    fn deref(&self) -> &Self::Target {
        self.as_bytes()
    }
}

/// This impl requires enabling the optional `zerovec` Cargo feature
///
/// It lets [`PotentialUtf8`] be stored in `zerovec` maps as a variable-length value.
#[cfg(feature = "zerovec")]
impl<'a> zerovec::maps::ZeroMapKV<'a> for PotentialUtf8 {
    type Container = zerovec::VarZeroVec<'a, Self>;
    type Slice = zerovec::VarZeroSlice<Self>;
    type GetType = Self;
    // NOTE(review): `Box` is imported at the top of this file only under the `alloc`
    // feature; confirm it resolves when `zerovec` is enabled without `alloc`.
    type OwnedType = Box<Self>;
}

// Safety (based on the safety checklist on the VarULE trait):
//  1. PotentialUtf8 does not include any uninitialized or padding bytes (transparent over a ULE)
//  2. PotentialUtf8 is aligned to 1 byte (transparent over a ULE)
//  3. The impl of `validate_bytes()` returns an error if any byte is not valid (impossible:
//     every byte value is accepted)
//  4. The impl of `validate_bytes()` returns an error if the slice cannot be used in its
//     entirety (impossible: a slice of any length is accepted)
//  5. The impl of `from_bytes_unchecked()` returns a reference to the same data (returns the argument directly)
//  6. All other methods are defaulted
//  7. `[T]` byte equality is semantic equality (transparent over a ULE)
/// This impl requires enabling the optional `zerovec` Cargo feature
#[cfg(feature = "zerovec")]
unsafe impl zerovec::ule::VarULE for PotentialUtf8 {
    // Always succeeds: the input is ignored, so no byte sequence is ever rejected.
    #[inline]
    fn validate_bytes(_: &[u8]) -> Result<(), zerovec::ule::UleError> {
        Ok(())
    }
    // Delegates to the safe `PotentialUtf8::from_bytes`, which reinterprets the slice in
    // place; nothing here relies on the caller having validated `bytes`.
    #[inline]
    unsafe fn from_bytes_unchecked(bytes: &[u8]) -> &Self {
        PotentialUtf8::from_bytes(bytes)
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
///
/// Human-readable formats receive a string; all other formats receive the raw bytes.
#[cfg(feature = "serde")]
impl serde::Serialize for PotentialUtf8 {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // The UTF-8 check runs before the format is inspected, so content that is not
        // valid UTF-8 is rejected for binary formats as well.
        let text = match self.try_as_str() {
            Ok(text) => text,
            Err(_) => {
                return Err(<S::Error as serde::ser::Error>::custom(
                    "invalid UTF-8 in PotentialUtf8",
                ))
            }
        };
        if !serializer.is_human_readable() {
            return serializer.serialize_bytes(text.as_bytes());
        }
        serializer.serialize_str(text)
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
///
/// Human-readable formats are read as a string; all other formats are read as bytes.
#[cfg(all(feature = "serde", feature = "alloc"))]
impl<'de> serde::Deserialize<'de> for Box<PotentialUtf8> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Bytes are wrapped as-is, without any UTF-8 validation.
            return <Box<[u8]> as serde::Deserialize<'de>>::deserialize(deserializer)
                .map(PotentialUtf8::from_boxed_bytes);
        }
        <Box<str> as serde::Deserialize<'de>>::deserialize(deserializer)
            .map(PotentialUtf8::from_boxed_str)
    }
}

/// This impl requires enabling the optional `serde` Cargo feature
///
/// The result borrows from the deserializer's input. Human-readable formats are read as a
/// string; all other formats are read as bytes.
#[cfg(feature = "serde")]
impl<'de, 'a> serde::Deserialize<'de> for &'a PotentialUtf8
where
    'de: 'a,
{
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        if !deserializer.is_human_readable() {
            // Bytes are wrapped as-is, without any UTF-8 validation.
            return <&'a [u8] as serde::Deserialize<'de>>::deserialize(deserializer)
                .map(PotentialUtf8::from_bytes);
        }
        <&'a str as serde::Deserialize<'de>>::deserialize(deserializer)
            .map(PotentialUtf8::from_str)
    }
}

/// A `u16` slice that is expected to be a UTF-16 string but does not enforce that invariant.
///
/// The wrapped code units are never validated, so unpaired surrogates may be present.
#[repr(transparent)]
#[derive(PartialEq, Eq, PartialOrd, Ord)]
#[allow(clippy::exhaustive_structs)] // transparent newtype
pub struct PotentialUtf16(pub [u16]);

impl fmt::Debug for PotentialUtf16 {
    /// Writes every decodable character as-is (unquoted and unescaped); each unpaired
    /// surrogate is written as `\0x` followed by its lowercase hexadecimal value.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Debug as a string as far as possible, stopping at the first write error.
        char::decode_utf16(self.0.iter().copied()).try_for_each(|decoded| match decoded {
            Ok(ch) => write!(f, "{ch}"),
            Err(unpaired) => write!(f, "\\0x{:x}", unpaired.unpaired_surrogate()),
        })
    }
}

impl PotentialUtf16 {
    /// Create a [`PotentialUtf16`] from a u16 slice.
    ///
    /// The code units are reinterpreted in place; no validation takes place.
    #[inline]
    pub const fn from_slice(other: &[u16]) -> &Self {
        // Safety: PotentialUtf16 is transparent over [u16]
        unsafe { core::mem::transmute::<&[u16], &Self>(other) }
    }
}