1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use crate::Writeable;
use alloc::borrow::Cow;
use alloc::string::String;
use core::fmt;
/// A byte slice together with the length of its leading portion that is
/// already known to be valid UTF-8.
struct PartiallyValidatedUtf8<'a> {
    // Safety Invariants:
    // 1. `offset` never exceeds the length of `slice`.
    // 2. The bytes of `slice` before `offset` are valid UTF-8.
    slice: &'a [u8],
    offset: usize,
}

impl<'a> PartiallyValidatedUtf8<'a> {
    /// Wraps `slice` with none of its bytes validated yet.
    fn new(slice: &'a [u8]) -> Self {
        // Safety: both field invariants hold trivially for an offset of zero:
        // 1. Zero is ≤ the length of any slice.
        // 2. The empty prefix contains no bytes, so it is valid UTF-8.
        Self { slice, offset: 0 }
    }

    /// Tests whether `valid_str` is exactly the next run of unvalidated bytes.
    ///
    /// On a match the offset moves past those bytes and `true` is returned.
    /// Otherwise nothing changes and `false` is returned.
    fn try_push(&mut self, valid_str: &str) -> bool {
        let end = self.offset + valid_str.len();
        match self.slice.get(self.offset..end) {
            Some(window) if window == valid_str.as_bytes() => {
                // Safety: Field invariants maintained here:
                // 1. `self.slice.get()` returned `Some()` for a range ending at `end`,
                //    so `end` is ≤ the length of `self.slice`.
                // 2. By invariant the slice is valid UTF-8 up to `self.offset`, and the
                //    bytes between `self.offset` and `end` equal `valid_str`, which is
                //    a `&str`, so the slice is valid UTF-8 up to `end`.
                self.offset = end;
                true
            }
            _ => false,
        }
    }

    /// Returns the validated prefix of the slice as a `&str`.
    fn validated_as_str(&self) -> &'a str {
        let Self { slice, offset } = *self;
        debug_assert!(offset <= slice.len());
        // Safety: `offset` is a valid end index for a range into `slice`
        // (field invariant 1).
        let prefix = unsafe { slice.get_unchecked(..offset) };
        debug_assert!(core::str::from_utf8(prefix).is_ok());
        // Safety: the bytes before `offset` have been validated as UTF-8
        // (field invariant 2).
        unsafe { core::str::from_utf8_unchecked(prefix) }
    }
}
/// A string sink that is either still borrowing from reference bytes or has
/// switched to an owned buffer.
enum SliceOrString<'a> {
    /// Reference bytes plus the offset up to which the text written so far
    /// has matched them.
    Slice(PartiallyValidatedUtf8<'a>),
    /// An owned buffer holding the text written so far.
    String(String),
}
/// Writing to this sink cannot fail: every call returns `Ok`, never `Err`.
impl fmt::Write for SliceOrString<'_> {
    /// Appends `other` to the sink.
    ///
    /// While borrowing, `other` is compared against the next reference bytes.
    /// On the first mismatch, the text matched so far and `other` are copied
    /// into a newly allocated `String`, which replaces the borrowed state.
    #[inline]
    fn write_str(&mut self, other: &str) -> fmt::Result {
        let borrowed = match self {
            // Already owned: just append to the buffer.
            SliceOrString::String(buffer) => return fmt::Write::write_str(buffer, other),
            SliceOrString::Slice(borrowed) => borrowed,
        };
        if borrowed.try_push(other) {
            // Still matching the reference bytes; nothing to allocate.
            return Ok(());
        }
        // We failed to match. Convert to owned.
        let prefix = borrowed.validated_as_str();
        let mut buffer = String::with_capacity(prefix.len() + other.len());
        buffer.push_str(prefix);
        buffer.push_str(other);
        *self = SliceOrString::String(buffer);
        Ok(())
    }
}
impl<'a> SliceOrString<'a> {
    /// Creates a sink in the borrowing state with none of `slice` matched yet.
    #[inline]
    fn new(slice: &'a [u8]) -> Self {
        let unvalidated = PartiallyValidatedUtf8::new(slice);
        SliceOrString::Slice(unvalidated)
    }

    /// Consumes the sink and returns the text written to it: borrowed from
    /// the reference bytes if it is still in the borrowing state, owned otherwise.
    #[inline]
    fn finish(self) -> Cow<'a, str> {
        match self {
            Self::String(buffer) => Cow::Owned(buffer),
            Self::Slice(borrowed) => Cow::Borrowed(borrowed.validated_as_str()),
        }
    }
}
/// Renders a `Writeable` to a string, borrowing from `reference_bytes` when
/// possible and allocating a `String` only when the output differs.
///
/// The output is compared against `reference_bytes` as it is written. If
/// everything written matches the start of `reference_bytes`, the result is a
/// [`Cow::Borrowed`] pointing into those bytes. Otherwise it is a
/// [`Cow::Owned`] containing the full output.
///
/// This pays off when you already hold borrowed bytes that are expected to
/// equal the writeable's output most of the time.
///
/// It can also serve as the basis for a more efficient implementation of
/// [`Writeable::write_to_string`].
///
/// # Examples
///
/// Basic usage and behavior:
///
/// ```
/// use std::fmt;
/// use std::borrow::Cow;
/// use writeable::Writeable;
///
/// struct WelcomeMessage<'s> {
///     pub name: &'s str,
/// }
///
/// impl<'s> Writeable for WelcomeMessage<'s> {
///     // see impl in Writeable docs
/// #    fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
/// #        sink.write_str("Hello, ")?;
/// #        sink.write_str(self.name)?;
/// #        sink.write_char('!')?;
/// #        Ok(())
/// #    }
/// }
///
/// let message = WelcomeMessage { name: "Alice" };
///
/// assert!(matches!(
///     writeable::to_string_or_borrow(&message, b""),
///     Cow::Owned(s) if s == "Hello, Alice!"
/// ));
/// assert!(matches!(
///     writeable::to_string_or_borrow(&message, b"Hello"),
///     Cow::Owned(s) if s == "Hello, Alice!"
/// ));
/// assert!(matches!(
///     writeable::to_string_or_borrow(&message, b"Hello, Bob!"),
///     Cow::Owned(s) if s == "Hello, Alice!"
/// ));
/// assert!(matches!(
///     writeable::to_string_or_borrow(&message, b"Hello, Alice!"),
///     Cow::Borrowed("Hello, Alice!")
/// ));
///
/// // Borrowing can use a prefix:
/// assert!(matches!(
///     writeable::to_string_or_borrow(&message, b"Hello, Alice!..\xFF\x00\xFF"),
///     Cow::Borrowed("Hello, Alice!")
/// ));
/// ```
///
/// Example use case: a function that transforms a string to lowercase.
/// The same call also gives a more efficient implementation of
/// [`Writeable::write_to_string`] in this situation.
///
/// ```
/// use std::fmt;
/// use std::borrow::Cow;
/// use writeable::Writeable;
///
/// struct MakeAsciiLower<'a>(&'a str);
///
/// impl<'a> Writeable for MakeAsciiLower<'a> {
///     fn write_to<W: fmt::Write + ?Sized>(&self, sink: &mut W) -> fmt::Result {
///         for c in self.0.chars() {
///             sink.write_char(c.to_ascii_lowercase())?;
///         }
///         Ok(())
///     }
///     #[inline]
///     fn write_to_string(&self) -> Cow<str> {
///         writeable::to_string_or_borrow(self, self.0.as_bytes())
///     }
/// }
///
/// fn make_lowercase(input: &str) -> Cow<str> {
///     let writeable = MakeAsciiLower(input);
///     writeable::to_string_or_borrow(&writeable, input.as_bytes())
/// }
///
/// assert!(matches!(
///     make_lowercase("this is lowercase"),
///     Cow::Borrowed("this is lowercase")
/// ));
/// assert!(matches!(
///     make_lowercase("this is UPPERCASE"),
///     Cow::Owned(s) if s == "this is uppercase"
/// ));
///
/// assert!(matches!(
///     MakeAsciiLower("this is lowercase").write_to_string(),
///     Cow::Borrowed("this is lowercase")
/// ));
/// assert!(matches!(
///     MakeAsciiLower("this is UPPERCASE").write_to_string(),
///     Cow::Owned(s) if s == "this is uppercase"
/// ));
/// ```
pub fn to_string_or_borrow<'a>(
    writeable: &impl Writeable,
    reference_bytes: &'a [u8],
) -> Cow<'a, str> {
    let mut borrow_or_buffer = SliceOrString::new(reference_bytes);
    // The sink's own `write_str` always returns `Ok`. The result of `write_to`
    // is deliberately discarded, and whatever reached the sink is returned.
    let _ = writeable.write_to(&mut borrow_or_buffer);
    borrow_or_buffer.finish()
}