icu_collections/codepointinvlist/
utils.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use core::{
    char,
    ops::{Bound::*, RangeBounds},
};
use potential_utf::PotentialCodePoint;
use zerovec::ule::AsULE;
use zerovec::ZeroVec;

/// Checks whether `inv_list_zv` is a well-formed inversion list.
///
/// An empty list is always valid. A non-empty list is valid when all of the
/// following hold:
///
/// - it contains an even number of elements,
/// - every element is strictly less than the element that follows it, and
/// - the final element is at most `char::MAX as u32 + 1` (`0x110000`).
pub fn is_valid_zv(inv_list_zv: &ZeroVec<'_, PotentialCodePoint>) -> bool {
    // No last element means the list is empty, which is trivially valid.
    let last = match inv_list_zv.last() {
        Some(last) => last,
        None => return true,
    };

    if inv_list_zv.len() % 2 != 0 {
        return false;
    }

    // Pair every element with its successor to verify strict ascending order.
    let ules = inv_list_zv.as_ule_slice();
    let strictly_ascending = ules.iter().zip(ules.iter().skip(1)).all(|(&lo, &hi)| {
        <PotentialCodePoint as AsULE>::from_unaligned(lo)
            < <PotentialCodePoint as AsULE>::from_unaligned(hi)
    });

    // Because the list is strictly ascending, bounding the last element bounds them all.
    strictly_ascending && u32::from(last) <= char::MAX as u32 + 1
}

/// Returns the start (inclusive) and end (exclusive) bounds of a [`RangeBounds`]
/// as a pair of `u32` values.
///
/// - An unbounded start maps to `0`.
/// - An unbounded end maps to `char::MAX as u32 + 1`.
/// - An excluded start `s` maps to `s + 1`; this can occur with
///   `(Bound<T>, Bound<T>)` tuples, which also implement [`RangeBounds`].
/// - An included end `e` maps to `e + 1`.
///
/// The `+ 1` adjustments saturate at `u32::MAX` instead of overflowing, so this
/// function never panics. No validation against the code point range is performed
/// here; callers are expected to check the returned bounds themselves.
pub fn deconstruct_range<T>(range: impl RangeBounds<T>) -> (u32, u32)
where
    T: Into<u32> + Copy,
{
    let from = match range.start_bound() {
        Included(b) => (*b).into(),
        // Exclusive start: the first contained value is the next one up.
        Excluded(b) => u32::saturating_add((*b).into(), 1),
        Unbounded => 0,
    };
    let till = match range.end_bound() {
        // Inclusive end: convert to an exclusive bound without overflowing on `u32::MAX`.
        Included(b) => u32::saturating_add((*b).into(), 1),
        Excluded(b) => (*b).into(),
        Unbounded => (char::MAX as u32) + 1,
    };
    (from, till)
}

#[cfg(test)]
mod tests {
    use super::{deconstruct_range, is_valid_zv, PotentialCodePoint};
    use core::char;
    use zerovec::ZeroVec;

    /// Builds a `ZeroVec` of `PotentialCodePoint`s from raw `u32` values.
    fn make_zv(slice: &[u32]) -> ZeroVec<PotentialCodePoint> {
        slice
            .iter()
            .copied()
            .map(PotentialCodePoint::from_u24)
            .collect()
    }

    // is_valid_zv
    //
    // Every negative test below except `test_is_valid_zv_odd` uses an even-length
    // list, so that it fails only because of the property named in the test and
    // not because of the length parity check.

    #[test]
    fn test_is_valid_zv() {
        let check = make_zv(&[0x2, 0x3, 0x4, 0x5]);
        assert!(is_valid_zv(&check));
    }

    #[test]
    fn test_is_valid_zv_empty() {
        let check = make_zv(&[]);
        assert!(is_valid_zv(&check));
    }

    #[test]
    fn test_is_valid_zv_overlapping() {
        let check = make_zv(&[0x2, 0x5, 0x4, 0x6]);
        assert!(!is_valid_zv(&check));
    }

    #[test]
    fn test_is_valid_zv_out_of_order() {
        let check = make_zv(&[0x5, 0x4, 0x5, 0x6]);
        assert!(!is_valid_zv(&check));
    }

    #[test]
    fn test_is_valid_zv_duplicate() {
        let check = make_zv(&[0x1, 0x2, 0x3, 0x3]);
        assert!(!is_valid_zv(&check));
    }

    #[test]
    fn test_is_valid_zv_odd() {
        let check = make_zv(&[0x1, 0x2, 0x3, 0x4, 0x5]);
        assert!(!is_valid_zv(&check));
    }

    #[test]
    fn test_is_valid_zv_max_bound() {
        // `char::MAX + 1` is the largest permitted (exclusive) end value.
        let check = make_zv(&[0x1, 0x2, 0x3, (char::MAX as u32) + 1]);
        assert!(is_valid_zv(&check));
    }

    #[test]
    fn test_is_valid_zv_out_of_range() {
        // One past the largest permitted end value.
        let check = make_zv(&[0x1, 0x2, 0x3, (char::MAX as u32) + 2]);
        assert!(!is_valid_zv(&check));
    }

    // deconstruct_range

    #[test]
    fn test_deconstruct_range() {
        let expected = (0x41, 0x45);
        let check = deconstruct_range('A'..'E'); // Range
        assert_eq!(check, expected);
        let check = deconstruct_range('A'..='D'); // Range Inclusive
        assert_eq!(check, expected);
        let check = deconstruct_range('A'..); // Range From
        assert_eq!(check, (0x41, (char::MAX as u32) + 1));
        let check = deconstruct_range(..'A'); // Range To
        assert_eq!(check, (0x0, 0x41));
        let check = deconstruct_range(..='A'); // Range To Inclusive
        assert_eq!(check, (0x0, 0x42));
        let check = deconstruct_range::<char>(..); // Range Full
        assert_eq!(check, (0x0, (char::MAX as u32) + 1));
    }
}