// zerotrie/options.rs
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! Options for building and reading from a ZeroTrie.
//!
//! These options are internal to the crate. A small selection of them
//! is exposed by way of the different public types of this crate.

/// Selects the lookup strategy used at branch nodes of a ZeroTrie.
#[derive(Clone, Copy)]
pub(crate) enum PhfMode {
    /// Every branch node is resolved with binary search.
    BinaryOnly,
    /// Large branch nodes are resolved with the perfect hash function.
    UsePhf,
}

impl PhfMode {
    /// Returns this option's contribution to the packed options byte:
    /// bit `0x1` for [`PhfMode::UsePhf`], no bits for [`PhfMode::BinaryOnly`].
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        const PHF_BIT: u8 = 0x1;
        match self {
            Self::UsePhf => PHF_BIT,
            Self::BinaryOnly => 0,
        }
    }
}

/// Selects whether a ZeroTrie may hold non-ASCII data.
#[derive(Clone, Copy)]
pub(crate) enum AsciiMode {
    /// Only ASCII is accepted; non-ASCII input results in an error.
    AsciiOnly,
    /// Any data is accepted; non-ASCII bytes are stored in span nodes.
    BinarySpans,
}

impl AsciiMode {
    /// Returns this option's contribution to the packed options byte:
    /// bit `0x2` for [`AsciiMode::BinarySpans`], no bits for [`AsciiMode::AsciiOnly`].
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        const BINARY_SPANS_BIT: u8 = 0x2;
        match self {
            Self::BinarySpans => BINARY_SPANS_BIT,
            Self::AsciiOnly => 0,
        }
    }
}

/// Selects whether the capacity of a ZeroTrie is limited.
#[derive(Clone, Copy)]
pub(crate) enum CapacityMode {
    /// A trie that requires a branch of more than 2^32 bytes produces an error.
    Normal,
    /// The trie is constructed without returning that error.
    Extended,
}

impl CapacityMode {
    /// Returns this option's contribution to the packed options byte:
    /// bit `0x4` for [`CapacityMode::Extended`], no bits for [`CapacityMode::Normal`].
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        const EXTENDED_BIT: u8 = 0x4;
        match self {
            Self::Extended => EXTENDED_BIT,
            Self::Normal => 0,
        }
    }
}

/// Selects how strings with mixed ASCII case at a node are treated,
/// for example "abc" together with "Abc".
#[derive(Clone, Copy)]
pub(crate) enum CaseSensitivity {
    /// Every string is accepted, and strings are ordered by byte value.
    Sensitive,
    /// Strings with different case are rejected, and strings are ordered
    /// as though `to_ascii_lowercase` had been applied.
    IgnoreCase,
}

impl CaseSensitivity {
    /// Returns this option's contribution to the packed options byte:
    /// bit `0x8` for [`CaseSensitivity::IgnoreCase`], no bits for
    /// [`CaseSensitivity::Sensitive`].
    #[cfg(feature = "serde")]
    const fn to_u8_flag(self) -> u8 {
        const IGNORE_CASE_BIT: u8 = 0x8;
        match self {
            Self::IgnoreCase => IGNORE_CASE_BIT,
            Self::Sensitive => 0,
        }
    }
}

/// The complete set of options for one kind of ZeroTrie: one value from
/// each of the four option enums in this module.
///
/// With the `serde` feature enabled, `to_u8_flags` packs these four
/// choices into a single byte.
#[derive(Copy, Clone)]
pub(crate) struct ZeroTrieBuilderOptions {
    /// Whether large branch nodes use the perfect hash function.
    pub phf_mode: PhfMode,
    /// Whether non-ASCII data is supported.
    pub ascii_mode: AsciiMode,
    /// Whether the capacity limit is enforced.
    pub capacity_mode: CapacityMode,
    /// How strings with mixed ASCII case at a node are handled.
    pub case_sensitivity: CaseSensitivity,
}

impl ZeroTrieBuilderOptions {
    /// Packs all four options into one byte by OR-ing together the flag
    /// bit contributed by each field.
    ///
    /// Each field reports either `0` or its own distinct bit, so the
    /// individual choices do not collide in the result.
    #[cfg(feature = "serde")]
    pub(crate) const fn to_u8_flags(self) -> u8 {
        let phf_bit = self.phf_mode.to_u8_flag();
        let ascii_bit = self.ascii_mode.to_u8_flag();
        let capacity_bit = self.capacity_mode.to_u8_flag();
        let case_bit = self.case_sensitivity.to_u8_flag();
        phf_bit | ascii_bit | capacity_bit | case_bit
    }
}

/// Associates a trie type with its fixed, compile-time set of options.
///
/// Each implementor supplies one [`ZeroTrieBuilderOptions`] value as an
/// associated constant.
pub(crate) trait ZeroTrieWithOptions {
    /// The options that apply to the implementing trie type.
    const OPTIONS: ZeroTrieBuilderOptions;
}

/// All branch nodes are binary search
/// and there are no span nodes.
///
/// Options: binary-search-only branches, ASCII-only data, the normal
/// capacity limit, and case-sensitive matching.
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieSimpleAscii<S> {
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Binary search for all branch nodes; no perfect hash function.
        phf_mode: PhfMode::BinaryOnly,
        // ASCII only; non-ASCII input is an error.
        ascii_mode: AsciiMode::AsciiOnly,
        // The capacity limit is enforced.
        capacity_mode: CapacityMode::Normal,
        // All strings are allowed and sorted by byte value.
        case_sensitivity: CaseSensitivity::Sensitive,
    };
}

impl<S: ?Sized> crate::ZeroTrieSimpleAscii<S> {
    /// The options of this trie type packed into a single byte via
    /// `ZeroTrieBuilderOptions::to_u8_flags`.
    ///
    /// Every option chosen for this type contributes `0`, so the value is `0`.
    /// Only compiled with the `serde` feature.
    // NOTE(review): presumably consumed by the serde implementation; the
    // consumer is not visible in this file, so confirm against it.
    #[cfg(feature = "serde")]
    pub(crate) const FLAGS: u8 = Self::OPTIONS.to_u8_flags();
}

/// All branch nodes are binary search
/// and nodes use case-insensitive matching.
///
/// Options: binary-search-only branches, ASCII-only data, the normal
/// capacity limit, and case-insensitive handling.
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroAsciiIgnoreCaseTrie<S> {
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Binary search for all branch nodes; no perfect hash function.
        phf_mode: PhfMode::BinaryOnly,
        // ASCII only; non-ASCII input is an error.
        ascii_mode: AsciiMode::AsciiOnly,
        // The capacity limit is enforced.
        capacity_mode: CapacityMode::Normal,
        // Strings with different case are rejected; sorting is as if
        // `to_ascii_lowercase` is called.
        case_sensitivity: CaseSensitivity::IgnoreCase,
    };
}

/// Branch nodes could be either binary search or PHF.
///
/// Options: perfect hash function for large branch nodes, span nodes for
/// non-ASCII bytes, the normal capacity limit, and case-sensitive matching.
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTriePerfectHash<S> {
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Large branch nodes use the perfect hash function.
        phf_mode: PhfMode::UsePhf,
        // All data is supported; non-ASCII bytes go into span nodes.
        ascii_mode: AsciiMode::BinarySpans,
        // The capacity limit is enforced.
        capacity_mode: CapacityMode::Normal,
        // All strings are allowed and sorted by byte value.
        case_sensitivity: CaseSensitivity::Sensitive,
    };
}

/// No limited capacity assertion.
///
/// Options: identical to those of `ZeroTriePerfectHash` in this file,
/// except that the capacity mode is `Extended`.
impl<S: ?Sized> ZeroTrieWithOptions for crate::ZeroTrieExtendedCapacity<S> {
    const OPTIONS: ZeroTrieBuilderOptions = ZeroTrieBuilderOptions {
        // Large branch nodes use the perfect hash function.
        phf_mode: PhfMode::UsePhf,
        // All data is supported; non-ASCII bytes go into span nodes.
        ascii_mode: AsciiMode::BinarySpans,
        // The trie is constructed without returning a capacity error.
        capacity_mode: CapacityMode::Extended,
        // All strings are allowed and sorted by byte value.
        case_sensitivity: CaseSensitivity::Sensitive,
    };
}