icu_experimental/measure/si_prefix.rs
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
use zerotrie::ZeroTrieSimpleAscii;
use super::provider::si_prefix::{Base, SiPrefix};
/// The offset of the SI prefixes.
///
/// NOTE:
///     The offset is added to the power of each decimal SI prefix before it is stored in
///     `DECIMAL_PREFIXES_TRIE`, so that the stored value is never negative;
///     `get_si_prefix_base_ten` subtracts it again after the lookup.
///     Therefore, if a prefix with a power less than -30 is ever added, the offset must be increased.
///     The biased value (power + offset) is decoded with an `as i8` cast, so it must also stay
///     within the `i8` range.
const SI_PREFIXES_OFFSET: u8 = 30;
/// A trie that contains the decimal SI prefixes.
const DECIMAL_PREFIXES_TRIE: ZeroTrieSimpleAscii<[u8; 167]> =
ZeroTrieSimpleAscii::from_sorted_str_tuples(&[
("atto", (-18 + SI_PREFIXES_OFFSET as i16) as usize),
("centi", (-2 + SI_PREFIXES_OFFSET as i16) as usize),
("deca", (1 + SI_PREFIXES_OFFSET) as usize),
("deci", (-1 + SI_PREFIXES_OFFSET as i16) as usize),
("exa", (18 + SI_PREFIXES_OFFSET) as usize),
("femto", (-15 + SI_PREFIXES_OFFSET as i16) as usize),
("giga", (9 + SI_PREFIXES_OFFSET) as usize),
("hecto", (2 + SI_PREFIXES_OFFSET) as usize),
("kilo", (3 + SI_PREFIXES_OFFSET) as usize),
("mega", (6 + SI_PREFIXES_OFFSET) as usize),
("micro", (-6 + SI_PREFIXES_OFFSET as i16) as usize),
("milli", (-3 + SI_PREFIXES_OFFSET as i16) as usize),
("nano", (-9 + SI_PREFIXES_OFFSET as i16) as usize),
("peta", (15 + SI_PREFIXES_OFFSET) as usize),
("pico", (-12 + SI_PREFIXES_OFFSET as i16) as usize),
("quecto", (-30 + SI_PREFIXES_OFFSET as i16) as usize),
("quetta", (30 + SI_PREFIXES_OFFSET) as usize),
("ronna", (27 + SI_PREFIXES_OFFSET) as usize),
("ronto", (-27 + SI_PREFIXES_OFFSET as i16) as usize),
("tera", (12 + SI_PREFIXES_OFFSET) as usize),
("yocto", (-24 + SI_PREFIXES_OFFSET as i16) as usize),
("yotta", (24 + SI_PREFIXES_OFFSET) as usize),
("zepto", (-21 + SI_PREFIXES_OFFSET as i16) as usize),
("zetta", (21 + SI_PREFIXES_OFFSET) as usize),
]);
// TODO: consider returning Option<(i8, &[u8])> instead of (0, part) for the case when the prefix is not found.
// TODO: complete all the cases for the prefixes.
/// Extracts the longest decimal (base 10) SI prefix found at the start of `part`.
///
/// NOTE:
///     if a prefix is found, the function returns (power, part without the prefix).
///     if no prefix is found, the function returns (0, part).
fn get_si_prefix_base_ten(part: &[u8]) -> (i8, &[u8]) {
    let mut walker = DECIMAL_PREFIXES_TRIE.cursor();
    // Fallback result when nothing matches: power 0 and the untouched input.
    let mut best = (0_i8, part);
    let mut remaining = part;

    while let Some((&byte, tail)) = remaining.split_first() {
        walker.step(byte);
        if walker.is_empty() {
            // No stored prefix continues with this byte, so no longer match is possible.
            break;
        }
        if let Some(stored) = walker.take_value() {
            // The trie stores `power + SI_PREFIXES_OFFSET`; undo the bias here.
            // `tail` is everything after the byte that completed the prefix.
            best = (stored as i8 - SI_PREFIXES_OFFSET as i8, tail);
        }
        remaining = tail;
    }

    best
}
/// A trie that contains the binary SI prefixes.
///
/// Each prefix maps to the power that `get_si_prefix` reports together with `Base::Binary`
/// (for example, "kibi" maps to 10).
///
/// NOTE:
///     - The entries are listed in sorted key order, as the constructor name requires.
///     - Every value must fit in an `i8`, because `get_si_prefix_base_two` converts it with `as i8`.
///     - A value of 0 must not be used: the lookup functions use 0 to signal "no prefix found".
///     - NOTE(review): the array length (55) presumably has to match the size of the built trie,
///       so it likely needs updating whenever the entries change; confirm against the zerotrie docs.
const BINARY_TRIE: ZeroTrieSimpleAscii<[u8; 55]> = ZeroTrieSimpleAscii::from_sorted_str_tuples(&[
    ("exbi", 60),
    ("gibi", 30),
    ("kibi", 10),
    ("mebi", 20),
    ("pebi", 50),
    ("tebi", 40),
    ("yobi", 80),
    ("zebi", 70),
]);
// TODO: consider returning Option<(i8, &[u8])> instead of (0, part) for the case when the prefix is not found.
// TODO: complete all the cases for the prefixes.
/// Extracts the longest binary (base 2) SI prefix found at the start of `part`.
///
/// NOTE:
///     if a prefix is found, the function returns (power, part without the prefix).
///     if no prefix is found, the function returns (0, part).
fn get_si_prefix_base_two(part: &[u8]) -> (i8, &[u8]) {
    let mut walker = BINARY_TRIE.cursor();
    // Fallback result when nothing matches: power 0 and the untouched input.
    let mut best = (0, part);
    let mut remaining = part;

    while let Some((&byte, tail)) = remaining.split_first() {
        walker.step(byte);
        if walker.is_empty() {
            // No stored prefix continues with this byte, so no longer match is possible.
            break;
        }
        if let Some(stored) = walker.take_value() {
            // The trie stores the power directly; `tail` is everything after the prefix.
            best = (stored as i8, tail);
        }
        remaining = tail;
    }

    best
}
// TODO: complete all the cases for the prefixes.
/// Extracts the SI prefix (decimal or binary) found at the start of `part`.
///
/// The decimal prefixes are tried first; the binary prefixes are only consulted when no
/// decimal prefix matches.
///
/// NOTE:
///     if the prefix is found, the function will return (SiPrefix, part without the prefix string).
///     if the prefix is not found, the function will return (SiPrefix { power: 0, base: Base::Decimal }, part).
pub fn get_si_prefix(part: &[u8]) -> (SiPrefix, &[u8]) {
    // A power of 0 from either helper means "no prefix matched".
    let (power, base, rest) = match get_si_prefix_base_ten(part) {
        (0, unmatched) => match get_si_prefix_base_two(unmatched) {
            // Neither table matched: report the neutral decimal prefix.
            (0, unmatched) => (0, Base::Decimal, unmatched),
            (binary_power, rest) => (binary_power, Base::Binary, rest),
        },
        (decimal_power, rest) => (decimal_power, Base::Decimal, rest),
    };

    (SiPrefix { power, base }, rest)
}