resb/binary/
header.rs

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

use super::{read_u16, BinaryDeserializerError, CharsetFamily, Endianness, FormatVersion};

/// The `BinHeader` struct represents the in-memory layout of a binary resource
/// bundle header.
#[repr(C)]
pub(super) struct BinHeader {
    /// The size of the header in bytes, padded such that it is divisible by 16.
    pub size: u16,

    /// A magic word. See [`MAGIC_WORD`].
    pub magic: [u8; 2],

    /// Information on the representation of data in the binary bundle.
    pub repr_info: BinReprInfo,
}

/// The `BinReprInfo` struct represents the in-memory layout of a binary
/// resource bundle's representation specifiers. These data describe the
/// parameters necessary for correctly reading a bundle file.
#[repr(C)]
pub(super) struct BinReprInfo {
    /// The size of the representation info in bytes.
    pub size: u16,

    /// Reserved. Always 0.
    pub reserved_word: u16,

    /// The endianness of values in the file. `0` for little endian and `1` for
    /// big endian.
    pub endianness: Endianness,

    /// The character set family in which key strings are represented.
    pub charset_family: CharsetFamily,

    /// The size of a character in a string resource in bytes. May be 1, 2, or
    /// 4.
    pub size_of_char: u8,

    /// Reserved. Always 0.
    pub reserved_byte: u8,

    /// The data format identifier. Always `b"ResB"`.
    pub data_format: [u8; 4],

    /// The format version used for laying out the bundle.
    pub format_version: FormatVersion,

    /// The version of the data in the file. This is `[1, 4, 0, 0]` in all known
    /// versions of ICU4C's `genrb`.
    pub data_version: [u8; 4],
}

impl TryFrom<&[u8]> for BinHeader {
    type Error = BinaryDeserializerError;

    // We can safely index in this function as we first guarantee that the input
    // is sufficiently large.
    #[allow(clippy::indexing_slicing)]
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        if value.len() < core::mem::size_of::<BinHeader>() {
            return Err(BinaryDeserializerError::invalid_data(
                "input is too short to contain file header",
            ));
        }

        let (size, value) = read_u16(value)?;

        // If the magic word does not match, we are not reading a valid resource
        // bundle and will not be able to proceed.
        let (magic, value) = ([value[0], value[1]], &value[2..]);
        match magic {
            [0xda, 0x27] => (),
            _ => {
                return Err(BinaryDeserializerError::invalid_data(
                    "magic word does not match",
                ))
            }
        }

        let repr_info = BinReprInfo::try_from(value)?;

        if (size as usize) < core::mem::size_of::<BinHeader>() {
            return Err(BinaryDeserializerError::invalid_data(
                "header size specified in file is smaller than expected",
            ));
        }

        Ok(BinHeader {
            size,
            magic,
            repr_info,
        })
    }
}

impl TryFrom<&[u8]> for BinReprInfo {
    type Error = BinaryDeserializerError;

    // We can safely index in this function as we first guarantee that the input
    // is sufficiently large.
    #[allow(clippy::indexing_slicing)]
    fn try_from(value: &[u8]) -> Result<Self, Self::Error> {
        if value.len() < core::mem::size_of::<BinReprInfo>() {
            return Err(BinaryDeserializerError::invalid_data(
                "input is too short to contain representation info",
            ));
        }

        let (size, value) = read_u16(value)?;
        let (reserved_word, value) = read_u16(value)?;

        // While the consumer is responsible for verifying acceptability of most
        // contents of the repr info, we explicitly depend on little endian data
        // in order to ensure compatibility with `zerovec`.
        let (endianness, value) = (Endianness::try_from(value[0])?, &value[1..]);
        if endianness != Endianness::Little {
            return Err(BinaryDeserializerError::unsupported_format(
                "big-endian bundles are not supported",
            ));
        }

        let (charset_family, value) = (CharsetFamily::try_from(value[0])?, &value[1..]);
        let (size_of_char, value) = (value[0], &value[1..]);
        let (reserved_byte, value) = (value[0], &value[1..]);

        // A mismatch in the data format indicates that we are not reading a
        // valid resource bundle.
        let (data_format, value) = ([value[0], value[1], value[2], value[3]], &value[4..]);
        if &data_format != b"ResB" {
            return Err(BinaryDeserializerError::invalid_data(
                "unexpected data format value",
            ));
        }

        let (format_version, value) = (FormatVersion::try_from(&value[..4])?, &value[4..]);
        let (data_version, _) = ([value[0], value[1], value[2], value[3]], &value[4..]);

        Ok(BinReprInfo {
            size,
            reserved_word,
            endianness,
            charset_family,
            size_of_char,
            reserved_byte,
            data_format,
            format_version,
            data_version,
        })
    }
}