1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
// This file is part of ICU4X. For terms of use, please see the file
// called LICENSE at the top level of the ICU4X source tree
// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).

//! 🚧 \[Unstable\] Property names-related data for this component
//!
//! <div class="stab unstable">
//! 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
//! including in SemVer minor releases. While the serde representation of data structs is guaranteed
//! to be stable, their Rust representation might not be. Use with caution.
//! </div>
//!
//! Read more about data providers: [`icu_provider`]

use crate::TimeZoneBcp47Id;
use icu_provider::prelude::*;
use zerotrie::ZeroAsciiIgnoreCaseTrie;
use zerovec::{VarZeroVec, ZeroVec};

/// [`IanaToBcp47MapV3`]'s trie cannot handle differently-cased prefixes, like `Mexico/BajaSur`` and `MET`.
///
/// Therefore, any ID that is not of the shape `{region}/{city}` gets prefixed with this character
/// inside the trie.
///
/// During lookup, if the input is not of the shape `{region}/{city}`, the trie cursor has to be advanced over
/// this byte.
pub const NON_REGION_CITY_PREFIX: u8 = b'_';

/// A mapping from normal-case IANA time zone identifiers to BCP-47 time zone identifiers.
///
/// Multiple IANA time zone IDs can map to the same BCP-47 time zone ID.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Clone, PartialEq)]
#[icu_provider::data_struct(marker(
    IanaToBcp47MapV3Marker,
    "time_zone/iana_to_bcp47@3",
    singleton
))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_timezone::provider::names))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
pub struct IanaToBcp47MapV3<'data> {
    /// A map from normal-case IANA time zone identifiers to indexes of BCP-47 time zone
    /// identifiers along with a canonical flag. The IANA identifiers are normal-case.
    ///
    /// The `usize` values stored in the trie have the following form:
    ///
    /// - Lowest bit: 1 if canonical, 0 if not canonical
    /// - All remaining bits: index into `bcp47_ids`
    ///
    /// For example, in CLDR 44, `"Africa/Abidjan"` has value 221, which means it is canonical
    /// (low bit is 1 == odd number) and the index into `bcp47_ids` is 110 (221 >> 1).
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub map: ZeroAsciiIgnoreCaseTrie<ZeroVec<'data, u8>>,
    /// A sorted list of BCP-47 time zone identifiers.
    #[cfg_attr(feature = "serde", serde(borrow))]
    // Note: this is 9739B as `ZeroVec<TimeZoneBcp47Id>` (`ZeroVec<TinyStr8>`)
    // and 9335B as `VarZeroVec<str>`
    pub bcp47_ids: ZeroVec<'data, TimeZoneBcp47Id>,
    /// An XxHash64 checksum of [`Self::bcp47_ids`].
    pub bcp47_ids_checksum: u64,
}

/// A mapping from IANA time zone identifiers to BCP-47 time zone identifiers.
///
/// The BCP-47 time zone ID maps to the default IANA time zone ID according to the CLDR data.
///
/// <div class="stab unstable">
/// 🚧 This code is considered unstable; it may change at any time, in breaking or non-breaking ways,
/// including in SemVer minor releases. While the serde representation of data structs is guaranteed
/// to be stable, their Rust representation might not be. Use with caution.
/// </div>
#[derive(Debug, Clone, PartialEq)]
#[icu_provider::data_struct(marker(
    Bcp47ToIanaMapV1Marker,
    "time_zone/bcp47_to_iana@1",
    singleton
))]
#[cfg_attr(feature = "datagen", derive(serde::Serialize, databake::Bake))]
#[cfg_attr(feature = "datagen", databake(path = icu_timezone::provider::names))]
#[cfg_attr(feature = "serde", derive(serde::Deserialize))]
#[yoke(prove_covariance_manually)]
pub struct Bcp47ToIanaMapV1<'data> {
    /// An XxHash64 checksum of [`IanaToBcp47MapV3::bcp47_ids`].
    ///
    /// The checksum here should match the checksum in [`IanaToBcp47MapV3`]
    /// if these were generated from the same data set.
    pub bcp47_ids_checksum: u64,
    /// The IANA time zone identifier corresponding to the BCP-47 ID in
    /// [`IanaToBcp47MapV3::bcp47_ids`].
    ///
    /// Since there can be more than one IANA identifier for a particular
    /// BCP-47 identifier, this list contains only the current canonical
    /// IANA identifier.
    #[cfg_attr(feature = "serde", serde(borrow))]
    pub canonical_iana_ids: VarZeroVec<'data, str>,
}