icu_provider_fs/export/
fs_exporter.rs

1// This file is part of ICU4X. For terms of use, please see the file
2// called LICENSE at the top level of the ICU4X source tree
3// (online at: https://github.com/unicode-org/icu4x/blob/main/LICENSE ).
4
5use super::serializers::AbstractSerializer;
6use crate::datapath::get_data_marker_id;
7use crate::manifest::Manifest;
8use icu_provider::export::*;
9use icu_provider::prelude::*;
10use serde::{Deserialize, Serialize};
11use std::fmt::Write;
12use std::fs;
13use std::io::Write as _;
14use std::path::PathBuf;
15
16/// Choices of what to do if [`FilesystemExporter`] tries to write to a pre-existing directory.
17#[non_exhaustive]
18#[derive(Copy, Clone, Debug, PartialEq, Serialize, Deserialize)]
19pub enum OverwriteOption {
20    /// If the directory doesn't exist, create it.
21    /// If it does exist, remove it safely (`rmdir`) and re-create it.
22    CheckEmpty,
23    /// If the directory doesn't exist, create it.
24    /// If it does exist, remove it aggressively (`rm -rf`) and re-create it.
25    RemoveAndReplace,
26}
27
28impl Default for OverwriteOption {
29    fn default() -> Self {
30        Self::CheckEmpty
31    }
32}
33
34/// Options bag for initializing a [`FilesystemExporter`].
35#[non_exhaustive]
36#[derive(Clone, Debug, PartialEq, Serialize, Deserialize)]
37pub struct Options {
38    /// Directory in the filesystem to write output.
39    pub root: PathBuf,
40    /// Option for initializing the output directory.
41    pub overwrite: OverwriteOption,
42}
43
44impl Default for Options {
45    fn default() -> Self {
46        Self {
47            root: PathBuf::from("icu4x_data"),
48            overwrite: Default::default(),
49        }
50    }
51}
52
53impl From<PathBuf> for Options {
54    fn from(root: PathBuf) -> Self {
55        Options {
56            root,
57            ..Default::default()
58        }
59    }
60}
61
62/// A data exporter that writes data to a filesystem hierarchy.
63/// See the module-level docs for an example.
64#[derive(Debug)]
65pub struct FilesystemExporter {
66    root: PathBuf,
67    manifest: Manifest,
68    serializer: Box<dyn AbstractSerializer + Sync>,
69}
70
71impl FilesystemExporter {
72    /// Creates a new [`FilesystemExporter`] with a [serializer] and [options].
73    ///
74    /// See the module-level docs for an example.
75    ///
76    /// [serializer]: crate::export::serializers
77    /// [options]: Options
78    pub fn try_new(
79        serializer: Box<dyn AbstractSerializer + Sync>,
80        options: Options,
81    ) -> Result<Self, DataError> {
82        let result = FilesystemExporter {
83            root: options.root,
84            manifest: Manifest::for_format(serializer.get_buffer_format())?,
85            serializer,
86        };
87
88        match options.overwrite {
89            OverwriteOption::CheckEmpty if result.root.exists() => fs::remove_dir(&result.root),
90            OverwriteOption::RemoveAndReplace if result.root.exists() => {
91                fs::remove_dir_all(&result.root)
92            }
93            _ => Ok(()),
94        }
95        .and_then(|_| fs::create_dir_all(&result.root))
96        .map_err(|e| DataError::from(e).with_path_context(&result.root))?;
97
98        result.manifest.write(&result.root)?;
99        Ok(result)
100    }
101
102    fn setup_file(&self, mut path_buf: PathBuf) -> Result<Box<dyn std::io::Write>, DataError> {
103        path_buf.set_extension(self.manifest.file_extension);
104        let file: Box<dyn std::io::Write> = if self.serializer.is_text_format() {
105            Box::new(crlify::BufWriterWithLineEndingFix::new(
106                fs::File::create(&path_buf)
107                    .map_err(|e| DataError::from(e).with_path_context(&path_buf))?,
108            ))
109        } else {
110            Box::new(std::io::BufWriter::new(
111                fs::File::create(&path_buf)
112                    .map_err(|e| DataError::from(e).with_path_context(&path_buf))?,
113            ))
114        };
115        Ok(file)
116    }
117}
118
119impl DataExporter for FilesystemExporter {
120    fn put_payload(
121        &self,
122        marker: DataMarkerInfo,
123        id: DataIdentifierBorrowed,
124        payload: &DataPayload<ExportMarker>,
125    ) -> Result<(), DataError> {
126        let Some((component, marker_name)) = get_data_marker_id(marker.id) else {
127            return Err(DataErrorKind::MarkerNotFound.with_marker(marker));
128        };
129        let mut path_buf = self.root.join(component).join(marker_name);
130        if !id.marker_attributes.is_empty() {
131            path_buf.push(id.marker_attributes.as_str());
132        }
133
134        #[allow(clippy::unwrap_used)] // has parent by construction
135        let parent_dir = path_buf.parent().unwrap();
136
137        fs::create_dir_all(parent_dir)
138            .map_err(|e| DataError::from(e).with_path_context(parent_dir))?;
139
140        fs::create_dir_all(&path_buf)
141            .map_err(|e| DataError::from(e).with_path_context(&path_buf))?;
142        let mut string_path = path_buf.into_os_string();
143        write!(&mut string_path, "/{}", id.locale).expect("infallible");
144        path_buf = PathBuf::from(string_path);
145
146        let mut file = self.setup_file(path_buf)?;
147        self.serializer.serialize(payload, &mut file)
148    }
149
150    fn flush(&self, marker: DataMarkerInfo, metadata: FlushMetadata) -> Result<(), DataError> {
151        let Some((component, marker_name)) = get_data_marker_id(marker.id) else {
152            return Err(DataErrorKind::MarkerNotFound.with_marker(marker));
153        };
154        let path_buf = self.root.join(component).join(marker_name);
155
156        if !path_buf.exists() {
157            fs::create_dir_all(&path_buf)
158                .map_err(|e| DataError::from(e).with_path_context(&path_buf))?;
159            fs::File::create(path_buf.join(".empty"))?;
160        } else if let Some(checksum) = metadata.checksum {
161            write!(
162                &mut fs::File::create(path_buf.join(".checksum"))?,
163                "{checksum}"
164            )?;
165        }
166
167        Ok(())
168    }
169
170    fn flush_singleton(
171        &self,
172        marker: DataMarkerInfo,
173        payload: &DataPayload<ExportMarker>,
174        metadata: FlushMetadata,
175    ) -> Result<(), DataError> {
176        let Some((component, marker_name)) = get_data_marker_id(marker.id) else {
177            return Err(DataErrorKind::MarkerNotFound.with_marker(marker));
178        };
179        let path_buf = self.root.join(component).join(marker_name);
180
181        #[allow(clippy::unwrap_used)] // has parent by construction
182        let parent_dir = path_buf.parent().unwrap();
183
184        fs::create_dir_all(parent_dir)
185            .map_err(|e| DataError::from(e).with_path_context(parent_dir))?;
186
187        if let Some(checksum) = metadata.checksum {
188            write!(
189                &mut fs::File::create(format!("{}_checksum", path_buf.display()))?,
190                "{checksum}"
191            )
192            .unwrap();
193        }
194        let mut file = self.setup_file(path_buf)?;
195
196        self.serializer.serialize(payload, &mut file)
197    }
198}