Skip to main content

reth_era/common/
file_ops.rs

1//! Era file format traits and I/O operations.
2
3use crate::e2s::{error::E2sError, types::Version};
4use std::{
5    fs::File,
6    io::{self, Read, Seek, Write},
7    path::Path,
8};
9
10/// Represents era file with generic content and identifier types
11pub trait EraFileFormat: Sized {
12    /// Content group type
13    type EraGroup;
14
15    /// The identifier type
16    type Id: EraFileId;
17
18    /// Get the version
19    fn version(&self) -> &Version;
20
21    /// Get the content group
22    fn group(&self) -> &Self::EraGroup;
23
24    /// Get the file identifier
25    fn id(&self) -> &Self::Id;
26
27    /// Create a new instance
28    fn new(group: Self::EraGroup, id: Self::Id) -> Self;
29}
30
31/// Era file identifiers
32pub trait EraFileId: Clone {
33    /// File type for this identifier
34    const FILE_TYPE: EraFileType;
35
36    /// Number of items, slots for `era`, blocks for `era1`, per era
37    const ITEMS_PER_ERA: u64;
38
39    /// Get the network name
40    fn network_name(&self) -> &str;
41
42    /// Get the starting number (block or slot)
43    fn start_number(&self) -> u64;
44
45    /// Get the count of items
46    fn count(&self) -> u32;
47
48    /// Get the optional hash identifier
49    fn hash(&self) -> Option<[u8; 4]>;
50
51    /// Whether to include era count in filename
52    fn include_era_count(&self) -> bool;
53
54    /// Calculate era number
55    fn era_number(&self) -> u64 {
56        self.start_number() / Self::ITEMS_PER_ERA
57    }
58
59    /// Calculate the number of eras spanned per file.
60    ///
61    /// If the user can decide how many slots/blocks per era file there are, we need to calculate
62    /// it. Most of the time it should be 1, but it can never be more than 2 eras per file
63    /// as there is a maximum of 8192 slots/blocks per era file.
64    fn era_count(&self) -> u64 {
65        if self.count() == 0 {
66            return 0;
67        }
68        let first_era = self.era_number();
69        let last_number = self.start_number() + self.count() as u64 - 1;
70        let last_era = last_number / Self::ITEMS_PER_ERA;
71        last_era - first_era + 1
72    }
73
74    /// Convert to standardized file name.
75    fn to_file_name(&self) -> String {
76        Self::FILE_TYPE.format_filename(
77            self.network_name(),
78            self.era_number(),
79            self.hash(),
80            self.include_era_count(),
81            self.era_count(),
82        )
83    }
84}
85
86/// [`StreamReader`] for reading era-format files
87pub trait StreamReader<R: Read + Seek>: Sized {
88    /// The file type the reader produces
89    type File: EraFileFormat;
90
91    /// The iterator type for streaming data
92    type Iterator;
93
94    /// Create a new reader
95    fn new(reader: R) -> Self;
96
97    /// Read and parse the complete file
98    fn read(self, network_name: String) -> Result<Self::File, E2sError>;
99
100    /// Get an iterator for streaming processing
101    fn iter(self) -> Self::Iterator;
102}
103
104/// [`FileReader`] provides reading era file operations for era files
105pub trait FileReader: StreamReader<File> {
106    /// Opens and reads an era file from the given path
107    fn open<P: AsRef<Path>>(
108        path: P,
109        network_name: impl Into<String>,
110    ) -> Result<Self::File, E2sError> {
111        let file = File::open(path).map_err(E2sError::Io)?;
112        let reader = Self::new(file);
113        reader.read(network_name.into())
114    }
115}
116
117impl<T: StreamReader<File>> FileReader for T {}
118
119/// [`StreamWriter`] for writing era-format files
120pub trait StreamWriter<W: Write>: Sized {
121    /// The file type this writer handles
122    type File: EraFileFormat;
123
124    /// Create a new writer
125    fn new(writer: W) -> Self;
126
127    /// Writer version
128    fn write_version(&mut self) -> Result<(), E2sError>;
129
130    /// Write a complete era file
131    fn write_file(&mut self, file: &Self::File) -> Result<(), E2sError>;
132
133    /// Flush any buffered data
134    fn flush(&mut self) -> Result<(), E2sError>;
135}
136
137/// [`StreamWriter`] provides writing file operations for era files
138pub trait FileWriter {
139    /// Era file type the writer handles
140    type File: EraFileFormat<Id: EraFileId>;
141
142    /// Creates a new file at the specified path and writes the era file to it
143    fn create<P: AsRef<Path>>(path: P, file: &Self::File) -> Result<(), E2sError>;
144
145    /// Creates a file in the directory using standardized era naming
146    fn create_with_id<P: AsRef<Path>>(directory: P, file: &Self::File) -> Result<(), E2sError>;
147}
148
149impl<T: StreamWriter<File>> FileWriter for T {
150    type File = T::File;
151
152    /// Creates a new file at the specified path and writes the era file to it
153    fn create<P: AsRef<Path>>(path: P, file: &Self::File) -> Result<(), E2sError> {
154        let file_handle = File::create(path).map_err(E2sError::Io)?;
155        let mut writer = Self::new(file_handle);
156        writer.write_file(file)?;
157        Ok(())
158    }
159
160    /// Creates a file in the directory using standardized era naming
161    fn create_with_id<P: AsRef<Path>>(directory: P, file: &Self::File) -> Result<(), E2sError> {
162        let filename = file.id().to_file_name();
163        let path = directory.as_ref().join(filename);
164        Self::create(path, file)
165    }
166}
167
/// Kind of era file, as identified by its file extension.
#[derive(Clone, Copy, Debug, PartialEq, Eq, Hash)]
pub enum EraFileType {
    /// Consensus-layer ERA file (`.era`), containing beacon blocks and states.
    Era,
    /// Execution-layer ERA1 file (`.era1`), containing pre-merge execution blocks.
    Era1,
    /// Execution-layer ERE file (`.ere`), containing execution blocks for both pre-merge and
    /// post-merge.
    Ere,
}
181
182impl EraFileType {
183    /// All file types. No extension is a suffix of another, so `from_filename`'s suffix match is
184    /// order-independent.
185    const ALL: [Self; 3] = [Self::Era, Self::Era1, Self::Ere];
186
187    /// Get the canonical file extension for this type, dot included.
188    ///
189    /// Used when writing files. For recognizing downloaded files, which may use an alternate
190    /// extension, see [`extensions`](Self::extensions).
191    pub const fn extension(&self) -> &'static str {
192        match self {
193            Self::Era => ".era",
194            Self::Era1 => ".era1",
195            Self::Ere => ".ere",
196        }
197    }
198
199    /// All file extensions this type may be published with, dot included, ordered longest-first.
200    ///
201    /// `ere` files are served as either `.erae` (current ethPandaOps naming) or `.ere`. The
202    /// longest-first order matters for substring scans so `.ere` never matches inside `.erae`.
203    pub const fn extensions(&self) -> &'static [&'static str] {
204        match self {
205            Self::Era => &[".era"],
206            Self::Era1 => &[".era1"],
207            Self::Ere => &[".erae", ".ere"],
208        }
209    }
210
211    /// Whether files of this type are published with a `checksums.txt` for verification.
212    ///
213    /// Execution-layer files (`era1`, `ere`) ship checksums; consensus-layer `era` files do not.
214    pub const fn has_checksums(&self) -> bool {
215        matches!(self, Self::Era1 | Self::Ere)
216    }
217
218    /// Detect file type from a filename
219    pub fn from_filename(filename: &str) -> Option<Self> {
220        Self::ALL.into_iter().find(|ty| ty.extensions().iter().any(|ext| filename.ends_with(ext)))
221    }
222
223    /// Generate era file name.
224    ///
225    /// Standard format: `<config-name>-<era-number>-<short-historical-root>.<ext>`
226    /// See also <https://github.com/eth-clients/e2store-format-specs/blob/main/formats/era.md#file-name>
227    ///
228    /// With era count (for custom exports):
229    /// `<config-name>-<era-number>-<era-count>-<short-historical-root>.<ext>`
230    pub fn format_filename(
231        &self,
232        network_name: &str,
233        era_number: u64,
234        hash: Option<[u8; 4]>,
235        include_era_count: bool,
236        era_count: u64,
237    ) -> String {
238        let hash = format_hash(hash);
239        // Custom exports insert an `-<era-count>` segment between the era number and the hash.
240        let era_count = if include_era_count { format!("-{era_count:05}") } else { String::new() };
241        format!("{network_name}-{era_number:05}{era_count}-{hash}{}", self.extension())
242    }
243
244    /// Detects the execution-layer file type from the files in `dir`.
245    ///
246    /// Returns the type of the first `.era1` or `.ere`/`.erae` file found. Consensus-layer
247    /// `.era` files are ignored.
248    pub fn from_dir(dir: impl AsRef<Path>) -> io::Result<Option<Self>> {
249        for entry in std::fs::read_dir(dir)? {
250            if let Some(name) = entry?.file_name().to_str() &&
251                let Some(era_type @ (Self::Era1 | Self::Ere)) = Self::from_filename(name)
252            {
253                return Ok(Some(era_type));
254            }
255        }
256
257        Ok(None)
258    }
259
260    /// Detect file type from a URL, defaulting to `Era`.
261    ///
262    /// Resolves by file extension when the URL names a file; otherwise falls back to the `era1`
263    /// host/path substring.
264    pub fn from_url(url: &str) -> Self {
265        let file_url = url.split(['?', '#']).next().unwrap_or(url);
266        if let Some(ty) = Self::from_filename(file_url) {
267            return ty;
268        }
269        if url.contains("era1") {
270            Self::Era1
271        } else if url.contains("erae") {
272            Self::Ere
273        } else {
274            Self::Era
275        }
276    }
277}
278
/// Renders the four-byte hash as eight lowercase hex digits, or `00000000` when there is none.
pub fn format_hash(hash: Option<[u8; 4]>) -> String {
    hash.map_or_else(
        || "00000000".to_string(),
        |bytes| bytes.iter().map(|byte| format!("{byte:02x}")).collect(),
    )
}
286
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_from_url_detection() {
        let cases = [
            // A URL that names a file resolves by its extension, regardless of the rest of the
            // path.
            ("https://host/mainnet-00000-abcd1234.ere", EraFileType::Ere),
            ("https://host/mainnet-00000-abcd1234.era1", EraFileType::Era1),
            ("https://host/mainnet-00000-abcd1234.era", EraFileType::Era),
            // An ERE file under a path/mirror containing `era1` still resolves by its `.ere`
            // extension.
            ("https://host/era1/mainnet-00000-abcd1234.ere", EraFileType::Ere),
            // Directory/index endpoints have no file extension and fall back to the host/path
            // substring.
            ("https://mainnet.era1.nimbus.team/", EraFileType::Era1),
            ("https://era.ithaca.xyz/", EraFileType::Era),
            ("https://data.ethpandaops.io/erae/mainnet/", EraFileType::Ere),
        ];

        for (url, expected) in cases {
            assert_eq!(EraFileType::from_url(url), expected, "url: {url}");
        }
    }

    #[test]
    fn test_from_filename_detection() {
        let cases = [
            ("mainnet-00000-abcd1234.era", Some(EraFileType::Era)),
            ("mainnet-00000-abcd1234.era1", Some(EraFileType::Era1)),
            ("mainnet-00000-abcd1234.ere", Some(EraFileType::Ere)),
            // The alternate `.erae` extension also resolves to `Ere`.
            ("mainnet-00000-abcd1234.erae", Some(EraFileType::Ere)),
            // Profile postfixes don't change extension detection.
            ("mainnet-00000-abcd1234-noproofs.ere", Some(EraFileType::Ere)),
            ("mainnet-00000-abcd1234-noproofs.erae", Some(EraFileType::Ere)),
            // Unknown extensions are not recognized.
            ("mainnet-00000-abcd1234.txt", None),
        ];

        for (filename, expected) in cases {
            assert_eq!(EraFileType::from_filename(filename), expected, "filename: {filename}");
        }
    }
}