reth_static_file_types/
segment.rs

1use crate::{BlockNumber, Compression};
2use alloc::{
3    format,
4    string::{String, ToString},
5};
6use alloy_primitives::TxNumber;
7use core::{ops::RangeInclusive, str::FromStr};
8use derive_more::Display;
9use serde::{Deserialize, Serialize};
10use strum::{AsRefStr, EnumString};
11
12#[derive(
13    Debug,
14    Copy,
15    Clone,
16    Eq,
17    PartialEq,
18    Hash,
19    Ord,
20    PartialOrd,
21    Deserialize,
22    Serialize,
23    EnumString,
24    AsRefStr,
25    Display,
26)]
27#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
28/// Segment of the data that can be moved to static files.
29pub enum StaticFileSegment {
30    #[strum(serialize = "headers")]
31    /// Static File segment responsible for the `CanonicalHeaders`, `Headers`,
32    /// `HeaderTerminalDifficulties` tables.
33    Headers,
34    #[strum(serialize = "transactions")]
35    /// Static File segment responsible for the `Transactions` table.
36    Transactions,
37    #[strum(serialize = "receipts")]
38    /// Static File segment responsible for the `Receipts` table.
39    Receipts,
40    #[strum(serialize = "blockmeta")]
41    /// Static File segment responsible for the `BlockBodyIndices`, `BlockOmmers`,
42    /// `BlockWithdrawals` tables.
43    BlockMeta,
44}
45
46impl StaticFileSegment {
47    /// Returns the segment as a string.
48    pub const fn as_str(&self) -> &'static str {
49        match self {
50            Self::Headers => "headers",
51            Self::Transactions => "transactions",
52            Self::Receipts => "receipts",
53            Self::BlockMeta => "blockmeta",
54        }
55    }
56
57    /// Returns an iterator over all segments.
58    pub fn iter() -> impl Iterator<Item = Self> {
59        // The order of segments is significant and must be maintained to ensure correctness. For
60        // example, Transactions require BlockBodyIndices from Blockmeta to be sound.
61        [Self::Headers, Self::BlockMeta, Self::Transactions, Self::Receipts].into_iter()
62    }
63
64    /// Returns the default configuration of the segment.
65    pub const fn config(&self) -> SegmentConfig {
66        SegmentConfig { compression: Compression::Lz4 }
67    }
68
69    /// Returns the number of columns for the segment
70    pub const fn columns(&self) -> usize {
71        match self {
72            Self::Headers | Self::BlockMeta => 3,
73            Self::Transactions | Self::Receipts => 1,
74        }
75    }
76
77    /// Returns the default file name for the provided segment and range.
78    pub fn filename(&self, block_range: &SegmentRangeInclusive) -> String {
79        // ATTENTION: if changing the name format, be sure to reflect those changes in
80        // [`Self::parse_filename`].
81        format!("static_file_{}_{}_{}", self.as_ref(), block_range.start(), block_range.end())
82    }
83
84    /// Returns file name for the provided segment and range, alongside filters, compression.
85    pub fn filename_with_configuration(
86        &self,
87        compression: Compression,
88        block_range: &SegmentRangeInclusive,
89    ) -> String {
90        let prefix = self.filename(block_range);
91
92        let filters_name = "none".to_string();
93
94        // ATTENTION: if changing the name format, be sure to reflect those changes in
95        // [`Self::parse_filename`.]
96        format!("{prefix}_{}_{}", filters_name, compression.as_ref())
97    }
98
99    /// Parses a filename into a `StaticFileSegment` and its expected block range.
100    ///
101    /// The filename is expected to follow the format:
102    /// "`static_file`_{segment}_{`block_start`}_{`block_end`}". This function checks
103    /// for the correct prefix ("`static_file`"), and then parses the segment and the inclusive
104    /// ranges for blocks. It ensures that the start of each range is less than or equal to the
105    /// end.
106    ///
107    /// # Returns
108    /// - `Some((segment, block_range))` if parsing is successful and all conditions are met.
109    /// - `None` if any condition fails, such as an incorrect prefix, parsing error, or invalid
110    ///   range.
111    ///
112    /// # Note
113    /// This function is tightly coupled with the naming convention defined in [`Self::filename`].
114    /// Any changes in the filename format in `filename` should be reflected here.
115    pub fn parse_filename(name: &str) -> Option<(Self, SegmentRangeInclusive)> {
116        let mut parts = name.split('_');
117        if !(parts.next() == Some("static") && parts.next() == Some("file")) {
118            return None
119        }
120
121        let segment = Self::from_str(parts.next()?).ok()?;
122        let (block_start, block_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?);
123
124        if block_start > block_end {
125            return None
126        }
127
128        Some((segment, SegmentRangeInclusive::new(block_start, block_end)))
129    }
130
131    /// Returns `true` if the segment is `StaticFileSegment::Headers`.
132    pub const fn is_headers(&self) -> bool {
133        matches!(self, Self::Headers)
134    }
135
136    /// Returns `true` if the segment is `StaticFileSegment::BlockMeta`.
137    pub const fn is_block_meta(&self) -> bool {
138        matches!(self, Self::BlockMeta)
139    }
140
141    /// Returns `true` if the segment is `StaticFileSegment::Receipts`.
142    pub const fn is_receipts(&self) -> bool {
143        matches!(self, Self::Receipts)
144    }
145
146    /// Returns `true` if a segment row is linked to a transaction.
147    pub const fn is_tx_based(&self) -> bool {
148        matches!(self, Self::Receipts | Self::Transactions)
149    }
150
151    /// Returns `true` if a segment row is linked to a block.
152    pub const fn is_block_based(&self) -> bool {
153        matches!(self, Self::Headers | Self::BlockMeta)
154    }
155}
156
157/// A segment header that contains information common to all segments. Used for storage.
158#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone)]
159pub struct SegmentHeader {
160    /// Defines the expected block range for a static file segment. This attribute is crucial for
161    /// scenarios where the file contains no data, allowing for a representation beyond a
162    /// simple `start..=start` range. It ensures clarity in differentiating between an empty file
163    /// and a file with a single block numbered 0.
164    expected_block_range: SegmentRangeInclusive,
165    /// Block range of data on the static file segment
166    block_range: Option<SegmentRangeInclusive>,
167    /// Transaction range of data of the static file segment
168    tx_range: Option<SegmentRangeInclusive>,
169    /// Segment type
170    segment: StaticFileSegment,
171}
172
173impl SegmentHeader {
174    /// Returns [`SegmentHeader`].
175    pub const fn new(
176        expected_block_range: SegmentRangeInclusive,
177        block_range: Option<SegmentRangeInclusive>,
178        tx_range: Option<SegmentRangeInclusive>,
179        segment: StaticFileSegment,
180    ) -> Self {
181        Self { expected_block_range, block_range, tx_range, segment }
182    }
183
184    /// Returns the static file segment kind.
185    pub const fn segment(&self) -> StaticFileSegment {
186        self.segment
187    }
188
189    /// Returns the block range.
190    pub const fn block_range(&self) -> Option<&SegmentRangeInclusive> {
191        self.block_range.as_ref()
192    }
193
194    /// Returns the transaction range.
195    pub const fn tx_range(&self) -> Option<&SegmentRangeInclusive> {
196        self.tx_range.as_ref()
197    }
198
199    /// The expected block start of the segment.
200    pub const fn expected_block_start(&self) -> BlockNumber {
201        self.expected_block_range.start()
202    }
203
204    /// The expected block end of the segment.
205    pub const fn expected_block_end(&self) -> BlockNumber {
206        self.expected_block_range.end()
207    }
208
209    /// Returns the first block number of the segment.
210    pub fn block_start(&self) -> Option<BlockNumber> {
211        self.block_range.as_ref().map(|b| b.start())
212    }
213
214    /// Returns the last block number of the segment.
215    pub fn block_end(&self) -> Option<BlockNumber> {
216        self.block_range.as_ref().map(|b| b.end())
217    }
218
219    /// Returns the first transaction number of the segment.
220    pub fn tx_start(&self) -> Option<TxNumber> {
221        self.tx_range.as_ref().map(|t| t.start())
222    }
223
224    /// Returns the last transaction number of the segment.
225    pub fn tx_end(&self) -> Option<TxNumber> {
226        self.tx_range.as_ref().map(|t| t.end())
227    }
228
229    /// Number of transactions.
230    pub fn tx_len(&self) -> Option<u64> {
231        self.tx_range.as_ref().map(|r| (r.end() + 1) - r.start())
232    }
233
234    /// Number of blocks.
235    pub fn block_len(&self) -> Option<u64> {
236        self.block_range.as_ref().map(|r| (r.end() + 1) - r.start())
237    }
238
239    /// Increments block end range depending on segment
240    pub fn increment_block(&mut self) -> BlockNumber {
241        if let Some(block_range) = &mut self.block_range {
242            block_range.end += 1;
243            block_range.end
244        } else {
245            self.block_range = Some(SegmentRangeInclusive::new(
246                self.expected_block_start(),
247                self.expected_block_start(),
248            ));
249            self.expected_block_start()
250        }
251    }
252
253    /// Increments tx end range depending on segment
254    pub fn increment_tx(&mut self) {
255        if self.segment.is_tx_based() {
256            if let Some(tx_range) = &mut self.tx_range {
257                tx_range.end += 1;
258            } else {
259                self.tx_range = Some(SegmentRangeInclusive::new(0, 0));
260            }
261        }
262    }
263
264    /// Removes `num` elements from end of tx or block range.
265    pub fn prune(&mut self, num: u64) {
266        if self.segment.is_block_based() {
267            if let Some(range) = &mut self.block_range {
268                if num > range.end - range.start {
269                    self.block_range = None;
270                } else {
271                    range.end = range.end.saturating_sub(num);
272                }
273            };
274        } else if let Some(range) = &mut self.tx_range {
275            if num > range.end - range.start {
276                self.tx_range = None;
277            } else {
278                range.end = range.end.saturating_sub(num);
279            }
280        }
281    }
282
283    /// Sets a new `block_range`.
284    pub fn set_block_range(&mut self, block_start: BlockNumber, block_end: BlockNumber) {
285        if let Some(block_range) = &mut self.block_range {
286            block_range.start = block_start;
287            block_range.end = block_end;
288        } else {
289            self.block_range = Some(SegmentRangeInclusive::new(block_start, block_end))
290        }
291    }
292
293    /// Sets a new `tx_range`.
294    pub fn set_tx_range(&mut self, tx_start: TxNumber, tx_end: TxNumber) {
295        if let Some(tx_range) = &mut self.tx_range {
296            tx_range.start = tx_start;
297            tx_range.end = tx_end;
298        } else {
299            self.tx_range = Some(SegmentRangeInclusive::new(tx_start, tx_end))
300        }
301    }
302
303    /// Returns the row offset which depends on whether the segment is block or transaction based.
304    pub fn start(&self) -> Option<u64> {
305        if self.segment.is_block_based() {
306            return self.block_start()
307        }
308        self.tx_start()
309    }
310}
311
312/// Configuration used on the segment.
313#[derive(Debug, Clone, Copy)]
314pub struct SegmentConfig {
315    /// Compression used on the segment
316    pub compression: Compression,
317}
318
319/// Helper type to handle segment transaction and block INCLUSIVE ranges.
320///
321/// They can be modified on a hot loop, which makes the `std::ops::RangeInclusive` a poor fit.
322#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone, Copy)]
323pub struct SegmentRangeInclusive {
324    start: u64,
325    end: u64,
326}
327
328impl SegmentRangeInclusive {
329    /// Creates a new [`SegmentRangeInclusive`]
330    pub const fn new(start: u64, end: u64) -> Self {
331        Self { start, end }
332    }
333
334    /// Start of the inclusive range
335    pub const fn start(&self) -> u64 {
336        self.start
337    }
338
339    /// End of the inclusive range
340    pub const fn end(&self) -> u64 {
341        self.end
342    }
343}
344
345impl core::fmt::Display for SegmentRangeInclusive {
346    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
347        write!(f, "{}..={}", self.start, self.end)
348    }
349}
350
351impl From<RangeInclusive<u64>> for SegmentRangeInclusive {
352    fn from(value: RangeInclusive<u64>) -> Self {
353        Self { start: *value.start(), end: *value.end() }
354    }
355}
356
357impl From<&SegmentRangeInclusive> for RangeInclusive<u64> {
358    fn from(value: &SegmentRangeInclusive) -> Self {
359        value.start()..=value.end()
360    }
361}
362
363impl From<SegmentRangeInclusive> for RangeInclusive<u64> {
364    fn from(value: SegmentRangeInclusive) -> Self {
365        (&value).into()
366    }
367}
368
#[cfg(test)]
mod tests {
    use super::*;
    use alloy_primitives::hex;
    use reth_nippy_jar::NippyJar;

    /// File names are generated as expected and parse back into the segment and block range.
    #[test]
    fn test_filename() {
        // (segment, block range, expected file name, compression). A `None` compression selects
        // the plain `filename`, `Some` selects `filename_with_configuration`.
        let test_vectors = [
            (StaticFileSegment::Headers, 2..=30, "static_file_headers_2_30", None),
            (StaticFileSegment::Receipts, 30..=300, "static_file_receipts_30_300", None),
            (
                StaticFileSegment::Transactions,
                1_123_233..=11_223_233,
                "static_file_transactions_1123233_11223233",
                None,
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_lz4",
                Some(Compression::Lz4),
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_zstd",
                Some(Compression::Zstd),
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_zstd-dict",
                Some(Compression::ZstdWithDictionary),
            ),
        ];

        for (segment, raw_range, expected_name, compression) in test_vectors {
            let block_range = SegmentRangeInclusive::from(raw_range);
            let generated = match compression {
                Some(compression) => segment.filename_with_configuration(compression, &block_range),
                None => segment.filename(&block_range),
            };
            assert_eq!(generated, expected_name);

            assert_eq!(
                StaticFileSegment::parse_filename(expected_name),
                Some((segment, block_range))
            );
        }

        // Names with a missing block range component must be rejected.
        assert_eq!(StaticFileSegment::parse_filename("static_file_headers_2"), None);
        assert_eq!(StaticFileSegment::parse_filename("static_file_headers_"), None);

        // roundtrip test
        let dummy_range = SegmentRangeInclusive::new(123, 1230);
        for segment in StaticFileSegment::iter() {
            let parsed = StaticFileSegment::parse_filename(&segment.filename(&dummy_range));
            assert_eq!(Some((segment, dummy_range)), parsed);
        }
    }

    /// Previously serialized jars must still decode into the expected [`SegmentHeader`]s.
    #[test]
    fn test_segment_config_backwards() {
        let headers = hex!("010000000000000000000000000000001fa10700000000000100000000000000001fa10700000000000000000000030000000000000020a107000000000001010000004a02000000000000");
        let transactions = hex!("010000000000000000000000000000001fa10700000000000100000000000000001fa107000000000001000000000000000034a107000000000001000000010000000000000035a1070000000000004010000000000000");
        let receipts = hex!("010000000000000000000000000000001fa10700000000000100000000000000000000000000000000000200000001000000000000000000000000000000000000000000000000");

        let expected_block_range = SegmentRangeInclusive::new(0, 499999);

        // (serialized jar, header it must decode to)
        let cases: [(&[u8], SegmentHeader); 3] = [
            (
                &headers[..],
                SegmentHeader {
                    expected_block_range,
                    block_range: Some(SegmentRangeInclusive::new(0, 499999)),
                    tx_range: None,
                    segment: StaticFileSegment::Headers,
                },
            ),
            (
                &transactions[..],
                SegmentHeader {
                    expected_block_range,
                    block_range: Some(SegmentRangeInclusive::new(0, 499999)),
                    tx_range: Some(SegmentRangeInclusive::new(0, 500020)),
                    segment: StaticFileSegment::Transactions,
                },
            ),
            (
                &receipts[..],
                SegmentHeader {
                    expected_block_range,
                    block_range: Some(SegmentRangeInclusive::new(0, 0)),
                    tx_range: None,
                    segment: StaticFileSegment::Receipts,
                },
            ),
        ];

        for (bytes, expected) in cases {
            let jar = NippyJar::<SegmentHeader>::load_from_reader(bytes).unwrap();
            assert_eq!(&expected, jar.user_header());
        }
    }
}