reth_static_file_types/
segment.rs

1use crate::{BlockNumber, Compression};
2use alloc::{
3    format,
4    string::{String, ToString},
5};
6use alloy_primitives::TxNumber;
7use core::{ops::RangeInclusive, str::FromStr};
8use derive_more::Display;
9use serde::{Deserialize, Serialize};
10use strum::{AsRefStr, EnumString};
11
12#[derive(
13    Debug,
14    Copy,
15    Clone,
16    Eq,
17    PartialEq,
18    Hash,
19    Ord,
20    PartialOrd,
21    Deserialize,
22    Serialize,
23    EnumString,
24    AsRefStr,
25    Display,
26)]
27#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
28/// Segment of the data that can be moved to static files.
29pub enum StaticFileSegment {
30    #[strum(serialize = "headers")]
31    /// Static File segment responsible for the `CanonicalHeaders`, `Headers`,
32    /// `HeaderTerminalDifficulties` tables.
33    Headers,
34    #[strum(serialize = "transactions")]
35    /// Static File segment responsible for the `Transactions` table.
36    Transactions,
37    #[strum(serialize = "receipts")]
38    /// Static File segment responsible for the `Receipts` table.
39    Receipts,
40}
41
42impl StaticFileSegment {
43    /// Returns the segment as a string.
44    pub const fn as_str(&self) -> &'static str {
45        match self {
46            Self::Headers => "headers",
47            Self::Transactions => "transactions",
48            Self::Receipts => "receipts",
49        }
50    }
51
52    /// Returns an iterator over all segments.
53    pub fn iter() -> impl Iterator<Item = Self> {
54        // The order of segments is significant and must be maintained to ensure correctness.
55        [Self::Headers, Self::Transactions, Self::Receipts].into_iter()
56    }
57
58    /// Returns the default configuration of the segment.
59    pub const fn config(&self) -> SegmentConfig {
60        SegmentConfig { compression: Compression::Lz4 }
61    }
62
63    /// Returns the number of columns for the segment
64    pub const fn columns(&self) -> usize {
65        match self {
66            Self::Headers => 3,
67            Self::Transactions | Self::Receipts => 1,
68        }
69    }
70
71    /// Returns the default file name for the provided segment and range.
72    pub fn filename(&self, block_range: &SegmentRangeInclusive) -> String {
73        // ATTENTION: if changing the name format, be sure to reflect those changes in
74        // [`Self::parse_filename`].
75        format!("static_file_{}_{}_{}", self.as_ref(), block_range.start(), block_range.end())
76    }
77
78    /// Returns file name for the provided segment and range, alongside filters, compression.
79    pub fn filename_with_configuration(
80        &self,
81        compression: Compression,
82        block_range: &SegmentRangeInclusive,
83    ) -> String {
84        let prefix = self.filename(block_range);
85
86        let filters_name = "none".to_string();
87
88        // ATTENTION: if changing the name format, be sure to reflect those changes in
89        // [`Self::parse_filename`.]
90        format!("{prefix}_{}_{}", filters_name, compression.as_ref())
91    }
92
93    /// Parses a filename into a `StaticFileSegment` and its expected block range.
94    ///
95    /// The filename is expected to follow the format:
96    /// "`static_file`_{segment}_{`block_start`}_{`block_end`}". This function checks
97    /// for the correct prefix ("`static_file`"), and then parses the segment and the inclusive
98    /// ranges for blocks. It ensures that the start of each range is less than or equal to the
99    /// end.
100    ///
101    /// # Returns
102    /// - `Some((segment, block_range))` if parsing is successful and all conditions are met.
103    /// - `None` if any condition fails, such as an incorrect prefix, parsing error, or invalid
104    ///   range.
105    ///
106    /// # Note
107    /// This function is tightly coupled with the naming convention defined in [`Self::filename`].
108    /// Any changes in the filename format in `filename` should be reflected here.
109    pub fn parse_filename(name: &str) -> Option<(Self, SegmentRangeInclusive)> {
110        let mut parts = name.split('_');
111        if !(parts.next() == Some("static") && parts.next() == Some("file")) {
112            return None
113        }
114
115        let segment = Self::from_str(parts.next()?).ok()?;
116        let (block_start, block_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?);
117
118        if block_start > block_end {
119            return None
120        }
121
122        Some((segment, SegmentRangeInclusive::new(block_start, block_end)))
123    }
124
125    /// Returns `true` if the segment is `StaticFileSegment::Headers`.
126    pub const fn is_headers(&self) -> bool {
127        matches!(self, Self::Headers)
128    }
129
130    /// Returns `true` if the segment is `StaticFileSegment::Receipts`.
131    pub const fn is_receipts(&self) -> bool {
132        matches!(self, Self::Receipts)
133    }
134
135    /// Returns `true` if a segment row is linked to a transaction.
136    pub const fn is_tx_based(&self) -> bool {
137        matches!(self, Self::Receipts | Self::Transactions)
138    }
139
140    /// Returns `true` if a segment row is linked to a block.
141    pub const fn is_block_based(&self) -> bool {
142        matches!(self, Self::Headers)
143    }
144}
145
146/// A segment header that contains information common to all segments. Used for storage.
147#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone)]
148pub struct SegmentHeader {
149    /// Defines the expected block range for a static file segment. This attribute is crucial for
150    /// scenarios where the file contains no data, allowing for a representation beyond a
151    /// simple `start..=start` range. It ensures clarity in differentiating between an empty file
152    /// and a file with a single block numbered 0.
153    expected_block_range: SegmentRangeInclusive,
154    /// Block range of data on the static file segment
155    block_range: Option<SegmentRangeInclusive>,
156    /// Transaction range of data of the static file segment
157    tx_range: Option<SegmentRangeInclusive>,
158    /// Segment type
159    segment: StaticFileSegment,
160}
161
162impl SegmentHeader {
163    /// Returns [`SegmentHeader`].
164    pub const fn new(
165        expected_block_range: SegmentRangeInclusive,
166        block_range: Option<SegmentRangeInclusive>,
167        tx_range: Option<SegmentRangeInclusive>,
168        segment: StaticFileSegment,
169    ) -> Self {
170        Self { expected_block_range, block_range, tx_range, segment }
171    }
172
173    /// Returns the static file segment kind.
174    pub const fn segment(&self) -> StaticFileSegment {
175        self.segment
176    }
177
178    /// Returns the block range.
179    pub const fn block_range(&self) -> Option<&SegmentRangeInclusive> {
180        self.block_range.as_ref()
181    }
182
183    /// Returns the transaction range.
184    pub const fn tx_range(&self) -> Option<&SegmentRangeInclusive> {
185        self.tx_range.as_ref()
186    }
187
188    /// The expected block start of the segment.
189    pub const fn expected_block_start(&self) -> BlockNumber {
190        self.expected_block_range.start()
191    }
192
193    /// The expected block end of the segment.
194    pub const fn expected_block_end(&self) -> BlockNumber {
195        self.expected_block_range.end()
196    }
197
198    /// Returns the first block number of the segment.
199    pub fn block_start(&self) -> Option<BlockNumber> {
200        self.block_range.as_ref().map(|b| b.start())
201    }
202
203    /// Returns the last block number of the segment.
204    pub fn block_end(&self) -> Option<BlockNumber> {
205        self.block_range.as_ref().map(|b| b.end())
206    }
207
208    /// Returns the first transaction number of the segment.
209    pub fn tx_start(&self) -> Option<TxNumber> {
210        self.tx_range.as_ref().map(|t| t.start())
211    }
212
213    /// Returns the last transaction number of the segment.
214    pub fn tx_end(&self) -> Option<TxNumber> {
215        self.tx_range.as_ref().map(|t| t.end())
216    }
217
218    /// Number of transactions.
219    pub fn tx_len(&self) -> Option<u64> {
220        self.tx_range.as_ref().map(|r| (r.end() + 1) - r.start())
221    }
222
223    /// Number of blocks.
224    pub fn block_len(&self) -> Option<u64> {
225        self.block_range.as_ref().map(|r| (r.end() + 1) - r.start())
226    }
227
228    /// Increments block end range depending on segment
229    pub const fn increment_block(&mut self) -> BlockNumber {
230        if let Some(block_range) = &mut self.block_range {
231            block_range.end += 1;
232            block_range.end
233        } else {
234            self.block_range = Some(SegmentRangeInclusive::new(
235                self.expected_block_start(),
236                self.expected_block_start(),
237            ));
238            self.expected_block_start()
239        }
240    }
241
242    /// Increments tx end range depending on segment
243    pub const fn increment_tx(&mut self) {
244        if self.segment.is_tx_based() {
245            if let Some(tx_range) = &mut self.tx_range {
246                tx_range.end += 1;
247            } else {
248                self.tx_range = Some(SegmentRangeInclusive::new(0, 0));
249            }
250        }
251    }
252
253    /// Removes `num` elements from end of tx or block range.
254    pub const fn prune(&mut self, num: u64) {
255        if self.segment.is_block_based() {
256            if let Some(range) = &mut self.block_range {
257                if num > range.end - range.start {
258                    self.block_range = None;
259                } else {
260                    range.end = range.end.saturating_sub(num);
261                }
262            };
263        } else if let Some(range) = &mut self.tx_range {
264            if num > range.end - range.start {
265                self.tx_range = None;
266            } else {
267                range.end = range.end.saturating_sub(num);
268            }
269        }
270    }
271
272    /// Sets a new `block_range`.
273    pub const fn set_block_range(&mut self, block_start: BlockNumber, block_end: BlockNumber) {
274        if let Some(block_range) = &mut self.block_range {
275            block_range.start = block_start;
276            block_range.end = block_end;
277        } else {
278            self.block_range = Some(SegmentRangeInclusive::new(block_start, block_end))
279        }
280    }
281
282    /// Sets a new `tx_range`.
283    pub const fn set_tx_range(&mut self, tx_start: TxNumber, tx_end: TxNumber) {
284        if let Some(tx_range) = &mut self.tx_range {
285            tx_range.start = tx_start;
286            tx_range.end = tx_end;
287        } else {
288            self.tx_range = Some(SegmentRangeInclusive::new(tx_start, tx_end))
289        }
290    }
291
292    /// Returns the row offset which depends on whether the segment is block or transaction based.
293    pub fn start(&self) -> Option<u64> {
294        if self.segment.is_block_based() {
295            return self.block_start()
296        }
297        self.tx_start()
298    }
299}
300
301/// Configuration used on the segment.
302#[derive(Debug, Clone, Copy)]
303pub struct SegmentConfig {
304    /// Compression used on the segment
305    pub compression: Compression,
306}
307
308/// Helper type to handle segment transaction and block INCLUSIVE ranges.
309///
310/// They can be modified on a hot loop, which makes the `std::ops::RangeInclusive` a poor fit.
311#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone, Copy)]
312pub struct SegmentRangeInclusive {
313    start: u64,
314    end: u64,
315}
316
317impl SegmentRangeInclusive {
318    /// Creates a new [`SegmentRangeInclusive`]
319    pub const fn new(start: u64, end: u64) -> Self {
320        Self { start, end }
321    }
322
323    /// Start of the inclusive range
324    pub const fn start(&self) -> u64 {
325        self.start
326    }
327
328    /// End of the inclusive range
329    pub const fn end(&self) -> u64 {
330        self.end
331    }
332}
333
334impl core::fmt::Display for SegmentRangeInclusive {
335    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
336        write!(f, "{}..={}", self.start, self.end)
337    }
338}
339
340impl From<RangeInclusive<u64>> for SegmentRangeInclusive {
341    fn from(value: RangeInclusive<u64>) -> Self {
342        Self { start: *value.start(), end: *value.end() }
343    }
344}
345
346impl From<&SegmentRangeInclusive> for RangeInclusive<u64> {
347    fn from(value: &SegmentRangeInclusive) -> Self {
348        value.start()..=value.end()
349    }
350}
351
352impl From<SegmentRangeInclusive> for RangeInclusive<u64> {
353    fn from(value: SegmentRangeInclusive) -> Self {
354        (&value).into()
355    }
356}
357
#[cfg(test)]
mod tests {
    use super::*;
    use alloy_primitives::hex;
    use reth_nippy_jar::NippyJar;

    /// Checks file name generation, parsing of generated names, rejection of truncated names and
    /// the generate/parse round trip for every segment.
    #[test]
    fn test_filename() {
        // (segment, block range, expected file name, optional compression)
        let test_vectors = [
            (StaticFileSegment::Headers, 2..=30, "static_file_headers_2_30", None),
            (StaticFileSegment::Receipts, 30..=300, "static_file_receipts_30_300", None),
            (
                StaticFileSegment::Transactions,
                1_123_233..=11_223_233,
                "static_file_transactions_1123233_11223233",
                None,
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_lz4",
                Some(Compression::Lz4),
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_zstd",
                Some(Compression::Zstd),
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_zstd-dict",
                Some(Compression::ZstdWithDictionary),
            ),
        ];

        for (segment, block_range, filename, compression) in test_vectors {
            let block_range = SegmentRangeInclusive::from(block_range);

            // With a compression the fully configured name is expected, otherwise the plain one.
            let generated = match compression {
                Some(compression) => segment.filename_with_configuration(compression, &block_range),
                None => segment.filename(&block_range),
            };
            assert_eq!(generated, filename);

            assert_eq!(StaticFileSegment::parse_filename(filename), Some((segment, block_range)));
        }

        // Names lacking part of the block range must be rejected.
        for truncated in ["static_file_headers_2", "static_file_headers_"] {
            assert_eq!(StaticFileSegment::parse_filename(truncated), None);
        }

        // roundtrip test
        let dummy_range = SegmentRangeInclusive::new(123, 1230);
        for segment in StaticFileSegment::iter() {
            let filename = segment.filename(&dummy_range);
            assert_eq!(Some((segment, dummy_range)), StaticFileSegment::parse_filename(&filename));
        }
    }

    /// Decodes fixed serialized jars and checks that the embedded [`SegmentHeader`] still
    /// deserializes to the expected value.
    #[test]
    fn test_segment_config_backwards() {
        let headers = hex!(
            "010000000000000000000000000000001fa10700000000000100000000000000001fa10700000000000000000000030000000000000020a107000000000001010000004a02000000000000"
        );
        let transactions = hex!(
            "010000000000000000000000000000001fa10700000000000100000000000000001fa107000000000001000000000000000034a107000000000001000000010000000000000035a1070000000000004010000000000000"
        );
        let receipts = hex!(
            "010000000000000000000000000000001fa10700000000000100000000000000000000000000000000000200000001000000000000000000000000000000000000000000000000"
        );

        // (serialized jar, header expected after decoding)
        let cases = [
            (
                &headers[..],
                SegmentHeader {
                    expected_block_range: SegmentRangeInclusive::new(0, 499999),
                    block_range: Some(SegmentRangeInclusive::new(0, 499999)),
                    tx_range: None,
                    segment: StaticFileSegment::Headers,
                },
            ),
            (
                &transactions[..],
                SegmentHeader {
                    expected_block_range: SegmentRangeInclusive::new(0, 499999),
                    block_range: Some(SegmentRangeInclusive::new(0, 499999)),
                    tx_range: Some(SegmentRangeInclusive::new(0, 500020)),
                    segment: StaticFileSegment::Transactions,
                },
            ),
            (
                &receipts[..],
                SegmentHeader {
                    expected_block_range: SegmentRangeInclusive::new(0, 499999),
                    block_range: Some(SegmentRangeInclusive::new(0, 0)),
                    tx_range: None,
                    segment: StaticFileSegment::Receipts,
                },
            ),
        ];

        for (raw, expected) in cases {
            let jar = NippyJar::<SegmentHeader>::load_from_reader(raw).unwrap();
            assert_eq!(&expected, jar.user_header());
        }
    }
}