reth_static_file_types/
segment.rs

1use crate::{BlockNumber, Compression};
2use alloc::{format, string::String};
3use alloy_primitives::TxNumber;
4use core::{ops::RangeInclusive, str::FromStr};
5use serde::{Deserialize, Serialize};
6use strum::{EnumIs, EnumString};
7
#[derive(
    Debug,
    Copy,
    Clone,
    Eq,
    PartialEq,
    Hash,
    Ord,
    PartialOrd,
    Deserialize,
    Serialize,
    EnumString,
    derive_more::Display,
    EnumIs,
)]
#[strum(serialize_all = "kebab-case")]
#[cfg_attr(feature = "clap", derive(clap::ValueEnum))]
/// Segment of the data that can be moved to static files.
///
/// Parsing from a string (the `EnumString` derive) uses kebab-case names such as
/// `transaction-senders`, matching what [`StaticFileSegment::as_str`] returns. Serde
/// (de)serialization uses the variant names as written here, e.g. `"TransactionSenders"`.
///
/// The derived `Ord`/`PartialOrd` follow the declaration order of the variants, which is also the
/// order in which [`StaticFileSegment::iter`] yields them.
pub enum StaticFileSegment {
    /// Static File segment responsible for the `CanonicalHeaders`, `Headers`,
    /// `HeaderTerminalDifficulties` tables.
    Headers,
    /// Static File segment responsible for the `Transactions` table.
    Transactions,
    /// Static File segment responsible for the `Receipts` table.
    Receipts,
    /// Static File segment responsible for the `TransactionSenders` table.
    TransactionSenders,
}
37
38impl StaticFileSegment {
39    /// Returns a string representation of the segment.
40    pub const fn as_str(&self) -> &'static str {
41        // `strum` doesn't generate a doc comment for `into_str` when using `IntoStaticStr` derive
42        // macro, so we need to manually implement it.
43        //
44        // NOTE: this name cannot have underscores in it, as underscores are used as delimiters in
45        // static file paths, for fetching static files for a specific block range
46        match self {
47            Self::Headers => "headers",
48            Self::Transactions => "transactions",
49            Self::Receipts => "receipts",
50            Self::TransactionSenders => "transaction-senders",
51        }
52    }
53
54    /// Returns an iterator over all segments.
55    pub fn iter() -> impl Iterator<Item = Self> {
56        // The order of segments is significant and must be maintained to ensure correctness.
57        [Self::Headers, Self::Transactions, Self::Receipts, Self::TransactionSenders].into_iter()
58    }
59
60    /// Returns the default configuration of the segment.
61    pub const fn config(&self) -> SegmentConfig {
62        SegmentConfig { compression: Compression::Lz4 }
63    }
64
65    /// Returns the number of columns for the segment
66    pub const fn columns(&self) -> usize {
67        match self {
68            Self::Headers => 3,
69            Self::Transactions | Self::Receipts | Self::TransactionSenders => 1,
70        }
71    }
72
73    /// Returns the default file name for the provided segment and range.
74    pub fn filename(&self, block_range: &SegmentRangeInclusive) -> String {
75        // ATTENTION: if changing the name format, be sure to reflect those changes in
76        // [`Self::parse_filename`].
77        format!("static_file_{}_{}_{}", self.as_str(), block_range.start(), block_range.end())
78    }
79
80    /// Returns file name for the provided segment and range, alongside filters, compression.
81    pub fn filename_with_configuration(
82        &self,
83        compression: Compression,
84        block_range: &SegmentRangeInclusive,
85    ) -> String {
86        let prefix = self.filename(block_range);
87
88        let filters_name = "none";
89
90        // ATTENTION: if changing the name format, be sure to reflect those changes in
91        // [`Self::parse_filename`.]
92        format!("{prefix}_{}_{}", filters_name, compression.as_ref())
93    }
94
95    /// Parses a filename into a `StaticFileSegment` and its expected block range.
96    ///
97    /// The filename is expected to follow the format:
98    /// "`static_file`_{segment}_{`block_start`}_{`block_end`}". This function checks
99    /// for the correct prefix ("`static_file`"), and then parses the segment and the inclusive
100    /// ranges for blocks. It ensures that the start of each range is less than or equal to the
101    /// end.
102    ///
103    /// # Returns
104    /// - `Some((segment, block_range))` if parsing is successful and all conditions are met.
105    /// - `None` if any condition fails, such as an incorrect prefix, parsing error, or invalid
106    ///   range.
107    ///
108    /// # Note
109    /// This function is tightly coupled with the naming convention defined in [`Self::filename`].
110    /// Any changes in the filename format in `filename` should be reflected here.
111    pub fn parse_filename(name: &str) -> Option<(Self, SegmentRangeInclusive)> {
112        let mut parts = name.split('_');
113        if !(parts.next() == Some("static") && parts.next() == Some("file")) {
114            return None
115        }
116
117        let segment = Self::from_str(parts.next()?).ok()?;
118        let (block_start, block_end) = (parts.next()?.parse().ok()?, parts.next()?.parse().ok()?);
119
120        if block_start > block_end {
121            return None
122        }
123
124        Some((segment, SegmentRangeInclusive::new(block_start, block_end)))
125    }
126
127    /// Returns `true` if a segment row is linked to a transaction.
128    pub const fn is_tx_based(&self) -> bool {
129        match self {
130            Self::Receipts | Self::Transactions | Self::TransactionSenders => true,
131            Self::Headers => false,
132        }
133    }
134
135    /// Returns `true` if a segment row is linked to a block.
136    pub const fn is_block_based(&self) -> bool {
137        match self {
138            Self::Headers => true,
139            Self::Receipts | Self::Transactions | Self::TransactionSenders => false,
140        }
141    }
142}
143
/// A segment header that contains information common to all segments. Used for storage.
///
/// All fields are private; they are read and updated through the methods on [`SegmentHeader`].
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone, Copy)]
pub struct SegmentHeader {
    /// Defines the expected block range for a static file segment. This attribute is crucial for
    /// scenarios where the file contains no data, allowing for a representation beyond a
    /// simple `start..=start` range. It ensures clarity in differentiating between an empty file
    /// and a file with a single block numbered 0.
    expected_block_range: SegmentRangeInclusive,
    /// Block range of the data on the static file segment. `None` while no block has been
    /// recorded.
    block_range: Option<SegmentRangeInclusive>,
    /// Transaction range of the data on the static file segment. `None` while no transaction has
    /// been recorded.
    tx_range: Option<SegmentRangeInclusive>,
    /// Segment type
    segment: StaticFileSegment,
}
159
160impl SegmentHeader {
161    /// Returns [`SegmentHeader`].
162    pub const fn new(
163        expected_block_range: SegmentRangeInclusive,
164        block_range: Option<SegmentRangeInclusive>,
165        tx_range: Option<SegmentRangeInclusive>,
166        segment: StaticFileSegment,
167    ) -> Self {
168        Self { expected_block_range, block_range, tx_range, segment }
169    }
170
171    /// Returns the static file segment kind.
172    pub const fn segment(&self) -> StaticFileSegment {
173        self.segment
174    }
175
176    /// Returns the expected block range.
177    pub const fn expected_block_range(&self) -> SegmentRangeInclusive {
178        self.expected_block_range
179    }
180
181    /// Returns the block range.
182    pub const fn block_range(&self) -> Option<SegmentRangeInclusive> {
183        self.block_range
184    }
185
186    /// Returns the transaction range.
187    pub const fn tx_range(&self) -> Option<SegmentRangeInclusive> {
188        self.tx_range
189    }
190
191    /// The expected block start of the segment.
192    pub const fn expected_block_start(&self) -> BlockNumber {
193        self.expected_block_range.start()
194    }
195
196    /// The expected block end of the segment.
197    pub const fn expected_block_end(&self) -> BlockNumber {
198        self.expected_block_range.end()
199    }
200
201    /// Returns the first block number of the segment.
202    pub fn block_start(&self) -> Option<BlockNumber> {
203        self.block_range.as_ref().map(|b| b.start())
204    }
205
206    /// Returns the last block number of the segment.
207    pub fn block_end(&self) -> Option<BlockNumber> {
208        self.block_range.as_ref().map(|b| b.end())
209    }
210
211    /// Returns the first transaction number of the segment.
212    pub fn tx_start(&self) -> Option<TxNumber> {
213        self.tx_range.as_ref().map(|t| t.start())
214    }
215
216    /// Returns the last transaction number of the segment.
217    pub fn tx_end(&self) -> Option<TxNumber> {
218        self.tx_range.as_ref().map(|t| t.end())
219    }
220
221    /// Number of transactions.
222    pub fn tx_len(&self) -> Option<u64> {
223        self.tx_range.as_ref().map(|r| r.len())
224    }
225
226    /// Number of blocks.
227    pub fn block_len(&self) -> Option<u64> {
228        self.block_range.as_ref().map(|r| r.len())
229    }
230
231    /// Increments block end range depending on segment
232    pub const fn increment_block(&mut self) -> BlockNumber {
233        if let Some(block_range) = &mut self.block_range {
234            block_range.end += 1;
235            block_range.end
236        } else {
237            self.block_range = Some(SegmentRangeInclusive::new(
238                self.expected_block_start(),
239                self.expected_block_start(),
240            ));
241            self.expected_block_start()
242        }
243    }
244
245    /// Increments tx end range depending on segment
246    pub const fn increment_tx(&mut self) {
247        if self.segment.is_tx_based() {
248            if let Some(tx_range) = &mut self.tx_range {
249                tx_range.end += 1;
250            } else {
251                self.tx_range = Some(SegmentRangeInclusive::new(0, 0));
252            }
253        }
254    }
255
256    /// Removes `num` elements from end of tx or block range.
257    pub const fn prune(&mut self, num: u64) {
258        if self.segment.is_block_based() {
259            if let Some(range) = &mut self.block_range {
260                if num > range.end - range.start {
261                    self.block_range = None;
262                } else {
263                    range.end = range.end.saturating_sub(num);
264                }
265            };
266        } else if let Some(range) = &mut self.tx_range {
267            if num > range.end - range.start {
268                self.tx_range = None;
269            } else {
270                range.end = range.end.saturating_sub(num);
271            }
272        }
273    }
274
275    /// Sets a new `block_range`.
276    pub const fn set_block_range(&mut self, block_start: BlockNumber, block_end: BlockNumber) {
277        if let Some(block_range) = &mut self.block_range {
278            block_range.start = block_start;
279            block_range.end = block_end;
280        } else {
281            self.block_range = Some(SegmentRangeInclusive::new(block_start, block_end))
282        }
283    }
284
285    /// Sets a new `tx_range`.
286    pub const fn set_tx_range(&mut self, tx_start: TxNumber, tx_end: TxNumber) {
287        if let Some(tx_range) = &mut self.tx_range {
288            tx_range.start = tx_start;
289            tx_range.end = tx_end;
290        } else {
291            self.tx_range = Some(SegmentRangeInclusive::new(tx_start, tx_end))
292        }
293    }
294
295    /// Returns the row offset which depends on whether the segment is block or transaction based.
296    pub fn start(&self) -> Option<u64> {
297        if self.segment.is_block_based() {
298            return self.block_start()
299        }
300        self.tx_start()
301    }
302}
303
/// Configuration used on the segment.
///
/// Currently this only carries the compression setting.
#[derive(Debug, Clone, Copy)]
pub struct SegmentConfig {
    /// Compression used on the segment
    pub compression: Compression,
}
310
/// Helper type to handle segment transaction and block INCLUSIVE ranges.
///
/// They can be modified on a hot loop, which makes the `std::ops::RangeInclusive` a poor fit.
///
/// Both bounds are part of the range, so `start == end` describes a range with one element.
#[derive(Debug, Serialize, Deserialize, Eq, PartialEq, Hash, Clone, Copy)]
pub struct SegmentRangeInclusive {
    /// First element of the range (inclusive).
    start: u64,
    /// Last element of the range (inclusive).
    end: u64,
}
319
320impl SegmentRangeInclusive {
321    /// Creates a new [`SegmentRangeInclusive`]
322    pub const fn new(start: u64, end: u64) -> Self {
323        Self { start, end }
324    }
325
326    /// Start of the inclusive range
327    pub const fn start(&self) -> u64 {
328        self.start
329    }
330
331    /// End of the inclusive range
332    pub const fn end(&self) -> u64 {
333        self.end
334    }
335
336    /// Returns the length of the inclusive range.
337    pub const fn len(&self) -> u64 {
338        self.end.saturating_sub(self.start).saturating_add(1)
339    }
340
341    /// Returns true if the range is empty.
342    pub const fn is_empty(&self) -> bool {
343        self.start > self.end
344    }
345}
346
347impl core::fmt::Display for SegmentRangeInclusive {
348    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
349        write!(f, "{}..={}", self.start, self.end)
350    }
351}
352
353impl From<RangeInclusive<u64>> for SegmentRangeInclusive {
354    fn from(value: RangeInclusive<u64>) -> Self {
355        Self { start: *value.start(), end: *value.end() }
356    }
357}
358
359impl From<&SegmentRangeInclusive> for RangeInclusive<u64> {
360    fn from(value: &SegmentRangeInclusive) -> Self {
361        value.start()..=value.end()
362    }
363}
364
365impl From<SegmentRangeInclusive> for RangeInclusive<u64> {
366    fn from(value: SegmentRangeInclusive) -> Self {
367        (&value).into()
368    }
369}
370
#[cfg(test)]
mod tests {
    use super::*;
    use alloy_primitives::Bytes;
    use reth_nippy_jar::NippyJar;
    use std::env::temp_dir;

    /// Checks file name generation and parsing against fixed vectors, then round-trips every
    /// segment through `filename` and `parse_filename`.
    #[test]
    fn test_filename() {
        // Each vector is (segment, block range, expected file name, optional compression).
        // With a compression, the name comes from `filename_with_configuration`; without one,
        // from `filename`.
        let test_vectors = [
            (StaticFileSegment::Headers, 2..=30, "static_file_headers_2_30", None),
            (StaticFileSegment::Receipts, 30..=300, "static_file_receipts_30_300", None),
            (
                StaticFileSegment::Transactions,
                1_123_233..=11_223_233,
                "static_file_transactions_1123233_11223233",
                None,
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_lz4",
                Some(Compression::Lz4),
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_zstd",
                Some(Compression::Zstd),
            ),
            (
                StaticFileSegment::Headers,
                2..=30,
                "static_file_headers_2_30_none_zstd-dict",
                Some(Compression::ZstdWithDictionary),
            ),
        ];

        for (segment, block_range, filename, compression) in test_vectors {
            let block_range: SegmentRangeInclusive = block_range.into();
            if let Some(compression) = compression {
                assert_eq!(
                    segment.filename_with_configuration(compression, &block_range),
                    filename
                );
            } else {
                assert_eq!(segment.filename(&block_range), filename);
            }

            // Parsing must recover the segment and range from both name forms; the trailing
            // filter and compression fields do not affect the result.
            assert_eq!(StaticFileSegment::parse_filename(filename), Some((segment, block_range)));
        }

        // Names with a missing or empty block field are rejected.
        assert_eq!(StaticFileSegment::parse_filename("static_file_headers_2"), None);
        assert_eq!(StaticFileSegment::parse_filename("static_file_headers_"), None);

        // roundtrip test
        let dummy_range = SegmentRangeInclusive::new(123, 1230);
        for segment in StaticFileSegment::iter() {
            let filename = segment.filename(&dummy_range);
            assert_eq!(Some((segment, dummy_range)), StaticFileSegment::parse_filename(&filename));
        }
    }

    /// Round-trips one `SegmentHeader` per segment through `NippyJar` serialization and compares
    /// the serialized bytes against a stored snapshot.
    #[test]
    fn test_segment_config_serialization() {
        let segments = vec![
            SegmentHeader {
                expected_block_range: SegmentRangeInclusive::new(0, 200),
                block_range: Some(SegmentRangeInclusive::new(0, 100)),
                tx_range: None,
                segment: StaticFileSegment::Headers,
            },
            SegmentHeader {
                expected_block_range: SegmentRangeInclusive::new(0, 200),
                block_range: None,
                tx_range: Some(SegmentRangeInclusive::new(0, 300)),
                segment: StaticFileSegment::Transactions,
            },
            SegmentHeader {
                expected_block_range: SegmentRangeInclusive::new(0, 200),
                block_range: Some(SegmentRangeInclusive::new(0, 100)),
                tx_range: Some(SegmentRangeInclusive::new(0, 300)),
                segment: StaticFileSegment::Receipts,
            },
            SegmentHeader {
                expected_block_range: SegmentRangeInclusive::new(0, 200),
                block_range: Some(SegmentRangeInclusive::new(0, 100)),
                tx_range: Some(SegmentRangeInclusive::new(0, 300)),
                segment: StaticFileSegment::TransactionSenders,
            },
        ];
        // Check that we test all segments, in the order `StaticFileSegment::iter` yields them
        assert_eq!(
            segments.iter().map(|segment| segment.segment()).collect::<Vec<_>>(),
            StaticFileSegment::iter().collect::<Vec<_>>()
        );

        for header in segments {
            let segment_jar = NippyJar::new(1, &temp_dir(), header);
            let mut serialized = Vec::new();
            segment_jar.save_to_writer(&mut serialized).unwrap();

            let deserialized =
                NippyJar::<SegmentHeader>::load_from_reader(&serialized[..]).unwrap();
            assert_eq!(deserialized.user_header(), segment_jar.user_header());

            // The snapshot is named after the segment's `Display` output.
            insta::assert_snapshot!(header.segment().to_string(), Bytes::from(serialized));
        }
    }

    /// Used in filename writing/parsing
    #[test]
    fn test_static_file_segment_str_roundtrip() {
        for segment in StaticFileSegment::iter() {
            // `as_str` and `from_str` must be inverses of each other.
            let static_str = segment.as_str();
            assert_eq!(StaticFileSegment::from_str(static_str).unwrap(), segment);

            // Pin the exact names, since they are embedded in file names.
            let expected_str = match segment {
                StaticFileSegment::Headers => "headers",
                StaticFileSegment::Transactions => "transactions",
                StaticFileSegment::Receipts => "receipts",
                StaticFileSegment::TransactionSenders => "transaction-senders",
            };
            assert_eq!(static_str, expected_str);
        }
    }

    /// Used in segment headers serialize/deserialize
    #[test]
    fn test_static_file_segment_serde_roundtrip() {
        for segment in StaticFileSegment::iter() {
            let ser = serde_json::to_string(&segment).unwrap();
            assert_eq!(serde_json::from_str::<StaticFileSegment>(&ser).unwrap(), segment);

            // Serde uses the variant names as declared, not the kebab-case names from `as_str`.
            let expected_str = match segment {
                StaticFileSegment::Headers => "Headers",
                StaticFileSegment::Transactions => "Transactions",
                StaticFileSegment::Receipts => "Receipts",
                StaticFileSegment::TransactionSenders => "TransactionSenders",
            };
            assert_eq!(ser, format!("\"{expected_str}\""));
        }
    }
}
514}