Skip to main content

reth_cli_commands/download/
manifest_cmd.rs

1use crate::download::manifest::generate_manifest;
2use clap::Parser;
3use eyre::{Result, WrapErr};
4use reth_db::{mdbx::DatabaseArguments, open_db_read_only, tables, Database};
5use reth_db_api::transaction::DbTx;
6use reth_primitives_traits::FastInstant as Instant;
7use reth_stages_types::StageId;
8use reth_static_file_types::DEFAULT_BLOCKS_PER_STATIC_FILE;
9use std::path::PathBuf;
10use tracing::{info, warn};
11
/// Generate modular chunk archives and a snapshot manifest from a source datadir.
///
/// Archive naming convention:
///   - Chunked: `{component}-{start}-{end}.tar.zst` (e.g. `transactions-0-499999.tar.zst`)
// NOTE(review): with `#[derive(Parser)]` the `///` comments on this struct and its fields are
// presumably picked up by clap as help text, so rewording them changes `--help` output. Maintainer
// notes that should not show up there are written as plain `//` comments.
#[derive(Debug, Parser)]
pub struct SnapshotManifestCommand {
    /// Source datadir containing static files.
    // Header static files are looked up in `<source_datadir>/static_files` when that directory
    // exists, otherwise directly in `<source_datadir>` (see `header_ranges`).
    #[arg(long, short = 'd')]
    source_datadir: PathBuf,

    /// Optional base URL where archives will be hosted.
    // Forwarded to `generate_manifest` as `Option<&str>`.
    #[arg(long)]
    base_url: Option<String>,

    /// Output directory where chunk archives and manifest.json are written.
    // `execute` writes the manifest to `<output_dir>/manifest.json`.
    #[arg(long, short = 'o')]
    output_dir: PathBuf,

    /// Block number this snapshot was taken at.
    ///
    /// If omitted, this is inferred from the source datadir's `Finish` stage checkpoint.
    // When the checkpoint cannot be read, `infer_snapshot_block` falls back to the highest range
    // end found in the header static file names and logs a warning.
    #[arg(long)]
    block: Option<u64>,

    /// Chain ID.
    // Forwarded unchanged to `generate_manifest`.
    #[arg(long, default_value = "1")]
    chain_id: u64,

    /// Blocks per archive file for chunked components.
    ///
    /// If omitted, this is inferred from header static file ranges in the source datadir.
    // Inference (`infer_blocks_per_file`) fails if the header files disagree on their span.
    #[arg(long)]
    blocks_per_file: Option<u64>,
}
46
47impl SnapshotManifestCommand {
48    pub fn execute(self) -> Result<()> {
49        let block = match self.block {
50            Some(block) => block,
51            None => infer_snapshot_block(&self.source_datadir)?,
52        };
53        let blocks_per_file = match self.blocks_per_file {
54            Some(blocks_per_file) => blocks_per_file,
55            None => infer_blocks_per_file(&self.source_datadir)?,
56        };
57
58        info!(target: "reth::cli",
59            dir = ?self.source_datadir,
60            output = ?self.output_dir,
61            block,
62            blocks_per_file,
63            "Packaging modular snapshot archives"
64        );
65        let start = Instant::now();
66        let manifest = generate_manifest(
67            &self.source_datadir,
68            &self.output_dir,
69            self.base_url.as_deref(),
70            block,
71            self.chain_id,
72            blocks_per_file,
73        )?;
74
75        let num_components = manifest.components.len();
76        let json = serde_json::to_string_pretty(&manifest)?;
77        let output = self.output_dir.join("manifest.json");
78        reth_fs_util::write(&output, &json)?;
79        info!(target: "reth::cli",
80            path = ?output,
81            components = num_components,
82            block = manifest.block,
83            elapsed = ?start.elapsed(),
84            "Manifest written"
85        );
86
87        Ok(())
88    }
89}
90
91fn infer_snapshot_block(source_datadir: &std::path::Path) -> Result<u64> {
92    if let Ok(block) = infer_snapshot_block_from_db(source_datadir) {
93        return Ok(block);
94    }
95
96    let block = infer_snapshot_block_from_headers(source_datadir)?;
97    warn!(
98        target: "reth::cli",
99        block,
100        "Could not read Finish stage checkpoint from source DB, using header static-file tip"
101    );
102    Ok(block)
103}
104
105fn infer_snapshot_block_from_db(source_datadir: &std::path::Path) -> Result<u64> {
106    let candidates = [source_datadir.join("db"), source_datadir.to_path_buf()];
107
108    for db_path in candidates {
109        if !db_path.exists() {
110            continue;
111        }
112
113        let db = match open_db_read_only(&db_path, DatabaseArguments::default()) {
114            Ok(db) => db,
115            Err(_) => continue,
116        };
117
118        let tx = db.tx()?;
119        if let Some(checkpoint) = tx.get::<tables::StageCheckpoints>(StageId::Finish.to_string())? {
120            return Ok(checkpoint.block_number);
121        }
122    }
123
124    eyre::bail!(
125        "Could not infer --block from source DB (Finish checkpoint missing); pass --block manually"
126    )
127}
128
129fn infer_snapshot_block_from_headers(source_datadir: &std::path::Path) -> Result<u64> {
130    let max_end = header_ranges(source_datadir)?
131        .into_iter()
132        .map(|(_, end)| end)
133        .max()
134        .ok_or_else(|| eyre::eyre!("No header static files found to infer --block"))?;
135    Ok(max_end)
136}
137
138fn infer_blocks_per_file(source_datadir: &std::path::Path) -> Result<u64> {
139    let mut inferred = None;
140    for (start, end) in header_ranges(source_datadir)? {
141        let span = end.saturating_sub(start).saturating_add(1);
142        if span == 0 {
143            continue;
144        }
145
146        if let Some(existing) = inferred {
147            if existing != span {
148                eyre::bail!(
149                    "Inconsistent header static file ranges; pass --blocks-per-file manually"
150                );
151            }
152        } else {
153            inferred = Some(span);
154        }
155    }
156
157    inferred.ok_or_else(|| {
158        eyre::eyre!(
159            "Could not infer --blocks-per-file from header static files; pass it manually (default is {DEFAULT_BLOCKS_PER_STATIC_FILE})"
160        )
161    })
162}
163
164fn header_ranges(source_datadir: &std::path::Path) -> Result<Vec<(u64, u64)>> {
165    let static_files_dir = source_datadir.join("static_files");
166    let static_files_dir =
167        if static_files_dir.exists() { static_files_dir } else { source_datadir.to_path_buf() };
168
169    let entries = std::fs::read_dir(&static_files_dir).wrap_err_with(|| {
170        format!("Failed to read static files directory: {}", static_files_dir.display())
171    })?;
172
173    let mut ranges = Vec::new();
174    for entry in entries {
175        let entry = entry?;
176        let file_name = entry.file_name();
177        let file_name = file_name.to_string_lossy();
178        if let Some(range) = parse_headers_range(&file_name) {
179            ranges.push(range);
180        }
181    }
182
183    Ok(ranges)
184}
185
/// Parses the block range out of a header static file name.
///
/// Expects `static_file_headers_{start}_{end}` optionally followed by arbitrary trailing text
/// (for example a file extension). Only the leading ASCII digits after the second separator are
/// taken as `end`.
///
/// Returns `None` when the prefix does not match, the separator is missing, or either number
/// fails to parse as `u64`.
fn parse_headers_range(file_name: &str) -> Option<(u64, u64)> {
    let (start_text, tail) = file_name.strip_prefix("static_file_headers_")?.split_once('_')?;
    let start = start_text.parse::<u64>().ok()?;

    // Byte length of the leading digit run; ASCII digits are one byte each, so the slice below
    // always ends on a character boundary.
    let digits_len = tail.find(|ch: char| !ch.is_ascii_digit()).unwrap_or(tail.len());
    let end = tail[..digits_len].parse::<u64>().ok()?;

    Some((start, end))
}
196
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::tempdir;

    /// Creates a temporary datadir whose `static_files` directory holds one empty file per name.
    fn datadir_with_static_files(names: &[&str]) -> tempfile::TempDir {
        let datadir = tempdir().unwrap();
        let static_files = datadir.path().join("static_files");
        std::fs::create_dir_all(&static_files).unwrap();
        for name in names {
            std::fs::write(static_files.join(name), []).unwrap();
        }
        datadir
    }

    #[test]
    fn parse_headers_range_works_with_suffixes() {
        let cases = [
            ("static_file_headers_0_499999", Some((0, 499_999))),
            ("static_file_headers_500000_999999.jar", Some((500_000, 999_999))),
            ("static_file_transactions_0_499999", None),
        ];

        for (file_name, expected) in cases {
            assert_eq!(parse_headers_range(file_name), expected);
        }
    }

    #[test]
    fn infer_blocks_per_file_from_header_ranges() {
        let datadir = datadir_with_static_files(&[
            "static_file_headers_0_499999",
            "static_file_headers_500000_999999.jar",
        ]);

        assert_eq!(infer_blocks_per_file(datadir.path()).unwrap(), 500_000);
    }

    #[test]
    fn infer_snapshot_block_from_headers_uses_max_end() {
        let datadir = datadir_with_static_files(&[
            "static_file_headers_0_499999",
            "static_file_headers_500000_999999",
        ]);

        assert_eq!(infer_snapshot_block_from_headers(datadir.path()).unwrap(), 999_999);
    }
}