Skip to main content

reth_cli_commands/download/
manifest_cmd.rs

1use crate::download::manifest::generate_manifest;
2use clap::Parser;
3use eyre::{Result, WrapErr};
4use reth_db::{mdbx::DatabaseArguments, open_db_read_only, tables, Database};
5use reth_db_api::transaction::DbTx;
6use reth_stages_types::StageId;
7use reth_static_file_types::DEFAULT_BLOCKS_PER_STATIC_FILE;
8use std::{path::PathBuf, time::Instant};
9use tracing::{info, warn};
10
11/// Generate modular chunk archives and a snapshot manifest from a source datadir.
12///
13/// Archive naming convention:
14///   - Chunked: `{component}-{start}-{end}.tar.zst` (e.g. `transactions-0-499999.tar.zst`)
15#[derive(Debug, Parser)]
16pub struct SnapshotManifestCommand {
17    /// Source datadir containing static files.
18    #[arg(long, short = 'd')]
19    source_datadir: PathBuf,
20
21    /// Optional base URL where archives will be hosted.
22    #[arg(long)]
23    base_url: Option<String>,
24
25    /// Output directory where chunk archives and manifest.json are written.
26    #[arg(long, short = 'o')]
27    output_dir: PathBuf,
28
29    /// Block number this snapshot was taken at.
30    ///
31    /// If omitted, this is inferred from the source datadir's `Finish` stage checkpoint.
32    #[arg(long)]
33    block: Option<u64>,
34
35    /// Chain ID.
36    #[arg(long, default_value = "1")]
37    chain_id: u64,
38
39    /// Blocks per archive file for chunked components.
40    ///
41    /// If omitted, this is inferred from header static file ranges in the source datadir.
42    #[arg(long)]
43    blocks_per_file: Option<u64>,
44}
45
46impl SnapshotManifestCommand {
47    pub fn execute(self) -> Result<()> {
48        let block = match self.block {
49            Some(block) => block,
50            None => infer_snapshot_block(&self.source_datadir)?,
51        };
52        let blocks_per_file = match self.blocks_per_file {
53            Some(blocks_per_file) => blocks_per_file,
54            None => infer_blocks_per_file(&self.source_datadir)?,
55        };
56
57        info!(target: "reth::cli",
58            dir = ?self.source_datadir,
59            output = ?self.output_dir,
60            block,
61            blocks_per_file,
62            "Packaging modular snapshot archives"
63        );
64        let start = Instant::now();
65        let manifest = generate_manifest(
66            &self.source_datadir,
67            &self.output_dir,
68            self.base_url.as_deref(),
69            block,
70            self.chain_id,
71            blocks_per_file,
72        )?;
73
74        let num_components = manifest.components.len();
75        let json = serde_json::to_string_pretty(&manifest)?;
76        let output = self.output_dir.join("manifest.json");
77        reth_fs_util::write(&output, &json)?;
78        info!(target: "reth::cli",
79            path = ?output,
80            components = num_components,
81            block = manifest.block,
82            elapsed = ?start.elapsed(),
83            "Manifest written"
84        );
85
86        Ok(())
87    }
88}
89
90fn infer_snapshot_block(source_datadir: &std::path::Path) -> Result<u64> {
91    if let Ok(block) = infer_snapshot_block_from_db(source_datadir) {
92        return Ok(block);
93    }
94
95    let block = infer_snapshot_block_from_headers(source_datadir)?;
96    warn!(
97        target: "reth::cli",
98        block,
99        "Could not read Finish stage checkpoint from source DB, using header static-file tip"
100    );
101    Ok(block)
102}
103
104fn infer_snapshot_block_from_db(source_datadir: &std::path::Path) -> Result<u64> {
105    let candidates = [source_datadir.join("db"), source_datadir.to_path_buf()];
106
107    for db_path in candidates {
108        if !db_path.exists() {
109            continue;
110        }
111
112        let db = match open_db_read_only(&db_path, DatabaseArguments::default()) {
113            Ok(db) => db,
114            Err(_) => continue,
115        };
116
117        let tx = db.tx()?;
118        if let Some(checkpoint) = tx.get::<tables::StageCheckpoints>(StageId::Finish.to_string())? {
119            return Ok(checkpoint.block_number);
120        }
121    }
122
123    eyre::bail!(
124        "Could not infer --block from source DB (Finish checkpoint missing); pass --block manually"
125    )
126}
127
128fn infer_snapshot_block_from_headers(source_datadir: &std::path::Path) -> Result<u64> {
129    let max_end = header_ranges(source_datadir)?
130        .into_iter()
131        .map(|(_, end)| end)
132        .max()
133        .ok_or_else(|| eyre::eyre!("No header static files found to infer --block"))?;
134    Ok(max_end)
135}
136
137fn infer_blocks_per_file(source_datadir: &std::path::Path) -> Result<u64> {
138    let mut inferred = None;
139    for (start, end) in header_ranges(source_datadir)? {
140        let span = end.saturating_sub(start).saturating_add(1);
141        if span == 0 {
142            continue;
143        }
144
145        if let Some(existing) = inferred {
146            if existing != span {
147                eyre::bail!(
148                    "Inconsistent header static file ranges; pass --blocks-per-file manually"
149                );
150            }
151        } else {
152            inferred = Some(span);
153        }
154    }
155
156    inferred.ok_or_else(|| {
157        eyre::eyre!(
158            "Could not infer --blocks-per-file from header static files; pass it manually (default is {DEFAULT_BLOCKS_PER_STATIC_FILE})"
159        )
160    })
161}
162
163fn header_ranges(source_datadir: &std::path::Path) -> Result<Vec<(u64, u64)>> {
164    let static_files_dir = source_datadir.join("static_files");
165    let static_files_dir =
166        if static_files_dir.exists() { static_files_dir } else { source_datadir.to_path_buf() };
167
168    let entries = std::fs::read_dir(&static_files_dir).wrap_err_with(|| {
169        format!("Failed to read static files directory: {}", static_files_dir.display())
170    })?;
171
172    let mut ranges = Vec::new();
173    for entry in entries {
174        let entry = entry?;
175        let file_name = entry.file_name();
176        let file_name = file_name.to_string_lossy();
177        if let Some(range) = parse_headers_range(&file_name) {
178            ranges.push(range);
179        }
180    }
181
182    Ok(ranges)
183}
184
/// Parses the block range out of a header static file name.
///
/// Accepted names have the form `static_file_headers_{start}_{end}`, optionally followed by a
/// `.`-prefixed suffix (for example `static_file_headers_0_499999.conf`). `start` and `end`
/// must be plain decimal digits that fit in a `u64`.
///
/// Returns `None` for any other name, including names with trailing text that is not a
/// `.`-suffix (such as `static_file_headers_0_499999tmp` or `static_file_headers_0_10_20`).
fn parse_headers_range(file_name: &str) -> Option<(u64, u64)> {
    let remainder = file_name.strip_prefix("static_file_headers_")?;
    let (start, end_with_suffix) = remainder.split_once('_')?;

    // `u64::from_str` tolerates a leading `+`, which never appears in a real file name, so
    // require plain digits before parsing.
    if !start.bytes().all(|b| b.is_ascii_digit()) {
        return None;
    }
    let start = start.parse::<u64>().ok()?;

    // Split the leading digits from whatever follows. ASCII digits are single bytes, so the
    // count is a valid char boundary for `split_at`.
    let digits_len = end_with_suffix.bytes().take_while(u8::is_ascii_digit).count();
    let (end_digits, suffix) = end_with_suffix.split_at(digits_len);

    // Only a bare name or a `.ext`-style suffix may follow the end block number.
    if !(suffix.is_empty() || suffix.starts_with('.')) {
        return None;
    }
    let end = end_digits.parse::<u64>().ok()?;

    Some((start, end))
}
195
#[cfg(test)]
mod tests {
    use super::*;
    use tempfile::{tempdir, TempDir};

    /// Builds a temporary datadir whose `static_files` directory contains one empty file per
    /// entry in `names`. The returned guard keeps the directory alive for the test.
    fn datadir_with_static_files(names: &[&str]) -> TempDir {
        let datadir = tempdir().unwrap();
        let static_files = datadir.path().join("static_files");
        std::fs::create_dir_all(&static_files).unwrap();
        for name in names {
            std::fs::write(static_files.join(name), b"").unwrap();
        }
        datadir
    }

    #[test]
    fn parse_headers_range_works_with_suffixes() {
        // Bare name, name with an extension, and a non-header segment.
        let cases = [
            ("static_file_headers_0_499999", Some((0, 499_999))),
            ("static_file_headers_500000_999999.jar", Some((500_000, 999_999))),
            ("static_file_transactions_0_499999", None),
        ];

        for (file_name, expected) in cases {
            assert_eq!(parse_headers_range(file_name), expected);
        }
    }

    #[test]
    fn infer_blocks_per_file_from_header_ranges() {
        let datadir = datadir_with_static_files(&[
            "static_file_headers_0_499999",
            "static_file_headers_500000_999999.jar",
        ]);

        let inferred = infer_blocks_per_file(datadir.path()).unwrap();
        assert_eq!(inferred, 500_000);
    }

    #[test]
    fn infer_snapshot_block_from_headers_uses_max_end() {
        let datadir = datadir_with_static_files(&[
            "static_file_headers_0_499999",
            "static_file_headers_500000_999999",
        ]);

        let tip = infer_snapshot_block_from_headers(datadir.path()).unwrap();
        assert_eq!(tip, 999_999);
    }
}