reth_cli_commands/db/
diff.rs

1use clap::Parser;
2use reth_db::{open_db_read_only, tables_to_generic, DatabaseEnv};
3use reth_db_api::{
4    cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, Tables,
5};
6use reth_db_common::DbTool;
7use reth_node_builder::{NodeTypes, NodeTypesWithDBAdapter};
8use reth_node_core::{
9    args::DatabaseArgs,
10    dirs::{DataDirPath, PlatformPath},
11};
12use std::{
13    collections::HashMap,
14    fmt::Debug,
15    fs::{self, File},
16    hash::Hash,
17    io::Write,
18    path::{Path, PathBuf},
19    sync::Arc,
20};
21use tracing::{info, warn};
22
23#[derive(Parser, Debug)]
24/// The arguments for the `reth db diff` command
25pub struct Command {
26    /// The path to the data dir for all reth files and subdirectories.
27    #[arg(long, verbatim_doc_comment)]
28    secondary_datadir: PlatformPath<DataDirPath>,
29
30    /// Arguments for the second database
31    #[command(flatten)]
32    second_db: DatabaseArgs,
33
34    /// The table name to diff. If not specified, all tables are diffed.
35    #[arg(long, verbatim_doc_comment)]
36    table: Option<Tables>,
37
38    /// The output directory for the diff report.
39    #[arg(long, verbatim_doc_comment)]
40    output: PlatformPath<PathBuf>,
41}
42
43impl Command {
44    /// Execute the `db diff` command.
45    ///
46    /// This first opens the `db/` folder from the secondary datadir, where the second database is
47    /// opened read-only.
48    ///
49    /// The tool will then iterate through all key-value pairs for the primary and secondary
50    /// databases. The value for each key will be compared with its corresponding value in the
51    /// other database. If the values are different, a discrepancy will be recorded in-memory. If
52    /// one key is present in one database but not the other, this will be recorded as an "extra
53    /// element" for that database.
54    ///
55    /// The discrepancies and extra elements, along with a brief summary of the diff results are
56    /// then written to a file in the output directory.
57    pub fn execute<T: NodeTypes>(
58        self,
59        tool: &DbTool<NodeTypesWithDBAdapter<T, Arc<DatabaseEnv>>>,
60    ) -> eyre::Result<()> {
61        warn!("Make sure the node is not running when running `reth db diff`!");
62        // open second db
63        let second_db_path: PathBuf = self.secondary_datadir.join("db").into();
64        let second_db = open_db_read_only(&second_db_path, self.second_db.database_args())?;
65
66        let tables = match &self.table {
67            Some(table) => std::slice::from_ref(table),
68            None => Tables::ALL,
69        };
70
71        for table in tables {
72            let mut primary_tx = tool.provider_factory.db_ref().tx()?;
73            let mut secondary_tx = second_db.tx()?;
74
75            // disable long read transaction safety, since this will run for a while and it's
76            // expected that the node is not running
77            primary_tx.disable_long_read_transaction_safety();
78            secondary_tx.disable_long_read_transaction_safety();
79
80            let output_dir = self.output.clone();
81            tables_to_generic!(table, |Table| find_diffs::<Table>(
82                primary_tx,
83                secondary_tx,
84                output_dir
85            ))?;
86        }
87
88        Ok(())
89    }
90}
91
92/// Find diffs for a table, then analyzing the result
93fn find_diffs<T: Table>(
94    primary_tx: impl DbTx,
95    secondary_tx: impl DbTx,
96    output_dir: impl AsRef<Path>,
97) -> eyre::Result<()>
98where
99    T::Key: Hash,
100    T::Value: PartialEq,
101{
102    let table = T::NAME;
103
104    info!("Analyzing table {table}...");
105    let result = find_diffs_advanced::<T>(&primary_tx, &secondary_tx)?;
106    info!("Done analyzing table {table}!");
107
108    // Pretty info summary header: newline then header
109    info!("");
110    info!("Diff results for {table}:");
111
112    // create directory and open file
113    fs::create_dir_all(output_dir.as_ref())?;
114    let file_name = format!("{table}.txt");
115    let mut file = File::create(output_dir.as_ref().join(file_name.clone()))?;
116
117    // analyze the result and print some stats
118    let discrepancies = result.discrepancies.len();
119    let extra_elements = result.extra_elements.len();
120
121    // Make a pretty summary header for the table
122    writeln!(file, "Diff results for {table}")?;
123
124    if discrepancies > 0 {
125        // write to file
126        writeln!(file, "Found {discrepancies} discrepancies in table {table}")?;
127
128        // also print to info
129        info!("Found {discrepancies} discrepancies in table {table}");
130    } else {
131        // write to file
132        writeln!(file, "No discrepancies found in table {table}")?;
133
134        // also print to info
135        info!("No discrepancies found in table {table}");
136    }
137
138    if extra_elements > 0 {
139        // write to file
140        writeln!(file, "Found {extra_elements} extra elements in table {table}")?;
141
142        // also print to info
143        info!("Found {extra_elements} extra elements in table {table}");
144    } else {
145        writeln!(file, "No extra elements found in table {table}")?;
146
147        // also print to info
148        info!("No extra elements found in table {table}");
149    }
150
151    info!("Writing diff results for {table} to {file_name}...");
152
153    if discrepancies > 0 {
154        writeln!(file, "Discrepancies:")?;
155    }
156
157    for discrepancy in result.discrepancies.values() {
158        writeln!(file, "{discrepancy:?}")?;
159    }
160
161    if extra_elements > 0 {
162        writeln!(file, "Extra elements:")?;
163    }
164
165    for extra_element in result.extra_elements.values() {
166        writeln!(file, "{extra_element:?}")?;
167    }
168
169    let full_file_name = output_dir.as_ref().join(file_name);
170    info!("Done writing diff results for {table} to {}", full_file_name.display());
171    Ok(())
172}
173
174/// This diff algorithm is slightly different, it will walk _each_ table, cross-checking for the
175/// element in the other table.
176fn find_diffs_advanced<T: Table>(
177    primary_tx: &impl DbTx,
178    secondary_tx: &impl DbTx,
179) -> eyre::Result<TableDiffResult<T>>
180where
181    T::Value: PartialEq,
182    T::Key: Hash,
183{
184    // initialize the zipped walker
185    let mut primary_zip_cursor =
186        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
187    let primary_walker = primary_zip_cursor.walk(None)?;
188
189    let mut secondary_zip_cursor =
190        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
191    let secondary_walker = secondary_zip_cursor.walk(None)?;
192    let zipped_cursor = primary_walker.zip(secondary_walker);
193
194    // initialize the cursors for seeking when we are cross checking elements
195    let mut primary_cursor =
196        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
197
198    let mut secondary_cursor =
199        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
200
201    let mut result = TableDiffResult::<T>::default();
202
203    // this loop will walk both tables, cross-checking for the element in the other table.
204    // it basically just loops through both tables at the same time. if the keys are different, it
205    // will check each key in the other table. if the keys are the same, it will compare the
206    // values
207    for (primary_entry, secondary_entry) in zipped_cursor {
208        let (primary_key, primary_value) = primary_entry?;
209        let (secondary_key, secondary_value) = secondary_entry?;
210
211        if primary_key != secondary_key {
212            // if the keys are different, we need to check if the key is in the other table
213            let crossed_secondary =
214                secondary_cursor.seek_exact(primary_key.clone())?.map(|(_, value)| value);
215            result.try_push_discrepancy(
216                primary_key.clone(),
217                Some(primary_value),
218                crossed_secondary,
219            );
220
221            // now do the same for the primary table
222            let crossed_primary =
223                primary_cursor.seek_exact(secondary_key.clone())?.map(|(_, value)| value);
224            result.try_push_discrepancy(
225                secondary_key.clone(),
226                crossed_primary,
227                Some(secondary_value),
228            );
229        } else {
230            // the keys are the same, so we need to compare the values
231            result.try_push_discrepancy(primary_key, Some(primary_value), Some(secondary_value));
232        }
233    }
234
235    Ok(result)
236}
237
238/// Includes a table element between two databases with the same key, but different values
239#[derive(Debug)]
240struct TableDiffElement<T: Table> {
241    /// The key for the element
242    key: T::Key,
243
244    /// The element from the first table
245    #[expect(dead_code)]
246    first: T::Value,
247
248    /// The element from the second table
249    #[expect(dead_code)]
250    second: T::Value,
251}
252
253/// The diff result for an entire table. If the tables had the same number of elements, there will
254/// be no extra elements.
255struct TableDiffResult<T: Table>
256where
257    T::Key: Hash,
258{
259    /// All elements of the database that are different
260    discrepancies: HashMap<T::Key, TableDiffElement<T>>,
261
262    /// Any extra elements, and the table they are in
263    extra_elements: HashMap<T::Key, ExtraTableElement<T>>,
264}
265
266impl<T> Default for TableDiffResult<T>
267where
268    T: Table,
269    T::Key: Hash,
270{
271    fn default() -> Self {
272        Self { discrepancies: HashMap::default(), extra_elements: HashMap::default() }
273    }
274}
275
276impl<T: Table> TableDiffResult<T>
277where
278    T::Key: Hash,
279{
280    /// Push a diff result into the discrepancies set.
281    fn push_discrepancy(&mut self, discrepancy: TableDiffElement<T>) {
282        self.discrepancies.insert(discrepancy.key.clone(), discrepancy);
283    }
284
285    /// Push an extra element into the extra elements set.
286    fn push_extra_element(&mut self, element: ExtraTableElement<T>) {
287        self.extra_elements.insert(element.key().clone(), element);
288    }
289}
290
291impl<T> TableDiffResult<T>
292where
293    T: Table,
294    T::Key: Hash,
295    T::Value: PartialEq,
296{
297    /// Try to push a diff result into the discrepancy set, only pushing if the given elements are
298    /// different, and the discrepancy does not exist anywhere already.
299    fn try_push_discrepancy(
300        &mut self,
301        key: T::Key,
302        first: Option<T::Value>,
303        second: Option<T::Value>,
304    ) {
305        // do not bother comparing if the key is already in the discrepancies map
306        if self.discrepancies.contains_key(&key) {
307            return
308        }
309
310        // do not bother comparing if the key is already in the extra elements map
311        if self.extra_elements.contains_key(&key) {
312            return
313        }
314
315        match (first, second) {
316            (Some(first), Some(second)) => {
317                if first != second {
318                    self.push_discrepancy(TableDiffElement { key, first, second });
319                }
320            }
321            (Some(first), None) => {
322                self.push_extra_element(ExtraTableElement::First { key, value: first });
323            }
324            (None, Some(second)) => {
325                self.push_extra_element(ExtraTableElement::Second { key, value: second });
326            }
327            (None, None) => {}
328        }
329    }
330}
331
332/// A single extra element from a table
333#[derive(Debug)]
334enum ExtraTableElement<T: Table> {
335    /// The extra element that is in the first table
336    #[expect(dead_code)]
337    First { key: T::Key, value: T::Value },
338
339    /// The extra element that is in the second table
340    #[expect(dead_code)]
341    Second { key: T::Key, value: T::Value },
342}
343
344impl<T: Table> ExtraTableElement<T> {
345    /// Return the key for the extra element
346    const fn key(&self) -> &T::Key {
347        match self {
348            Self::First { key, .. } | Self::Second { key, .. } => key,
349        }
350    }
351}