Skip to main content

reth_cli_commands/db/
diff.rs

1use clap::Parser;
2use reth_db::{open_db_read_only, tables_to_generic, DatabaseEnv};
3use reth_db_api::{
4    cursor::DbCursorRO, database::Database, table::Table, transaction::DbTx, Tables,
5};
6use reth_db_common::DbTool;
7use reth_node_builder::{NodeTypes, NodeTypesWithDBAdapter};
8use reth_node_core::{
9    args::DatabaseArgs,
10    dirs::{DataDirPath, PlatformPath},
11};
12use std::{
13    collections::BTreeMap,
14    fmt::Debug,
15    fs::{self, File},
16    hash::Hash,
17    io::Write,
18    path::{Path, PathBuf},
19};
20use tracing::{info, warn};
21
#[derive(Parser, Debug)]
/// The arguments for the `reth db diff` command
pub struct Command {
    // NOTE: the `///` comments on the fields below double as the CLI help text generated by
    // clap, so maintainer notes in this struct are deliberately plain `//` comments.

    /// The path to the data dir for all reth files and subdirectories.
    // This is the datadir of the *secondary* node: `execute` joins `db` onto it and opens the
    // database found there read-only.
    #[arg(long, verbatim_doc_comment)]
    secondary_datadir: PlatformPath<DataDirPath>,

    /// Arguments for the second database
    // `execute` passes `second_db.database_args()` to `open_db_read_only`.
    #[command(flatten)]
    second_db: DatabaseArgs,

    /// The table name to diff. If not specified, all tables are diffed.
    // `None` makes `execute` iterate over `Tables::ALL`.
    #[arg(long, verbatim_doc_comment)]
    table: Option<Tables>,

    /// The output directory for the diff report.
    // `find_diffs` creates this directory on demand and writes one `<table>.txt` file into it
    // for every table that has at least one discrepancy or extra element.
    #[arg(long, verbatim_doc_comment)]
    output: PlatformPath<PathBuf>,
}
41
42impl Command {
43    /// Execute the `db diff` command.
44    ///
45    /// This first opens the `db/` folder from the secondary datadir, where the second database is
46    /// opened read-only.
47    ///
48    /// The tool will then iterate through all key-value pairs for the primary and secondary
49    /// databases. The value for each key will be compared with its corresponding value in the
50    /// other database. If the values are different, a discrepancy will be recorded in-memory. If
51    /// one key is present in one database but not the other, this will be recorded as an "extra
52    /// element" for that database.
53    ///
54    /// The discrepancies and extra elements, along with a brief summary of the diff results are
55    /// then written to a file in the output directory.
56    pub fn execute<T: NodeTypes>(
57        self,
58        tool: &DbTool<NodeTypesWithDBAdapter<T, DatabaseEnv>>,
59    ) -> eyre::Result<()> {
60        warn!("Make sure the node is not running when running `reth db diff`!");
61        // open second db
62        let second_db_path: PathBuf = self.secondary_datadir.join("db").into();
63        let second_db = open_db_read_only(&second_db_path, self.second_db.database_args())?;
64
65        let tables = match &self.table {
66            Some(table) => std::slice::from_ref(table),
67            None => Tables::ALL,
68        };
69
70        for table in tables {
71            let mut primary_tx = tool.provider_factory.db_ref().tx()?;
72            let mut secondary_tx = second_db.tx()?;
73
74            // disable long read transaction safety, since this will run for a while and it's
75            // expected that the node is not running
76            primary_tx.disable_long_read_transaction_safety();
77            secondary_tx.disable_long_read_transaction_safety();
78
79            let output_dir = self.output.clone();
80            tables_to_generic!(table, |Table| find_diffs::<Table>(
81                primary_tx,
82                secondary_tx,
83                output_dir
84            ))?;
85        }
86
87        Ok(())
88    }
89}
90
91/// Find diffs for a table, then analyzing the result
92fn find_diffs<T: Table>(
93    primary_tx: impl DbTx,
94    secondary_tx: impl DbTx,
95    output_dir: impl AsRef<Path>,
96) -> eyre::Result<()>
97where
98    T::Key: Hash,
99    T::Value: PartialEq,
100{
101    let table = T::NAME;
102
103    info!("Analyzing table {table}...");
104    let result = find_diffs_advanced::<T>(&primary_tx, &secondary_tx)?;
105    info!("Done analyzing table {table}!");
106
107    // Pretty info summary header: newline then header
108    info!("");
109    info!("Diff results for {table}:");
110
111    // analyze the result and print some stats
112    let discrepancies = result.discrepancies.len();
113    let extra_elements = result.extra_elements.len();
114
115    if discrepancies == 0 && extra_elements == 0 {
116        info!("No discrepancies or extra elements found in table {table}");
117        return Ok(());
118    }
119
120    // create directory and open file
121    fs::create_dir_all(output_dir.as_ref())?;
122    let file_name = format!("{table}.txt");
123    let mut file = File::create(output_dir.as_ref().join(file_name.clone()))?;
124
125    // Make a pretty summary header for the table
126    writeln!(file, "Diff results for {table}")?;
127
128    if discrepancies > 0 {
129        // write to file
130        writeln!(file, "Found {discrepancies} discrepancies in table {table}")?;
131
132        // also print to info
133        info!("Found {discrepancies} discrepancies in table {table}");
134    } else {
135        // write to file
136        writeln!(file, "No discrepancies found in table {table}")?;
137
138        // also print to info
139        info!("No discrepancies found in table {table}");
140    }
141
142    if extra_elements > 0 {
143        // write to file
144        writeln!(file, "Found {extra_elements} extra elements in table {table}")?;
145
146        // also print to info
147        info!("Found {extra_elements} extra elements in table {table}");
148    } else {
149        writeln!(file, "No extra elements found in table {table}")?;
150
151        // also print to info
152        info!("No extra elements found in table {table}");
153    }
154
155    info!("Writing diff results for {table} to {file_name}...");
156
157    if discrepancies > 0 {
158        writeln!(file, "Discrepancies:")?;
159    }
160
161    for discrepancy in result.discrepancies.values() {
162        writeln!(file, "{discrepancy:#?}")?;
163    }
164
165    if extra_elements > 0 {
166        writeln!(file, "Extra elements:")?;
167    }
168
169    for extra_element in result.extra_elements.values() {
170        writeln!(file, "{extra_element:#?}")?;
171    }
172
173    let full_file_name = output_dir.as_ref().join(file_name);
174    info!("Done writing diff results for {table} to {}", full_file_name.display());
175    Ok(())
176}
177
178/// This diff algorithm is slightly different, it will walk _each_ table, cross-checking for the
179/// element in the other table.
180fn find_diffs_advanced<T: Table>(
181    primary_tx: &impl DbTx,
182    secondary_tx: &impl DbTx,
183) -> eyre::Result<TableDiffResult<T>>
184where
185    T::Value: PartialEq,
186    T::Key: Hash,
187{
188    // initialize the zipped walker
189    let mut primary_zip_cursor =
190        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
191    let primary_walker = primary_zip_cursor.walk(None)?;
192
193    let mut secondary_zip_cursor =
194        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
195    let secondary_walker = secondary_zip_cursor.walk(None)?;
196    let zipped_cursor = primary_walker.zip(secondary_walker);
197
198    // initialize the cursors for seeking when we are cross checking elements
199    let mut primary_cursor =
200        primary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
201
202    let mut secondary_cursor =
203        secondary_tx.cursor_read::<T>().expect("Was not able to obtain a cursor.");
204
205    let mut result = TableDiffResult::<T>::default();
206
207    // this loop will walk both tables, cross-checking for the element in the other table.
208    // it basically just loops through both tables at the same time. if the keys are different, it
209    // will check each key in the other table. if the keys are the same, it will compare the
210    // values
211    for (primary_entry, secondary_entry) in zipped_cursor {
212        let (primary_key, primary_value) = primary_entry?;
213        let (secondary_key, secondary_value) = secondary_entry?;
214
215        if primary_key != secondary_key {
216            // if the keys are different, we need to check if the key is in the other table
217            let crossed_secondary =
218                secondary_cursor.seek_exact(primary_key.clone())?.map(|(_, value)| value);
219            result.try_push_discrepancy(
220                primary_key.clone(),
221                Some(primary_value),
222                crossed_secondary,
223            );
224
225            // now do the same for the primary table
226            let crossed_primary =
227                primary_cursor.seek_exact(secondary_key.clone())?.map(|(_, value)| value);
228            result.try_push_discrepancy(
229                secondary_key.clone(),
230                crossed_primary,
231                Some(secondary_value),
232            );
233        } else {
234            // the keys are the same, so we need to compare the values
235            result.try_push_discrepancy(primary_key, Some(primary_value), Some(secondary_value));
236        }
237    }
238
239    Ok(result)
240}
241
242/// Includes a table element between two databases with the same key, but different values
243#[derive(Debug)]
244struct TableDiffElement<T: Table> {
245    /// The key for the element
246    key: T::Key,
247
248    /// The element from the first table
249    #[expect(dead_code)]
250    first: T::Value,
251
252    /// The element from the second table
253    #[expect(dead_code)]
254    second: T::Value,
255}
256
257/// The diff result for an entire table. If the tables had the same number of elements, there will
258/// be no extra elements.
259struct TableDiffResult<T: Table>
260where
261    T::Key: Hash,
262{
263    /// All elements of the database that are different
264    discrepancies: BTreeMap<T::Key, TableDiffElement<T>>,
265
266    /// Any extra elements, and the table they are in
267    extra_elements: BTreeMap<T::Key, ExtraTableElement<T>>,
268}
269
270impl<T> Default for TableDiffResult<T>
271where
272    T: Table,
273    T::Key: Hash,
274{
275    fn default() -> Self {
276        Self { discrepancies: BTreeMap::default(), extra_elements: BTreeMap::default() }
277    }
278}
279
280impl<T: Table> TableDiffResult<T>
281where
282    T::Key: Hash,
283{
284    /// Push a diff result into the discrepancies set.
285    fn push_discrepancy(&mut self, discrepancy: TableDiffElement<T>) {
286        self.discrepancies.insert(discrepancy.key.clone(), discrepancy);
287    }
288
289    /// Push an extra element into the extra elements set.
290    fn push_extra_element(&mut self, element: ExtraTableElement<T>) {
291        self.extra_elements.insert(element.key().clone(), element);
292    }
293}
294
295impl<T> TableDiffResult<T>
296where
297    T: Table,
298    T::Key: Hash,
299    T::Value: PartialEq,
300{
301    /// Try to push a diff result into the discrepancy set, only pushing if the given elements are
302    /// different, and the discrepancy does not exist anywhere already.
303    fn try_push_discrepancy(
304        &mut self,
305        key: T::Key,
306        first: Option<T::Value>,
307        second: Option<T::Value>,
308    ) {
309        // do not bother comparing if the key is already in the discrepancies map
310        if self.discrepancies.contains_key(&key) {
311            return
312        }
313
314        // do not bother comparing if the key is already in the extra elements map
315        if self.extra_elements.contains_key(&key) {
316            return
317        }
318
319        match (first, second) {
320            (Some(first), Some(second)) => {
321                if first != second {
322                    self.push_discrepancy(TableDiffElement { key, first, second });
323                }
324            }
325            (Some(first), None) => {
326                self.push_extra_element(ExtraTableElement::First { key, value: first });
327            }
328            (None, Some(second)) => {
329                self.push_extra_element(ExtraTableElement::Second { key, value: second });
330            }
331            (None, None) => {}
332        }
333    }
334}
335
336/// A single extra element from a table
337#[derive(Debug)]
338enum ExtraTableElement<T: Table> {
339    /// The extra element that is in the first table
340    #[expect(dead_code)]
341    First { key: T::Key, value: T::Value },
342
343    /// The extra element that is in the second table
344    #[expect(dead_code)]
345    Second { key: T::Key, value: T::Value },
346}
347
348impl<T: Table> ExtraTableElement<T> {
349    /// Return the key for the extra element
350    const fn key(&self) -> &T::Key {
351        match self {
352            Self::First { key, .. } | Self::Second { key, .. } => key,
353        }
354    }
355}