reth_cli_commands/test_vectors/
tables.rs

1use alloy_consensus::Header;
2use alloy_primitives::{hex, private::getrandom::getrandom};
3use arbitrary::Arbitrary;
4use eyre::Result;
5use proptest::{
6    prelude::ProptestConfig,
7    strategy::{Strategy, ValueTree},
8    test_runner::{TestRng, TestRunner},
9};
10use proptest_arbitrary_interop::arb;
11use reth_db_api::{
12    table::{DupSort, Table, TableRow},
13    tables,
14};
15use reth_ethereum_primitives::TransactionSigned;
16use reth_fs_util as fs;
17use std::collections::HashSet;
18use tracing::error;
19
/// Directory (relative path) that is created by `generate_vectors` and that receives one
/// `<table name>.json` file per generated table.
const VECTORS_FOLDER: &str = "testdata/micro/db";
/// Row count passed to the generators for the tables that do not specify their own count.
const PER_TABLE: usize = 1000;
22
23/// Generates test vectors for specified `tables`. If list is empty, then generate for all tables.
24pub fn generate_vectors(mut tables: Vec<String>) -> Result<()> {
25    // Prepare random seed for test (same method as used by proptest)
26    let mut seed = [0u8; 32];
27    getrandom(&mut seed)?;
28    println!("Seed for table test vectors: {:?}", hex::encode_prefixed(seed));
29
30    // Start the runner with the seed
31    let config = ProptestConfig::default();
32    let rng = TestRng::from_seed(config.rng_algorithm, &seed);
33    let mut runner = TestRunner::new_with_rng(config, rng);
34
35    fs::create_dir_all(VECTORS_FOLDER)?;
36
37    macro_rules! generate_vector {
38        ($table_type:ident$(<$($generic:ident),+>)?, $per_table:expr, TABLE) => {
39            generate_table_vector::<tables::$table_type$(<$($generic),+>)?>(&mut runner, $per_table)?;
40        };
41        ($table_type:ident$(<$($generic:ident),+>)?, $per_table:expr, DUPSORT) => {
42            generate_dupsort_vector::<tables::$table_type$(<$($generic),+>)?>(&mut runner, $per_table)?;
43        };
44    }
45
46    macro_rules! generate {
47        ([$(($table_type:ident$(<$($generic:ident),+>)?, $per_table:expr, $table_or_dup:tt)),*]) => {
48            let all_tables = vec![$(stringify!($table_type).to_string(),)*];
49
50            if tables.is_empty() {
51                tables = all_tables;
52            }
53
54            for table in tables {
55                match table.as_str() {
56                    $(
57                        stringify!($table_type) => {
58                            println!("Generating test vectors for {} <{}>.", stringify!($table_or_dup), tables::$table_type$(::<$($generic),+>)?::NAME);
59
60                            generate_vector!($table_type$(<$($generic),+>)?, $per_table, $table_or_dup);
61                        },
62                    )*
63                    _ => {
64                        error!(target: "reth::cli", "Unknown table: {}", table);
65                    }
66                }
67            }
68        }
69    }
70
71    generate!([
72        (CanonicalHeaders, PER_TABLE, TABLE),
73        (HeaderTerminalDifficulties, PER_TABLE, TABLE),
74        (HeaderNumbers, PER_TABLE, TABLE),
75        (Headers<Header>, PER_TABLE, TABLE),
76        (BlockBodyIndices, PER_TABLE, TABLE),
77        (BlockOmmers<Header>, 100, TABLE),
78        (TransactionHashNumbers, PER_TABLE, TABLE),
79        (Transactions<TransactionSigned>, 100, TABLE),
80        (PlainStorageState, PER_TABLE, DUPSORT),
81        (PlainAccountState, PER_TABLE, TABLE)
82    ]);
83
84    Ok(())
85}
86
87/// Generates test-vectors for normal tables. Keys are sorted and not repeated.
88fn generate_table_vector<T>(runner: &mut TestRunner, per_table: usize) -> Result<()>
89where
90    T: Table,
91    T::Key: for<'a> Arbitrary<'a> + serde::Serialize + Ord + std::hash::Hash + Clone,
92    T::Value: for<'a> Arbitrary<'a> + serde::Serialize + Clone,
93{
94    let mut rows = vec![];
95    let mut seen_keys = HashSet::new();
96    let strategy =
97        proptest::collection::vec(arb::<TableRow<T>>(), per_table - rows.len()).no_shrink().boxed();
98
99    while rows.len() < per_table {
100        // Generate all `per_table` rows: (Key, Value)
101        rows.extend(
102            &mut strategy
103                .new_tree(runner)
104                .map_err(|e| eyre::eyre!("{e}"))?
105                .current()
106                .into_iter()
107                .filter(|e| seen_keys.insert(e.0.clone())),
108        );
109    }
110    // Sort them by `Key`
111    rows.sort_by(|a, b| a.0.cmp(&b.0));
112
113    save_to_file::<T>(rows)
114}
115
116/// Generates test-vectors for DUPSORT tables. Each key has multiple (subkey, value). Keys and
117/// subkeys are sorted.
118fn generate_dupsort_vector<T>(runner: &mut TestRunner, per_table: usize) -> Result<()>
119where
120    T: Table + DupSort,
121    T::Key: for<'a> Arbitrary<'a> + serde::Serialize + Ord + std::hash::Hash + Clone,
122    T::Value: for<'a> Arbitrary<'a> + serde::Serialize + Ord + Clone,
123{
124    let mut rows = vec![];
125
126    // We want to control our repeated keys
127    let mut seen_keys = HashSet::new();
128
129    let start_values = proptest::collection::vec(arb::<T::Value>(), 100..300).no_shrink().boxed();
130
131    let start_keys = arb::<T::Key>().no_shrink().boxed();
132
133    while rows.len() < per_table {
134        let key: T::Key = start_keys.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current();
135
136        if !seen_keys.insert(key.clone()) {
137            continue
138        }
139
140        let mut values: Vec<T::Value> =
141            start_values.new_tree(runner).map_err(|e| eyre::eyre!("{e}"))?.current();
142
143        values.sort();
144
145        for value in values {
146            rows.push((key.clone(), value));
147        }
148    }
149
150    // Sort them by `Key`
151    rows.sort_by(|a, b| a.0.cmp(&b.0));
152
153    save_to_file::<T>(rows)
154}
155
156/// Save rows to file.
157fn save_to_file<T: Table>(rows: Vec<TableRow<T>>) -> eyre::Result<()>
158where
159    T::Key: serde::Serialize,
160    T::Value: serde::Serialize,
161{
162    serde_json::to_writer_pretty(
163        std::io::BufWriter::new(
164            std::fs::File::create(format!("{VECTORS_FOLDER}/{}.json", T::NAME)).unwrap(),
165        ),
166        &rows,
167    )
168    .map_err(|e| eyre::eyre!({ e }))
169}