// reth_db_common/init.rs

1//! Reth genesis initialization utility functions.
2
3use alloy_consensus::BlockHeader;
4use alloy_genesis::GenesisAccount;
5use alloy_primitives::{
6    keccak256,
7    map::{AddressMap, B256Map, HashMap},
8    Address, B256, U256,
9};
10use reth_chainspec::EthChainSpec;
11use reth_codecs::Compact;
12use reth_config::config::EtlConfig;
13use reth_db_api::{
14    models::{storage_sharded_key::StorageShardedKey, ShardedKey},
15    tables,
16    transaction::DbTxMut,
17    BlockNumberList, DatabaseError,
18};
19use reth_etl::Collector;
20use reth_execution_errors::StateRootError;
21use reth_primitives_traits::{
22    Account, Bytecode, GotExpected, NodePrimitives, SealedHeader, StorageEntry,
23};
24use reth_provider::{
25    errors::provider::ProviderResult, providers::StaticFileWriter, BlockHashReader, BlockNumReader,
26    BundleStateInit, ChainSpecProvider, DBProvider, DatabaseProviderFactory, EitherWriter,
27    ExecutionOutcome, HashingWriter, HeaderProvider, HistoryWriter, MetadataProvider,
28    MetadataWriter, NodePrimitivesProvider, OriginalValuesKnown, ProviderError, RevertsInit,
29    RocksDBProviderFactory, StageCheckpointReader, StageCheckpointWriter, StateWriteConfig,
30    StateWriter, StaticFileProviderFactory, StorageSettings, StorageSettingsCache, TrieWriter,
31};
32use reth_stages_types::{StageCheckpoint, StageId};
33use reth_static_file_types::StaticFileSegment;
34use reth_trie::{
35    prefix_set::{TriePrefixSets, TriePrefixSetsMut},
36    IntermediateStateRootState, Nibbles, StateRoot as StateRootComputer, StateRootProgress,
37};
38use reth_trie_db::DatabaseStateRoot;
39use serde::{Deserialize, Serialize};
40use std::io::BufRead;
41use tracing::{debug, error, info, trace, warn};
42
/// Default soft limit for number of bytes to read from state dump file, before inserting into
/// database.
///
/// Default is 1 GB.
pub const DEFAULT_SOFT_LIMIT_BYTE_LEN_ACCOUNTS_CHUNK: usize = 1_000_000_000;

/// Approximate number of accounts per 1 GB of state dump file. One account is approximately 3.5 KB
///
/// Approximation: 285 228 accounts per GB.
//
// (14.05 GB OP mainnet state dump at Bedrock block / 4 007 565 accounts in file > 3.5 KB per
// account)
pub const AVERAGE_COUNT_ACCOUNTS_PER_GB_STATE_DUMP: usize = 285_228;

/// Soft limit for the number of flushed updates after which to log progress summary.
const SOFT_LIMIT_COUNT_FLUSHED_UPDATES: usize = 1_000_000;
59
60/// Storage initialization error type.
61#[derive(Debug, thiserror::Error, Clone)]
62pub enum InitStorageError {
63    /// Genesis header found on static files but the database is empty.
64    #[error(
65        "static files found, but the database is uninitialized. If attempting to re-syncing, delete both."
66    )]
67    UninitializedDatabase,
68    /// An existing genesis block was found in the database, and its hash did not match the hash of
69    /// the chainspec.
70    #[error(
71        "genesis hash in the storage does not match the specified chainspec: chainspec is {chainspec_hash}, database is {storage_hash}"
72    )]
73    GenesisHashMismatch {
74        /// Expected genesis hash.
75        chainspec_hash: B256,
76        /// Actual genesis hash.
77        storage_hash: B256,
78    },
79    /// Provider error.
80    #[error(transparent)]
81    Provider(#[from] ProviderError),
82    /// State root error while computing the state root
83    #[error(transparent)]
84    StateRootError(#[from] StateRootError),
85    /// State root doesn't match the expected one.
86    #[error("state root mismatch: {_0}")]
87    StateRootMismatch(GotExpected<B256>),
88}
89
90impl From<DatabaseError> for InitStorageError {
91    fn from(error: DatabaseError) -> Self {
92        Self::Provider(ProviderError::Database(error))
93    }
94}
95
/// Write the genesis block if it has not already been written
///
/// Delegates to [`init_genesis_with_settings`] with the base [`StorageSettings`]
/// and returns the genesis hash on success.
pub fn init_genesis<PF>(factory: &PF) -> Result<B256, InitStorageError>
where
    PF: DatabaseProviderFactory
        + StaticFileProviderFactory<Primitives: NodePrimitives<BlockHeader: Compact>>
        + ChainSpecProvider
        + StageCheckpointReader
        + BlockNumReader
        + MetadataProvider
        + StorageSettingsCache,
    PF::ProviderRW: StaticFileProviderFactory<Primitives = PF::Primitives>
        + StageCheckpointWriter
        + HistoryWriter
        + HeaderProvider
        + HashingWriter
        + StateWriter
        + TrieWriter
        + MetadataWriter
        + ChainSpecProvider
        + StorageSettingsCache
        + RocksDBProviderFactory
        + NodePrimitivesProvider
        + AsRef<PF::ProviderRW>,
    PF::ChainSpec: EthChainSpec<Header = <PF::Primitives as NodePrimitives>::BlockHeader>,
{
    init_genesis_with_settings(factory, StorageSettings::base())
}
123
124/// Write the genesis block if it has not already been written with [`StorageSettings`].
125pub fn init_genesis_with_settings<PF>(
126    factory: &PF,
127    genesis_storage_settings: StorageSettings,
128) -> Result<B256, InitStorageError>
129where
130    PF: DatabaseProviderFactory
131        + StaticFileProviderFactory<Primitives: NodePrimitives<BlockHeader: Compact>>
132        + ChainSpecProvider
133        + StageCheckpointReader
134        + BlockNumReader
135        + MetadataProvider
136        + StorageSettingsCache,
137    PF::ProviderRW: StaticFileProviderFactory<Primitives = PF::Primitives>
138        + StageCheckpointWriter
139        + HistoryWriter
140        + HeaderProvider
141        + HashingWriter
142        + StateWriter
143        + TrieWriter
144        + MetadataWriter
145        + ChainSpecProvider
146        + StorageSettingsCache
147        + RocksDBProviderFactory
148        + NodePrimitivesProvider
149        + AsRef<PF::ProviderRW>,
150    PF::ChainSpec: EthChainSpec<Header = <PF::Primitives as NodePrimitives>::BlockHeader>,
151{
152    let chain = factory.chain_spec();
153
154    let genesis = chain.genesis();
155    let hash = chain.genesis_hash();
156
157    // Get the genesis block number from the chain spec
158    let genesis_block_number = chain.genesis_header().number();
159
160    // Check if we already have the genesis header or if we have the wrong one.
161    match factory.block_hash(genesis_block_number) {
162        Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, _)) => {}
163        Ok(Some(block_hash)) => {
164            if block_hash == hash {
165                // Some users will at times attempt to re-sync from scratch by just deleting the
166                // database. Since `factory.block_hash` will only query the static files, we need to
167                // make sure that our database has been written to, and throw error if it's empty.
168                if factory.get_stage_checkpoint(StageId::Headers)?.is_none() {
169                    error!(target: "reth::storage", "Genesis header found on static files, but database is uninitialized.");
170                    return Err(InitStorageError::UninitializedDatabase)
171                }
172
173                let stored = factory.storage_settings()?.unwrap_or_else(StorageSettings::v1);
174                if stored != genesis_storage_settings {
175                    warn!(
176                        target: "reth::storage",
177                        ?stored,
178                        requested = ?genesis_storage_settings,
179                        "Storage settings mismatch detected"
180                    );
181                }
182
183                debug!("Genesis already written, skipping.");
184                return Ok(hash)
185            }
186
187            return Err(InitStorageError::GenesisHashMismatch {
188                chainspec_hash: hash,
189                storage_hash: block_hash,
190            })
191        }
192        Err(e) => {
193            debug!(?e);
194            return Err(e.into());
195        }
196    }
197
198    debug!("Writing genesis block.");
199
200    // Make sure to set storage settings before anything writes
201    factory.set_storage_settings_cache(genesis_storage_settings);
202
203    let alloc = &genesis.alloc;
204
205    // use transaction to insert genesis header
206    let provider_rw = factory.database_provider_rw()?;
207
208    // Behaviour reserved only for new nodes should be set in the storage settings.
209    provider_rw.write_storage_settings(genesis_storage_settings)?;
210
211    // For non-zero genesis blocks, set expected_block_start BEFORE insert_genesis_state.
212    // When block_range is None, next_block_number() uses expected_block_start. By default,
213    // expected_block_start comes from find_fixed_range which returns the file range start (0),
214    // not the genesis block number. This would cause increment_block(N) to fail.
215    let static_file_provider = provider_rw.static_file_provider();
216    if genesis_block_number > 0 {
217        if genesis_storage_settings.storage_v2 {
218            static_file_provider
219                .get_writer(genesis_block_number, StaticFileSegment::AccountChangeSets)?
220                .user_header_mut()
221                .set_expected_block_start(genesis_block_number);
222        }
223        if genesis_storage_settings.storage_v2 {
224            static_file_provider
225                .get_writer(genesis_block_number, StaticFileSegment::StorageChangeSets)?
226                .user_header_mut()
227                .set_expected_block_start(genesis_block_number);
228        }
229    }
230
231    insert_genesis_hashes(&provider_rw, alloc.iter())?;
232    insert_genesis_history(&provider_rw, alloc.iter())?;
233
234    // Insert header
235    insert_genesis_header(&provider_rw, &chain)?;
236
237    insert_genesis_state(&provider_rw, alloc.iter())?;
238
239    // compute state root to populate trie tables
240    compute_state_root(&provider_rw, None)?;
241
242    // set stage checkpoint to genesis block number for all stages
243    let checkpoint = StageCheckpoint::new(genesis_block_number);
244    for stage in StageId::ALL {
245        provider_rw.save_stage_checkpoint(stage, checkpoint)?;
246    }
247
248    // Static file segments start empty, so we need to initialize the block range.
249    // For genesis blocks with non-zero block numbers, we use get_writer() instead of
250    // latest_writer() and set_block_range() to ensure static files start at the correct block.
251    let static_file_provider = provider_rw.static_file_provider();
252
253    static_file_provider
254        .get_writer(genesis_block_number, StaticFileSegment::Receipts)?
255        .user_header_mut()
256        .set_block_range(genesis_block_number, genesis_block_number);
257    static_file_provider
258        .get_writer(genesis_block_number, StaticFileSegment::Transactions)?
259        .user_header_mut()
260        .set_block_range(genesis_block_number, genesis_block_number);
261
262    if genesis_storage_settings.storage_v2 {
263        static_file_provider
264            .get_writer(genesis_block_number, StaticFileSegment::TransactionSenders)?
265            .user_header_mut()
266            .set_block_range(genesis_block_number, genesis_block_number);
267    }
268
269    // `commit_unwind`` will first commit the DB and then the static file provider, which is
270    // necessary on `init_genesis`.
271    provider_rw.commit()?;
272
273    Ok(hash)
274}
275
276/// Inserts the genesis state into the database.
277pub fn insert_genesis_state<'a, 'b, Provider>(
278    provider: &Provider,
279    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
280) -> ProviderResult<()>
281where
282    Provider: StaticFileProviderFactory
283        + DBProvider<Tx: DbTxMut>
284        + HeaderProvider
285        + StateWriter
286        + ChainSpecProvider
287        + AsRef<Provider>,
288{
289    let genesis_block_number = provider.chain_spec().genesis_header().number();
290    insert_state(provider, alloc, genesis_block_number)
291}
292
293/// Inserts state at given block into database.
294pub fn insert_state<'a, 'b, Provider>(
295    provider: &Provider,
296    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
297    block: u64,
298) -> ProviderResult<()>
299where
300    Provider: StaticFileProviderFactory
301        + DBProvider<Tx: DbTxMut>
302        + HeaderProvider
303        + StateWriter
304        + AsRef<Provider>,
305{
306    let capacity = alloc.size_hint().1.unwrap_or(0);
307    let mut state_init: BundleStateInit =
308        AddressMap::with_capacity_and_hasher(capacity, Default::default());
309    let mut reverts_init: AddressMap<_> =
310        AddressMap::with_capacity_and_hasher(capacity, Default::default());
311    let mut contracts: B256Map<Bytecode> =
312        B256Map::with_capacity_and_hasher(capacity, Default::default());
313
314    for (address, account) in alloc {
315        let bytecode_hash = if let Some(code) = &account.code {
316            match Bytecode::new_raw_checked(code.clone()) {
317                Ok(bytecode) => {
318                    let hash = bytecode.hash_slow();
319                    contracts.insert(hash, bytecode);
320                    Some(hash)
321                }
322                Err(err) => {
323                    error!(%address, %err, "Failed to decode genesis bytecode.");
324                    return Err(DatabaseError::Other(err.to_string()).into());
325                }
326            }
327        } else {
328            None
329        };
330
331        // get state
332        let storage = account
333            .storage
334            .as_ref()
335            .map(|m| {
336                m.iter()
337                    .map(|(key, value)| {
338                        let value = U256::from_be_bytes(value.0);
339                        (*key, (U256::ZERO, value))
340                    })
341                    .collect::<B256Map<_>>()
342            })
343            .unwrap_or_default();
344
345        reverts_init.insert(
346            *address,
347            (Some(None), storage.keys().map(|k| StorageEntry::new(*k, U256::ZERO)).collect()),
348        );
349
350        state_init.insert(
351            *address,
352            (
353                None,
354                Some(Account {
355                    nonce: account.nonce.unwrap_or_default(),
356                    balance: account.balance,
357                    bytecode_hash,
358                }),
359                storage,
360            ),
361        );
362    }
363    let all_reverts_init: RevertsInit = HashMap::from_iter([(block, reverts_init)]);
364
365    let execution_outcome = ExecutionOutcome::new_init(
366        state_init,
367        all_reverts_init,
368        contracts,
369        Vec::default(),
370        block,
371        Vec::new(),
372    );
373
374    provider.write_state(
375        &execution_outcome,
376        OriginalValuesKnown::Yes,
377        StateWriteConfig::default(),
378    )?;
379
380    trace!(target: "reth::cli", "Inserted state");
381
382    Ok(())
383}
384
385/// Inserts hashes for the genesis state.
386pub fn insert_genesis_hashes<'a, 'b, Provider>(
387    provider: &Provider,
388    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)> + Clone,
389) -> ProviderResult<()>
390where
391    Provider: DBProvider<Tx: DbTxMut> + HashingWriter,
392{
393    // insert and hash accounts to hashing table
394    let alloc_accounts = alloc.clone().map(|(addr, account)| (*addr, Some(Account::from(account))));
395    provider.insert_account_for_hashing(alloc_accounts)?;
396
397    trace!(target: "reth::cli", "Inserted account hashes");
398
399    let alloc_storage = alloc.filter_map(|(addr, account)| {
400        // only return Some if there is storage
401        account.storage.as_ref().map(|storage| {
402            (*addr, storage.iter().map(|(&key, &value)| StorageEntry { key, value: value.into() }))
403        })
404    });
405    provider.insert_storage_for_hashing(alloc_storage)?;
406
407    trace!(target: "reth::cli", "Inserted storage hashes");
408
409    Ok(())
410}
411
412/// Inserts history indices for genesis accounts and storage.
413///
414/// Writes to either MDBX or `RocksDB` based on storage settings configuration,
415/// using [`EitherWriter`] to abstract over the storage backend.
416pub fn insert_genesis_history<'a, 'b, Provider>(
417    provider: &Provider,
418    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)> + Clone,
419) -> ProviderResult<()>
420where
421    Provider: DBProvider<Tx: DbTxMut>
422        + HistoryWriter
423        + ChainSpecProvider
424        + StorageSettingsCache
425        + RocksDBProviderFactory
426        + NodePrimitivesProvider,
427{
428    let genesis_block_number = provider.chain_spec().genesis_header().number();
429    insert_history(provider, alloc, genesis_block_number)
430}
431
432/// Inserts account history indices for genesis accounts.
433pub fn insert_genesis_account_history<'a, 'b, Provider>(
434    provider: &Provider,
435    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
436) -> ProviderResult<()>
437where
438    Provider: DBProvider<Tx: DbTxMut>
439        + HistoryWriter
440        + ChainSpecProvider
441        + StorageSettingsCache
442        + RocksDBProviderFactory
443        + NodePrimitivesProvider,
444{
445    let genesis_block_number = provider.chain_spec().genesis_header().number();
446    insert_account_history(provider, alloc, genesis_block_number)
447}
448
449/// Inserts storage history indices for genesis accounts.
450pub fn insert_genesis_storage_history<'a, 'b, Provider>(
451    provider: &Provider,
452    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
453) -> ProviderResult<()>
454where
455    Provider: DBProvider<Tx: DbTxMut>
456        + HistoryWriter
457        + ChainSpecProvider
458        + StorageSettingsCache
459        + RocksDBProviderFactory
460        + NodePrimitivesProvider,
461{
462    let genesis_block_number = provider.chain_spec().genesis_header().number();
463    insert_storage_history(provider, alloc, genesis_block_number)
464}
465
466/// Inserts history indices for genesis accounts and storage.
467///
468/// Writes to either MDBX or `RocksDB` based on storage settings configuration,
469/// using [`EitherWriter`] to abstract over the storage backend.
470pub fn insert_history<'a, 'b, Provider>(
471    provider: &Provider,
472    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)> + Clone,
473    block: u64,
474) -> ProviderResult<()>
475where
476    Provider: DBProvider<Tx: DbTxMut>
477        + HistoryWriter
478        + StorageSettingsCache
479        + RocksDBProviderFactory
480        + NodePrimitivesProvider,
481{
482    insert_account_history(provider, alloc.clone(), block)?;
483    insert_storage_history(provider, alloc, block)?;
484    Ok(())
485}
486
487/// Inserts account history indices at the given block.
488pub fn insert_account_history<'a, 'b, Provider>(
489    provider: &Provider,
490    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
491    block: u64,
492) -> ProviderResult<()>
493where
494    Provider: DBProvider<Tx: DbTxMut>
495        + HistoryWriter
496        + StorageSettingsCache
497        + RocksDBProviderFactory
498        + NodePrimitivesProvider,
499{
500    provider.with_rocksdb_batch(|batch| {
501        let mut writer = EitherWriter::new_accounts_history(provider, batch)?;
502        let list = BlockNumberList::new([block]).expect("single block always fits");
503        for (addr, _) in alloc {
504            writer.upsert_account_history(ShardedKey::last(*addr), &list)?;
505        }
506        trace!(target: "reth::cli", "Inserted account history");
507        Ok(((), writer.into_raw_rocksdb_batch()))
508    })?;
509
510    Ok(())
511}
512
513/// Inserts storage history indices at the given block.
514pub fn insert_storage_history<'a, 'b, Provider>(
515    provider: &Provider,
516    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
517    block: u64,
518) -> ProviderResult<()>
519where
520    Provider: DBProvider<Tx: DbTxMut>
521        + HistoryWriter
522        + StorageSettingsCache
523        + RocksDBProviderFactory
524        + NodePrimitivesProvider,
525{
526    provider.with_rocksdb_batch(|batch| {
527        let mut writer = EitherWriter::new_storages_history(provider, batch)?;
528        let list = BlockNumberList::new([block]).expect("single block always fits");
529        for (addr, account) in alloc {
530            if let Some(storage) = &account.storage {
531                for key in storage.keys() {
532                    writer.upsert_storage_history(StorageShardedKey::last(*addr, *key), &list)?;
533                }
534            }
535        }
536        trace!(target: "reth::cli", "Inserted storage history");
537        Ok(((), writer.into_raw_rocksdb_batch()))
538    })?;
539
540    Ok(())
541}
542
/// Inserts header for the genesis state.
///
/// Writes the genesis header to the `Headers` static file segment (unless it is already
/// present there) and records the hash -> number and block-body-indices entries in the
/// database tables.
pub fn insert_genesis_header<Provider, Spec>(
    provider: &Provider,
    chain: &Spec,
) -> ProviderResult<()>
where
    Provider: StaticFileProviderFactory<Primitives: NodePrimitives<BlockHeader: Compact>>
        + DBProvider<Tx: DbTxMut>,
    Spec: EthChainSpec<Header = <Provider::Primitives as NodePrimitives>::BlockHeader>,
{
    let (header, block_hash) = (chain.genesis_header(), chain.genesis_hash());
    let static_file_provider = provider.static_file_provider();

    // Get the actual genesis block number from the header
    let genesis_block_number = header.number();

    match static_file_provider.block_hash(genesis_block_number) {
        // Header not written yet (or the headers segment file is missing entirely).
        Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, _)) => {
            let difficulty = header.difficulty();

            // For genesis blocks with non-zero block numbers, we need to ensure they are stored
            // in the correct static file range. We use get_writer() with the genesis block number
            // to ensure the genesis block is stored in the correct static file range.
            let mut writer = static_file_provider
                .get_writer(genesis_block_number, StaticFileSegment::Headers)?;

            // For non-zero genesis blocks, we need to set block range to genesis_block_number and
            // append header without increment block
            if genesis_block_number > 0 {
                writer
                    .user_header_mut()
                    .set_block_range(genesis_block_number, genesis_block_number);
                writer.append_header_direct(header, difficulty, &block_hash)?;
            } else {
                // For zero genesis blocks, use normal append_header
                writer.append_header(header, &block_hash)?;
            }
        }
        // Header already present in static files; nothing to write there.
        Ok(Some(_)) => {}
        Err(e) => return Err(e),
    }

    // Database index entries: hash -> number lookup and an empty body-indices row.
    provider.tx_ref().put::<tables::HeaderNumbers>(block_hash, genesis_block_number)?;
    provider.tx_ref().put::<tables::BlockBodyIndices>(genesis_block_number, Default::default())?;

    Ok(())
}
590
/// Reads account state from a [`BufRead`] reader and initializes it at the highest block that can
/// be found on database.
///
/// It's similar to [`init_genesis`] but supports importing state too big to fit in memory, and can
/// be set to the highest block present. One practical usecase is to import OP mainnet state at
/// bedrock transition block.
///
/// The dump format is one JSON object per line: the first line carries the expected state root,
/// every following line one account (see [`parse_state_root`] and [`parse_accounts`]). The state
/// root computed after import must match the root of the current tip header, otherwise an error
/// is returned.
pub fn init_from_state_dump<Provider>(
    mut reader: impl BufRead,
    provider_rw: &Provider,
    etl_config: EtlConfig,
) -> eyre::Result<B256>
where
    Provider: StaticFileProviderFactory
        + DBProvider<Tx: DbTxMut>
        + BlockNumReader
        + BlockHashReader
        + ChainSpecProvider
        + StageCheckpointWriter
        + HistoryWriter
        + HeaderProvider
        + HashingWriter
        + TrieWriter
        + StateWriter
        + StorageSettingsCache
        + RocksDBProviderFactory
        + NodePrimitivesProvider
        + AsRef<Provider>,
{
    // A zero chunk size would make the ETL collector unusable.
    if etl_config.file_size == 0 {
        return Err(eyre::eyre!("ETL file size cannot be zero"))
    }

    // Anchor the imported state at the highest block already present in storage.
    let block = provider_rw.last_block_number()?;

    let hash = provider_rw
        .block_hash(block)?
        .ok_or_else(|| eyre::eyre!("Block hash not found for block {}", block))?;
    let header = provider_rw
        .header_by_number(block)?
        .map(SealedHeader::seal_slow)
        .ok_or_else(|| ProviderError::HeaderNotFound(block.into()))?;

    let expected_state_root = header.state_root();

    // first line can be state root
    let dump_state_root = parse_state_root(&mut reader)?;
    if expected_state_root != dump_state_root {
        error!(target: "reth::cli",
            ?dump_state_root,
            ?expected_state_root,
            header=?header.num_hash(),
            "State root from state dump does not match state root in current header."
        );
        return Err(InitStorageError::StateRootMismatch(GotExpected {
            got: dump_state_root,
            expected: expected_state_root,
        })
        .into())
    }

    debug!(target: "reth::cli",
        block,
        chain=%provider_rw.chain_spec().chain(),
        "Initializing state at block"
    );

    // remaining lines are accounts
    let collector = parse_accounts(&mut reader, etl_config)?;

    // write state to db and collect prefix sets
    let mut prefix_sets = TriePrefixSetsMut::default();
    dump_state(collector, provider_rw, block, &mut prefix_sets)?;

    info!(target: "reth::cli", "All accounts written to database, starting state root computation (may take some time)");

    // compute and compare state root. this advances the stage checkpoints.
    let computed_state_root = compute_state_root(provider_rw, Some(prefix_sets.freeze()))?;
    if computed_state_root == expected_state_root {
        info!(target: "reth::cli",
            ?computed_state_root,
            "Computed state root matches state root in state dump"
        );
    } else {
        error!(target: "reth::cli",
            ?computed_state_root,
            ?expected_state_root,
            "Computed state root does not match state root in state dump"
        );

        return Err(InitStorageError::StateRootMismatch(GotExpected {
            got: computed_state_root,
            expected: expected_state_root,
        })
        .into())
    }

    // insert sync stages for stages that require state
    for stage in StageId::STATE_REQUIRED {
        provider_rw.save_stage_checkpoint(stage, StageCheckpoint::new(block))?;
    }

    Ok(hash)
}
694
695/// Parses and returns expected state root.
696fn parse_state_root(reader: &mut impl BufRead) -> eyre::Result<B256> {
697    let mut line = String::new();
698    reader.read_line(&mut line)?;
699
700    let expected_state_root = serde_json::from_str::<StateRoot>(&line)?.root;
701    trace!(target: "reth::cli",
702        root=%expected_state_root,
703        "Read state root from file"
704    );
705    Ok(expected_state_root)
706}
707
708/// Parses accounts and pushes them to a [`Collector`].
709fn parse_accounts(
710    mut reader: impl BufRead,
711    etl_config: EtlConfig,
712) -> Result<Collector<Address, GenesisAccount>, eyre::Error> {
713    let mut line = String::new();
714    let mut collector = Collector::new(etl_config.file_size, etl_config.dir);
715
716    loop {
717        let n = reader.read_line(&mut line)?;
718        if n == 0 {
719            break
720        }
721
722        let GenesisAccountWithAddress { genesis_account, address } = serde_json::from_str(&line)?;
723        collector.insert(address, genesis_account)?;
724
725        if !collector.is_empty() &&
726            collector.len().is_multiple_of(AVERAGE_COUNT_ACCOUNTS_PER_GB_STATE_DUMP)
727        {
728            info!(target: "reth::cli",
729                parsed_new_accounts=collector.len(),
730            );
731        }
732
733        line.clear();
734    }
735
736    Ok(collector)
737}
738
/// Takes a [`Collector`] and processes all accounts.
///
/// Decodes each collected (address, account) pair from its compact encoding, records the hashed
/// account/storage paths in `prefix_sets`, and flushes accumulated accounts to the database in
/// batches of roughly [`AVERAGE_COUNT_ACCOUNTS_PER_GB_STATE_DUMP`] entries (hashes, history
/// indices and plain state per batch).
fn dump_state<Provider>(
    mut collector: Collector<Address, GenesisAccount>,
    provider_rw: &Provider,
    block: u64,
    prefix_sets: &mut TriePrefixSetsMut,
) -> Result<(), eyre::Error>
where
    Provider: StaticFileProviderFactory
        + DBProvider<Tx: DbTxMut>
        + HeaderProvider
        + HashingWriter
        + HistoryWriter
        + StateWriter
        + StorageSettingsCache
        + RocksDBProviderFactory
        + NodePrimitivesProvider
        + AsRef<Provider>,
{
    // NOTE: `accounts_len - 1` below would underflow for an empty collector, but then the loop
    // body never runs, so the subtraction is never evaluated.
    let accounts_len = collector.len();
    let mut accounts = Vec::with_capacity(AVERAGE_COUNT_ACCOUNTS_PER_GB_STATE_DUMP);
    let mut total_inserted_accounts = 0;

    for (index, entry) in collector.iter()?.enumerate() {
        let (address, account) = entry?;
        // Entries come back as compact-encoded byte buffers; decode both key and value.
        let (address, _) = Address::from_compact(address.as_slice(), address.len());
        let (account, _) = GenesisAccount::from_compact(account.as_slice(), account.len());

        // Add to prefix sets
        let hashed_address = keccak256(address);
        prefix_sets.account_prefix_set.insert(Nibbles::unpack(hashed_address));

        // Add storage keys to prefix sets if storage exists
        if let Some(ref storage) = account.storage {
            for key in storage.keys() {
                let hashed_key = keccak256(key);
                prefix_sets
                    .storage_prefix_sets
                    .entry(hashed_address)
                    .or_default()
                    .insert(Nibbles::unpack(hashed_key));
            }
        }

        accounts.push((address, account));

        // Flush either a full batch or the final (possibly partial) batch on the last entry.
        if (index > 0 && index.is_multiple_of(AVERAGE_COUNT_ACCOUNTS_PER_GB_STATE_DUMP)) ||
            index == accounts_len - 1
        {
            total_inserted_accounts += accounts.len();

            info!(target: "reth::cli",
                total_inserted_accounts,
                "Writing accounts to db"
            );

            // use transaction to insert genesis header
            insert_genesis_hashes(
                provider_rw,
                accounts.iter().map(|(address, account)| (address, account)),
            )?;

            insert_history(
                provider_rw,
                accounts.iter().map(|(address, account)| (address, account)),
                block,
            )?;

            // block is already written to static files
            insert_state(
                provider_rw,
                accounts.iter().map(|(address, account)| (address, account)),
                block,
            )?;

            accounts.clear();
        }
    }
    Ok(())
}
819
/// Computes the state root (from scratch) based on the accounts and storages present in the
/// database.
///
/// Runs the incremental state root computation in a loop, persisting intermediate trie updates
/// after every chunk of progress until the computation completes. When `prefix_sets` is
/// provided, the computation is restricted to the paths they cover.
fn compute_state_root<Provider>(
    provider: &Provider,
    prefix_sets: Option<TriePrefixSets>,
) -> Result<B256, InitStorageError>
where
    Provider: DBProvider<Tx: DbTxMut> + TrieWriter,
{
    trace!(target: "reth::cli", "Computing state root");

    let tx = provider.tx_ref();
    // Carries the computation state across loop iterations so each pass resumes where the
    // previous one stopped.
    let mut intermediate_state: Option<IntermediateStateRootState> = None;
    let mut total_flushed_updates = 0;

    loop {
        let mut state_root =
            StateRootComputer::from_tx(tx).with_intermediate_state(intermediate_state);

        if let Some(sets) = prefix_sets.clone() {
            state_root = state_root.with_prefix_sets(sets);
        }

        match state_root.root_with_progress()? {
            StateRootProgress::Progress(state, _, updates) => {
                // Persist this chunk's trie updates before resuming the computation.
                let updated_len = provider.write_trie_updates(updates)?;
                total_flushed_updates += updated_len;

                trace!(target: "reth::cli",
                    last_account_key = %state.account_root_state.last_hashed_key,
                    updated_len,
                    total_flushed_updates,
                    "Flushing trie updates"
                );

                intermediate_state = Some(*state);

                // Progress summary roughly every SOFT_LIMIT_COUNT_FLUSHED_UPDATES updates.
                if total_flushed_updates.is_multiple_of(SOFT_LIMIT_COUNT_FLUSHED_UPDATES) {
                    info!(target: "reth::cli",
                        total_flushed_updates,
                        "Flushing trie updates"
                    );
                }
            }
            StateRootProgress::Complete(root, _, updates) => {
                // Final flush; the computed root is returned to the caller.
                let updated_len = provider.write_trie_updates(updates)?;
                total_flushed_updates += updated_len;

                trace!(target: "reth::cli",
                    %root,
                    updated_len,
                    total_flushed_updates,
                    "State root has been computed"
                );

                return Ok(root)
            }
        }
    }
}
880
/// Type to deserialize state root from state dump file.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
struct StateRoot {
    /// The state root hash recorded in the dump.
    root: B256,
}
886
/// An account as in the state dump file. This contains a [`GenesisAccount`] and the account's
/// address.
#[derive(Debug, Serialize, Deserialize)]
struct GenesisAccountWithAddress {
    /// The account's balance, nonce, code, and storage.
    // `flatten` folds the `GenesisAccount` fields into the same serialized object as `address`,
    // matching the dump file's per-account record layout.
    #[serde(flatten)]
    genesis_account: GenesisAccount,
    /// The account's address.
    address: Address,
}
897
#[cfg(test)]
mod tests {
    use super::*;
    use alloy_consensus::constants::{
        HOLESKY_GENESIS_HASH, MAINNET_GENESIS_HASH, SEPOLIA_GENESIS_HASH,
    };
    use alloy_genesis::Genesis;
    use reth_chainspec::{Chain, ChainSpec, HOLESKY, MAINNET, SEPOLIA};
    use reth_db::DatabaseEnv;
    use reth_db_api::{
        cursor::DbCursorRO,
        models::{storage_sharded_key::StorageShardedKey, IntegerList, ShardedKey},
        table::{Table, TableRow},
        transaction::DbTx,
        Database,
    };
    use reth_provider::{
        test_utils::{create_test_provider_factory_with_chain_spec, MockNodeTypesWithDB},
        ProviderFactory, RocksDBProviderFactory,
    };
    use std::{collections::BTreeMap, sync::Arc};

    /// Reads every row of table `T` via a fresh read cursor over the full key range.
    fn collect_table_entries<DB, T>(
        tx: &<DB as Database>::TX,
    ) -> Result<Vec<TableRow<T>>, InitStorageError>
    where
        DB: Database,
        T: Table,
    {
        let mut cursor = tx.cursor_read::<T>()?;
        let rows = cursor.walk_range(..)?.collect::<Result<Vec<_>, _>>()?;
        Ok(rows)
    }

    #[test]
    fn success_init_genesis_mainnet() {
        // Initializing a fresh database with the mainnet spec yields the canonical hash.
        let factory = create_test_provider_factory_with_chain_spec(MAINNET.clone());
        let genesis_hash = init_genesis(&factory).unwrap();
        assert_eq!(genesis_hash, MAINNET_GENESIS_HASH);
    }

    #[test]
    fn success_init_genesis_sepolia() {
        // Initializing a fresh database with the Sepolia spec yields the canonical hash.
        let factory = create_test_provider_factory_with_chain_spec(SEPOLIA.clone());
        let genesis_hash = init_genesis(&factory).unwrap();
        assert_eq!(genesis_hash, SEPOLIA_GENESIS_HASH);
    }

    #[test]
    fn success_init_genesis_holesky() {
        // Initializing a fresh database with the Holesky spec yields the canonical hash.
        let factory = create_test_provider_factory_with_chain_spec(HOLESKY.clone());
        let genesis_hash = init_genesis(&factory).unwrap();
        assert_eq!(genesis_hash, HOLESKY_GENESIS_HASH);
    }

    #[test]
    fn fail_init_inconsistent_db() {
        // Initialize storage with the Sepolia genesis first.
        let factory = create_test_provider_factory_with_chain_spec(SEPOLIA.clone());
        let static_file_provider = factory.static_file_provider();
        let rocksdb_provider = factory.rocksdb_provider();
        init_genesis(&factory).unwrap();

        // Re-initializing the same database under the mainnet spec must be rejected
        // with a hash mismatch that reports both the requested and stored genesis.
        let mainnet_factory = ProviderFactory::<MockNodeTypesWithDB>::new(
            factory.into_db(),
            MAINNET.clone(),
            static_file_provider,
            rocksdb_provider,
            reth_tasks::Runtime::test(),
        )
        .unwrap();
        let genesis_hash = init_genesis(&mainnet_factory);

        assert!(matches!(
            genesis_hash.unwrap_err(),
            InitStorageError::GenesisHashMismatch {
                chainspec_hash: MAINNET_GENESIS_HASH,
                storage_hash: SEPOLIA_GENESIS_HASH
            }
        ))
    }

    #[test]
    fn init_genesis_history() {
        let address_with_balance = Address::with_last_byte(1);
        let address_with_storage = Address::with_last_byte(2);
        let storage_key = B256::with_last_byte(1);

        // Two alloc accounts: one holding only a balance, one holding a single storage slot.
        let mut alloc = BTreeMap::new();
        alloc.insert(
            address_with_balance,
            GenesisAccount { balance: U256::from(1), ..Default::default() },
        );
        alloc.insert(
            address_with_storage,
            GenesisAccount {
                storage: Some(BTreeMap::from([(storage_key, B256::random())])),
                ..Default::default()
            },
        );
        let chain_spec = Arc::new(ChainSpec {
            chain: Chain::from_id(1),
            genesis: Genesis { alloc, ..Default::default() },
            hardforks: Default::default(),
            paris_block_and_final_difficulty: None,
            deposit_contract: None,
            ..Default::default()
        });

        let factory = create_test_provider_factory_with_chain_spec(chain_spec);
        init_genesis(&factory).unwrap();

        // Both accounts — and the one storage slot — get a history shard containing block 0.
        let expected_accounts = vec![
            (ShardedKey::new(address_with_balance, u64::MAX), IntegerList::new([0]).unwrap()),
            (ShardedKey::new(address_with_storage, u64::MAX), IntegerList::new([0]).unwrap()),
        ];
        let expected_storages = vec![(
            StorageShardedKey::new(address_with_storage, storage_key, u64::MAX),
            IntegerList::new([0]).unwrap(),
        )];

        // Reads both history tables straight from the MDBX transaction.
        let collect_from_mdbx = |factory: &ProviderFactory<MockNodeTypesWithDB>| {
            let provider = factory.provider().unwrap();
            let tx = provider.tx_ref();
            (
                collect_table_entries::<DatabaseEnv, tables::AccountsHistory>(tx).unwrap(),
                collect_table_entries::<DatabaseEnv, tables::StoragesHistory>(tx).unwrap(),
            )
        };

        #[cfg(feature = "rocksdb")]
        {
            let settings = factory.cached_storage_settings();
            let rocksdb = factory.rocksdb_provider();

            // Reads both history tables from the rocksdb provider instead.
            let collect_rocksdb = |rocksdb: &reth_provider::providers::RocksDBProvider| {
                let accounts = rocksdb
                    .iter::<tables::AccountsHistory>()
                    .unwrap()
                    .collect::<Result<Vec<_>, _>>()
                    .unwrap();
                let storages = rocksdb
                    .iter::<tables::StoragesHistory>()
                    .unwrap()
                    .collect::<Result<Vec<_>, _>>()
                    .unwrap();
                (accounts, storages)
            };

            // Under the v2 storage layout history is written to rocksdb; otherwise MDBX.
            let (accounts, storages) = if settings.storage_v2 {
                collect_rocksdb(&rocksdb)
            } else {
                collect_from_mdbx(&factory)
            };
            assert_eq!(accounts, expected_accounts);
            assert_eq!(storages, expected_storages);
        }

        #[cfg(not(feature = "rocksdb"))]
        {
            let (accounts, storages) = collect_from_mdbx(&factory);
            assert_eq!(accounts, expected_accounts);
            assert_eq!(storages, expected_storages);
        }
    }

    #[test]
    fn warn_storage_settings_mismatch() {
        let factory = create_test_provider_factory_with_chain_spec(MAINNET.clone());
        init_genesis_with_settings(&factory, StorageSettings::v1()).unwrap();

        // Re-initializing with different settings only logs a warning; it must not fail.
        let result = init_genesis_with_settings(&factory, StorageSettings::v2());
        assert!(result.is_ok());
    }

    #[test]
    fn allow_same_storage_settings() {
        let factory = create_test_provider_factory_with_chain_spec(MAINNET.clone());
        let settings = StorageSettings::v2();
        init_genesis_with_settings(&factory, settings).unwrap();

        // Initializing a second time with identical settings succeeds.
        let result = init_genesis_with_settings(&factory, settings);
        assert!(result.is_ok());
    }
}