Skip to main content

reth_db_common/
init.rs

1//! Reth genesis initialization utility functions.
2
3use alloy_consensus::BlockHeader;
4use alloy_genesis::GenesisAccount;
5use alloy_primitives::{
6    keccak256,
7    map::{AddressMap, B256Map, B256Set, HashMap},
8    Address, B256, U256,
9};
10use reth_chainspec::EthChainSpec;
11use reth_codecs::Compact;
12use reth_config::config::EtlConfig;
13use reth_db_api::{
14    cursor::{DbCursorRW, DbDupCursorRW},
15    models::{
16        storage_sharded_key::StorageShardedKey, AccountBeforeTx, BlockNumberAddress, IntegerList,
17        ShardedKey,
18    },
19    tables,
20    transaction::DbTxMut,
21    DatabaseError,
22};
23use reth_etl::Collector;
24use reth_execution_errors::StateRootError;
25use reth_primitives_traits::{
26    Account, Bytecode, GotExpected, NodePrimitives, SealedHeader, StorageEntry,
27};
28use reth_provider::{
29    errors::provider::ProviderResult, providers::StaticFileWriter, BlockHashReader, BlockNumReader,
30    BundleStateInit, ChainSpecProvider, DBProvider, DatabaseProviderFactory, ExecutionOutcome,
31    HashingWriter, HeaderProvider, HistoryWriter, MetadataProvider, MetadataWriter,
32    NodePrimitivesProvider, OriginalValuesKnown, ProviderError, RevertsInit,
33    RocksDBProviderFactory, StageCheckpointReader, StageCheckpointWriter, StateWriteConfig,
34    StateWriter, StaticFileProviderFactory, StorageSettings, StorageSettingsCache, TrieWriter,
35};
36use reth_stages_types::{StageCheckpoint, StageId};
37use reth_static_file_types::StaticFileSegment;
38use reth_trie::{
39    prefix_set::TriePrefixSets, IntermediateStateRootState, StateRoot as StateRootComputer,
40    StateRootProgress,
41};
42use reth_trie_db::DatabaseStateRoot;
43
/// Shorthand for a [`StateRootComputer`] wired to the `reth_trie_db` database-backed cursor
/// factories, both of which borrow the transaction `TX` for `'a`.
///
/// `A` is forwarded unchanged as the second type parameter of
/// `reth_trie_db::DatabaseTrieCursorFactory`.
type DbStateRoot<'a, TX, A> = StateRootComputer<
    reth_trie_db::DatabaseTrieCursorFactory<&'a TX, A>,
    reth_trie_db::DatabaseHashedCursorFactory<&'a TX>,
>;
48
49use serde::{Deserialize, Serialize};
50use std::io::BufRead;
51use tracing::{debug, error, info, trace, warn};
52
53pub use reth_provider::init::{
54    insert_account_history, insert_genesis_account_history, insert_genesis_history,
55    insert_genesis_storage_history, insert_history, insert_storage_history,
56};
57
/// Default soft limit for number of bytes to read from state dump file, before inserting into
/// database.
///
/// Default is 1 GB (`1_000_000_000` bytes).
pub const DEFAULT_SOFT_LIMIT_BYTE_LEN_ACCOUNTS_CHUNK: usize = 1_000_000_000;

/// Soft limit for the number of flushed updates after which to log progress summary.
const SOFT_LIMIT_COUNT_FLUSHED_UPDATES: usize = 1_000_000;

/// Max number of storage "units" (1 per account + 1 per storage slot) before committing
/// the current MDBX transaction and opening a new one. This bounds dirty page accumulation
/// and prevents OOM on large state imports.
///
/// The check is made before an account is written, so a single account that alone exceeds
/// this many units is still written within one transaction.
const STORAGE_COMMIT_THRESHOLD: usize = 100_000;

/// Max number of trie updates retained before init-state state root computation commits progress.
const STATE_ROOT_COMMIT_THRESHOLD: u64 = 25_000;
74
/// Storage initialization error type.
///
/// Returned by the genesis initialization helpers in this module; the state dump import
/// also wraps [`InitStorageError::StateRootMismatch`] into its `eyre` error.
#[derive(Debug, thiserror::Error, Clone)]
pub enum InitStorageError {
    /// Genesis header found on static files but the database is empty.
    ///
    /// Raised when the stored genesis hash matches the chainspec but no `Headers` stage
    /// checkpoint exists in the database.
    #[error(
        "static files found, but the database is uninitialized. If attempting to re-syncing, delete both."
    )]
    UninitializedDatabase,
    /// An existing genesis block was found in the database, and its hash did not match the hash of
    /// the chainspec.
    #[error(
        "genesis hash in the storage does not match the specified chainspec: chainspec is {chainspec_hash}, database is {storage_hash}"
    )]
    GenesisHashMismatch {
        /// Expected genesis hash (taken from the chainspec).
        chainspec_hash: B256,
        /// Actual genesis hash (read back from storage).
        storage_hash: B256,
    },
    /// Provider error.
    #[error(transparent)]
    Provider(#[from] ProviderError),
    /// State root error while computing the state root
    #[error(transparent)]
    StateRootError(#[from] StateRootError),
    /// State root doesn't match the expected one.
    ///
    /// Carries both the value that was obtained and the value that was expected.
    #[error("state root mismatch: {_0}")]
    StateRootMismatch(GotExpected<B256>),
}
104
105impl From<DatabaseError> for InitStorageError {
106    fn from(error: DatabaseError) -> Self {
107        Self::Provider(ProviderError::Database(error))
108    }
109}
110
111/// Write the genesis block if it has not already been written
112pub fn init_genesis<PF>(factory: &PF) -> Result<B256, InitStorageError>
113where
114    PF: DatabaseProviderFactory
115        + StaticFileProviderFactory<Primitives: NodePrimitives<BlockHeader: Compact>>
116        + ChainSpecProvider
117        + StageCheckpointReader
118        + BlockNumReader
119        + MetadataProvider
120        + StorageSettingsCache,
121    PF::ProviderRW: StaticFileProviderFactory<Primitives = PF::Primitives>
122        + StageCheckpointWriter
123        + HistoryWriter
124        + HeaderProvider
125        + HashingWriter
126        + StateWriter
127        + TrieWriter
128        + MetadataWriter
129        + ChainSpecProvider
130        + StorageSettingsCache
131        + RocksDBProviderFactory
132        + NodePrimitivesProvider
133        + AsRef<PF::ProviderRW>,
134    PF::ChainSpec: EthChainSpec<Header = <PF::Primitives as NodePrimitives>::BlockHeader>,
135{
136    init_genesis_with_settings(factory, StorageSettings::base())
137}
138
139/// Write the genesis block if it has not already been written with [`StorageSettings`].
140pub fn init_genesis_with_settings<PF>(
141    factory: &PF,
142    genesis_storage_settings: StorageSettings,
143) -> Result<B256, InitStorageError>
144where
145    PF: DatabaseProviderFactory
146        + StaticFileProviderFactory<Primitives: NodePrimitives<BlockHeader: Compact>>
147        + ChainSpecProvider
148        + StageCheckpointReader
149        + BlockNumReader
150        + MetadataProvider
151        + StorageSettingsCache,
152    PF::ProviderRW: StaticFileProviderFactory<Primitives = PF::Primitives>
153        + StageCheckpointWriter
154        + HistoryWriter
155        + HeaderProvider
156        + HashingWriter
157        + StateWriter
158        + TrieWriter
159        + MetadataWriter
160        + ChainSpecProvider
161        + StorageSettingsCache
162        + RocksDBProviderFactory
163        + NodePrimitivesProvider
164        + AsRef<PF::ProviderRW>,
165    PF::ChainSpec: EthChainSpec<Header = <PF::Primitives as NodePrimitives>::BlockHeader>,
166{
167    init_genesis_with_settings_and_validate(factory, genesis_storage_settings, true)
168}
169
170/// Write the genesis block if it has not already been written with [`StorageSettings`],
171/// optionally validating the DB-resident genesis hash against the chainspec hash.
172pub fn init_genesis_with_settings_and_validate<PF>(
173    factory: &PF,
174    genesis_storage_settings: StorageSettings,
175    validate_genesis_hash: bool,
176) -> Result<B256, InitStorageError>
177where
178    PF: DatabaseProviderFactory
179        + StaticFileProviderFactory<Primitives: NodePrimitives<BlockHeader: Compact>>
180        + ChainSpecProvider
181        + StageCheckpointReader
182        + BlockNumReader
183        + MetadataProvider
184        + StorageSettingsCache,
185    PF::ProviderRW: StaticFileProviderFactory<Primitives = PF::Primitives>
186        + StageCheckpointWriter
187        + HistoryWriter
188        + HeaderProvider
189        + HashingWriter
190        + StateWriter
191        + TrieWriter
192        + MetadataWriter
193        + ChainSpecProvider
194        + StorageSettingsCache
195        + RocksDBProviderFactory
196        + NodePrimitivesProvider
197        + AsRef<PF::ProviderRW>,
198    PF::ChainSpec: EthChainSpec<Header = <PF::Primitives as NodePrimitives>::BlockHeader>,
199{
200    let chain = factory.chain_spec();
201
202    let genesis = chain.genesis();
203    let hash = chain.genesis_hash();
204
205    // Get the genesis block number from the chain spec
206    let genesis_block_number = chain.genesis_header().number();
207
208    // Check if we already have the genesis header or if we have the wrong one.
209    match factory.block_hash(genesis_block_number) {
210        Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, _)) => {}
211        Ok(Some(block_hash)) => {
212            if block_hash == hash {
213                // Some users will at times attempt to re-sync from scratch by just deleting the
214                // database. Since `factory.block_hash` will only query the static files, we need to
215                // make sure that our database has been written to, and throw error if it's empty.
216                if factory.get_stage_checkpoint(StageId::Headers)?.is_none() {
217                    error!(target: "reth::storage", "Genesis header found on static files, but database is uninitialized.");
218                    return Err(InitStorageError::UninitializedDatabase)
219                }
220
221                let stored = factory.storage_settings()?.unwrap_or_else(StorageSettings::v1);
222                if stored != genesis_storage_settings {
223                    warn!(
224                        target: "reth::storage",
225                        ?stored,
226                        requested = ?genesis_storage_settings,
227                        "Storage settings mismatch detected. Using the stored settings from the existing database."
228                    );
229                }
230
231                debug!("Genesis already written, skipping.");
232                return Ok(hash)
233            }
234
235            if !validate_genesis_hash {
236                warn!(
237                    target: "reth::storage",
238                    chainspec_hash = %hash,
239                    storage_hash = %block_hash,
240                    "Genesis hash mismatch with chainspec; trusting DB per --debug.skip-genesis-validation"
241                );
242                return Ok(block_hash)
243            }
244            return Err(InitStorageError::GenesisHashMismatch {
245                chainspec_hash: hash,
246                storage_hash: block_hash,
247            })
248        }
249        Err(e) => {
250            debug!(?e);
251            return Err(e.into());
252        }
253    }
254
255    debug!("Writing genesis block.");
256
257    // Make sure to set storage settings before anything writes
258    factory.set_storage_settings_cache(genesis_storage_settings);
259
260    let alloc = &genesis.alloc;
261
262    // use transaction to insert genesis header
263    let provider_rw = factory.database_provider_rw()?;
264
265    // Behaviour reserved only for new nodes should be set in the storage settings.
266    provider_rw.write_storage_settings(genesis_storage_settings)?;
267
268    // For non-zero genesis blocks, set expected_block_start BEFORE insert_genesis_state.
269    // When block_range is None, next_block_number() uses expected_block_start. By default,
270    // expected_block_start comes from find_fixed_range which returns the file range start (0),
271    // not the genesis block number. This would cause increment_block(N) to fail.
272    let static_file_provider = provider_rw.static_file_provider();
273    if genesis_block_number > 0 {
274        if genesis_storage_settings.storage_v2 {
275            static_file_provider
276                .get_writer(genesis_block_number, StaticFileSegment::AccountChangeSets)?
277                .user_header_mut()
278                .set_expected_block_start(genesis_block_number);
279        }
280        if genesis_storage_settings.storage_v2 {
281            static_file_provider
282                .get_writer(genesis_block_number, StaticFileSegment::StorageChangeSets)?
283                .user_header_mut()
284                .set_expected_block_start(genesis_block_number);
285        }
286    }
287
288    insert_genesis_hashes(&provider_rw, alloc.iter())?;
289    insert_genesis_history(&provider_rw, alloc.iter())?;
290
291    // Insert header
292    insert_genesis_header(&provider_rw, &chain)?;
293
294    insert_genesis_state(&provider_rw, alloc.iter())?;
295
296    // compute state root to populate trie tables
297    compute_state_root(&provider_rw, None)?;
298
299    // set stage checkpoint to genesis block number for all stages
300    let checkpoint = StageCheckpoint::new(genesis_block_number);
301    for stage in StageId::ALL {
302        provider_rw.save_stage_checkpoint(stage, checkpoint)?;
303    }
304
305    // Static file segments start empty, so we need to initialize the block range.
306    // For genesis blocks with non-zero block numbers, we use get_writer() instead of
307    // latest_writer() and set_block_range() to ensure static files start at the correct block.
308    let static_file_provider = provider_rw.static_file_provider();
309
310    static_file_provider
311        .get_writer(genesis_block_number, StaticFileSegment::Receipts)?
312        .user_header_mut()
313        .set_block_range(genesis_block_number, genesis_block_number);
314    static_file_provider
315        .get_writer(genesis_block_number, StaticFileSegment::Transactions)?
316        .user_header_mut()
317        .set_block_range(genesis_block_number, genesis_block_number);
318
319    if genesis_storage_settings.storage_v2 {
320        static_file_provider
321            .get_writer(genesis_block_number, StaticFileSegment::TransactionSenders)?
322            .user_header_mut()
323            .set_block_range(genesis_block_number, genesis_block_number);
324    }
325
326    // `commit_unwind`` will first commit the DB and then the static file provider, which is
327    // necessary on `init_genesis`.
328    provider_rw.commit()?;
329
330    Ok(hash)
331}
332
333/// Inserts the genesis state into the database.
334pub fn insert_genesis_state<'a, 'b, Provider>(
335    provider: &Provider,
336    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
337) -> ProviderResult<()>
338where
339    Provider: StaticFileProviderFactory
340        + DBProvider<Tx: DbTxMut>
341        + HeaderProvider
342        + StateWriter
343        + ChainSpecProvider
344        + AsRef<Provider>,
345{
346    let genesis_block_number = provider.chain_spec().genesis_header().number();
347    insert_state(provider, alloc, genesis_block_number)
348}
349
350/// Inserts state at given block into database.
351pub fn insert_state<'a, 'b, Provider>(
352    provider: &Provider,
353    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)>,
354    block: u64,
355) -> ProviderResult<()>
356where
357    Provider: StaticFileProviderFactory
358        + DBProvider<Tx: DbTxMut>
359        + HeaderProvider
360        + StateWriter
361        + AsRef<Provider>,
362{
363    let capacity = alloc.size_hint().1.unwrap_or(0);
364    let mut state_init: BundleStateInit =
365        AddressMap::with_capacity_and_hasher(capacity, Default::default());
366    let mut reverts_init: AddressMap<_> =
367        AddressMap::with_capacity_and_hasher(capacity, Default::default());
368    let mut contracts: B256Map<Bytecode> =
369        B256Map::with_capacity_and_hasher(capacity, Default::default());
370
371    for (address, account) in alloc {
372        let bytecode_hash = if let Some(code) = &account.code {
373            match Bytecode::new_raw_checked(code.clone()) {
374                Ok(bytecode) => {
375                    let hash = bytecode.hash_slow();
376                    contracts.insert(hash, bytecode);
377                    Some(hash)
378                }
379                Err(err) => {
380                    error!(%address, %err, "Failed to decode genesis bytecode.");
381                    return Err(DatabaseError::Other(err.to_string()).into());
382                }
383            }
384        } else {
385            None
386        };
387
388        // get state
389        let storage = account
390            .storage
391            .as_ref()
392            .map(|m| {
393                m.iter()
394                    .map(|(key, value)| {
395                        let value = U256::from_be_bytes(value.0);
396                        (*key, (U256::ZERO, value))
397                    })
398                    .collect::<B256Map<_>>()
399            })
400            .unwrap_or_default();
401
402        reverts_init.insert(
403            *address,
404            (Some(None), storage.keys().map(|k| StorageEntry::new(*k, U256::ZERO)).collect()),
405        );
406
407        state_init.insert(
408            *address,
409            (
410                None,
411                Some(Account {
412                    nonce: account.nonce.unwrap_or_default(),
413                    balance: account.balance,
414                    bytecode_hash,
415                }),
416                storage,
417            ),
418        );
419    }
420    let all_reverts_init: RevertsInit = HashMap::from_iter([(block, reverts_init)]);
421
422    let execution_outcome = ExecutionOutcome::new_init(
423        state_init,
424        all_reverts_init,
425        contracts,
426        Vec::default(),
427        block,
428        Vec::new(),
429    );
430
431    provider.write_state(
432        &execution_outcome,
433        OriginalValuesKnown::Yes,
434        StateWriteConfig::default(),
435    )?;
436
437    trace!(target: "reth::cli", "Inserted state");
438
439    Ok(())
440}
441
442/// Inserts hashes for the genesis state.
443pub fn insert_genesis_hashes<'a, 'b, Provider>(
444    provider: &Provider,
445    alloc: impl Iterator<Item = (&'a Address, &'b GenesisAccount)> + Clone,
446) -> ProviderResult<()>
447where
448    Provider: DBProvider<Tx: DbTxMut> + HashingWriter,
449{
450    // insert and hash accounts to hashing table
451    let alloc_accounts = alloc.clone().map(|(addr, account)| (*addr, Some(Account::from(account))));
452    provider.insert_account_for_hashing(alloc_accounts)?;
453
454    trace!(target: "reth::cli", "Inserted account hashes");
455
456    let alloc_storage = alloc.filter_map(|(addr, account)| {
457        // only return Some if there is storage
458        account.storage.as_ref().map(|storage| {
459            (*addr, storage.iter().map(|(&key, &value)| StorageEntry { key, value: value.into() }))
460        })
461    });
462    provider.insert_storage_for_hashing(alloc_storage)?;
463
464    trace!(target: "reth::cli", "Inserted storage hashes");
465
466    Ok(())
467}
468
469/// Inserts header for the genesis state.
470pub fn insert_genesis_header<Provider, Spec>(
471    provider: &Provider,
472    chain: &Spec,
473) -> ProviderResult<()>
474where
475    Provider: StaticFileProviderFactory<Primitives: NodePrimitives<BlockHeader: Compact>>
476        + DBProvider<Tx: DbTxMut>,
477    Spec: EthChainSpec<Header = <Provider::Primitives as NodePrimitives>::BlockHeader>,
478{
479    let (header, block_hash) = (chain.genesis_header(), chain.genesis_hash());
480    let static_file_provider = provider.static_file_provider();
481
482    // Get the actual genesis block number from the header
483    let genesis_block_number = header.number();
484
485    match static_file_provider.block_hash(genesis_block_number) {
486        Ok(None) | Err(ProviderError::MissingStaticFileBlock(StaticFileSegment::Headers, _)) => {
487            let difficulty = header.difficulty();
488
489            // For genesis blocks with non-zero block numbers, we need to ensure they are stored
490            // in the correct static file range. We use get_writer() with the genesis block number
491            // to ensure the genesis block is stored in the correct static file range.
492            let mut writer = static_file_provider
493                .get_writer(genesis_block_number, StaticFileSegment::Headers)?;
494
495            // For non-zero genesis blocks, we need to set block range to genesis_block_number and
496            // append header without increment block
497            if genesis_block_number > 0 {
498                writer
499                    .user_header_mut()
500                    .set_block_range(genesis_block_number, genesis_block_number);
501                writer.append_header_direct(header, difficulty, &block_hash)?;
502            } else {
503                // For zero genesis blocks, use normal append_header
504                writer.append_header(header, &block_hash)?;
505            }
506        }
507        Ok(Some(_)) => {}
508        Err(e) => return Err(e),
509    }
510
511    provider.tx_ref().put::<tables::HeaderNumbers>(block_hash, genesis_block_number)?;
512    provider.tx_ref().put::<tables::BlockBodyIndices>(genesis_block_number, Default::default())?;
513
514    Ok(())
515}
516
517/// Reads account state from a [`BufRead`] reader and initializes it at the highest block that can
518/// be found on database.
519///
520/// It's similar to [`init_genesis`] but supports importing state too big to fit in memory, and can
521/// be set to the highest block present. One practical usecase is to import OP mainnet state at
522/// bedrock transition block.
523pub fn init_from_state_dump<PF>(
524    mut reader: impl BufRead,
525    provider_factory: &PF,
526    etl_config: EtlConfig,
527) -> eyre::Result<B256>
528where
529    PF: DatabaseProviderFactory<
530        ProviderRW: StaticFileProviderFactory
531                        + DBProvider<Tx: DbTxMut>
532                        + BlockNumReader
533                        + BlockHashReader
534                        + ChainSpecProvider
535                        + StageCheckpointWriter
536                        + HistoryWriter
537                        + HeaderProvider
538                        + HashingWriter
539                        + TrieWriter
540                        + StateWriter
541                        + StorageSettingsCache
542                        + RocksDBProviderFactory
543                        + NodePrimitivesProvider
544                        + AsRef<PF::ProviderRW>,
545    >,
546{
547    if etl_config.file_size == 0 {
548        return Err(eyre::eyre!("ETL file size cannot be zero"))
549    }
550
551    let (block, hash, expected_state_root) = {
552        let provider_rw = provider_factory.database_provider_rw()?;
553        let block = provider_rw.last_block_number()?;
554        let hash = provider_rw
555            .block_hash(block)?
556            .ok_or_else(|| eyre::eyre!("Block hash not found for block {}", block))?;
557        let header = provider_rw
558            .header_by_number(block)?
559            .map(|h| SealedHeader::new(h, hash))
560            .ok_or_else(|| ProviderError::HeaderNotFound(block.into()))?;
561        let state_root = header.state_root();
562
563        debug!(target: "reth::cli",
564            block,
565            chain=%provider_rw.chain_spec().chain(),
566            "Initializing state at block"
567        );
568
569        (block, hash, state_root)
570    };
571
572    // first line can be state root
573    let dump_state_root = parse_state_root(&mut reader)?;
574    if expected_state_root != dump_state_root {
575        error!(target: "reth::cli",
576            ?dump_state_root,
577            ?expected_state_root,
578            "State root from state dump does not match state root in current header."
579        );
580        return Err(InitStorageError::StateRootMismatch(GotExpected {
581            got: dump_state_root,
582            expected: expected_state_root,
583        })
584        .into())
585    }
586
587    // remaining lines are accounts
588    let collector = parse_accounts(&mut reader, etl_config)?;
589
590    // write state to db with chunked commits to avoid OOM
591    dump_state(collector, provider_factory, block)?;
592
593    info!(target: "reth::cli", "All accounts written to database, starting state root computation (may take some time)");
594
595    // clear trie tables so state root is computed from scratch
596    {
597        let provider_rw = provider_factory.database_provider_rw()?;
598        provider_rw.tx_ref().clear::<tables::AccountsTrie>()?;
599        provider_rw.tx_ref().clear::<tables::StoragesTrie>()?;
600        provider_rw.commit()?;
601    }
602
603    // compute and compare state root
604    let computed_state_root = compute_state_root_chunked(provider_factory)?;
605    if computed_state_root == expected_state_root {
606        info!(target: "reth::cli",
607            ?computed_state_root,
608            "Computed state root matches state root in state dump"
609        );
610    } else {
611        error!(target: "reth::cli",
612            ?computed_state_root,
613            ?expected_state_root,
614            "Computed state root does not match state root in state dump"
615        );
616
617        return Err(InitStorageError::StateRootMismatch(GotExpected {
618            got: computed_state_root,
619            expected: expected_state_root,
620        })
621        .into())
622    }
623
624    // insert sync stages for stages that require state
625    {
626        let provider_rw = provider_factory.database_provider_rw()?;
627        for stage in StageId::STATE_REQUIRED {
628            provider_rw.save_stage_checkpoint(stage, StageCheckpoint::new(block))?;
629        }
630        provider_rw.commit()?;
631    }
632
633    Ok(hash)
634}
635
636/// Parses and returns expected state root.
637fn parse_state_root(reader: &mut impl BufRead) -> eyre::Result<B256> {
638    let mut line = String::new();
639    reader.read_line(&mut line)?;
640
641    let expected_state_root = serde_json::from_str::<StateRoot>(&line)?.root;
642    trace!(target: "reth::cli",
643        root=%expected_state_root,
644        "Read state root from file"
645    );
646    Ok(expected_state_root)
647}
648
649/// Parses accounts and pushes them to a [`Collector`].
650fn parse_accounts(
651    reader: impl BufRead,
652    etl_config: EtlConfig,
653) -> Result<Collector<Address, GenesisAccount>, eyre::Error> {
654    let mut collector = Collector::new(etl_config.file_size, etl_config.dir);
655    let mut parsed_accounts = 0usize;
656
657    let stream =
658        serde_json::Deserializer::from_reader(reader).into_iter::<GenesisAccountWithAddress>();
659    for account in stream {
660        let GenesisAccountWithAddress { genesis_account, address } = account?;
661        collector.insert(address, genesis_account)?;
662
663        parsed_accounts += 1;
664        if parsed_accounts.is_multiple_of(100_000) {
665            info!(target: "reth::cli", parsed_accounts, "Parsed accounts");
666        }
667    }
668
669    Ok(collector)
670}
671
672/// Takes a [`Collector`] and writes all accounts directly to database tables.
673///
674/// This bypasses the higher-level `insert_state`/`insert_genesis_hashes`/`insert_history`
675/// functions which build intermediate structures (`BundleStateInit`, `RevertsInit`,
676/// `ExecutionOutcome`) that duplicate all storage data 2-3x in memory. For accounts with
677/// millions of storage entries this causes OOM.
678///
679/// Instead, each account is written directly to all required tables using cursor operations,
680/// using `append`/`append_dup` for sorted tables where possible (MDBX fast path that skips
681/// B-tree traversal). Commits happen every [`STORAGE_COMMIT_THRESHOLD`] storage units to
682/// bound MDBX dirty page accumulation.
683///
684/// NOTE: This function is not idempotent. If the process crashes mid-import, the database
685/// must be wiped before retrying.
686fn dump_state<PF>(
687    mut collector: Collector<Address, GenesisAccount>,
688    provider_factory: &PF,
689    block: u64,
690) -> Result<(), eyre::Error>
691where
692    PF: DatabaseProviderFactory<ProviderRW: DBProvider<Tx: DbTxMut>>,
693    PF::ProviderRW: StaticFileProviderFactory
694        + StorageSettingsCache
695        + RocksDBProviderFactory
696        + NodePrimitivesProvider,
697{
698    let storage_settings = provider_factory.database_provider_rw()?.cached_storage_settings();
699    if storage_settings.storage_v2 {
700        return dump_state_v2(collector, provider_factory, block)
701    }
702
703    let accounts_len = collector.len();
704    let mut total_accounts: usize = 0;
705    let mut storage_units: usize = 0;
706
707    // pre-allocate the history list once — every entry uses the same single-block bitmap
708    let history_list = IntegerList::new([block])?;
709
710    // track seen bytecode hashes to avoid re-hashing and re-writing duplicates
711    let mut seen_bytecodes: B256Set = B256Set::default();
712
713    let mut provider_rw = provider_factory.database_provider_rw()?;
714
715    for entry in collector.iter()? {
716        let (address_raw, account_raw) = entry?;
717        let (address, _) = Address::from_compact(address_raw.as_slice(), address_raw.len());
718        let (account, _) = GenesisAccount::from_compact(account_raw.as_slice(), account_raw.len());
719
720        let account_storage_len = account.storage.as_ref().map_or(0, |s| s.len());
721        let account_units = 1 + account_storage_len;
722
723        // commit before this account would push us over the threshold
724        if storage_units > 0 && storage_units + account_units > STORAGE_COMMIT_THRESHOLD {
725            provider_rw.commit()?;
726            provider_rw = provider_factory.database_provider_rw()?;
727            info!(target: "reth::cli",
728                total_accounts,
729                accounts_len,
730                storage_units,
731                "Committed chunk"
732            );
733            storage_units = 0;
734            seen_bytecodes = B256Set::default();
735        }
736
737        write_account_to_db(
738            provider_rw.tx_ref(),
739            &address,
740            &account,
741            block,
742            &history_list,
743            &mut seen_bytecodes,
744        )?;
745
746        total_accounts += 1;
747        storage_units += account_units;
748
749        if total_accounts.is_multiple_of(100_000) {
750            info!(target: "reth::cli", total_accounts, accounts_len, "Writing accounts...");
751        }
752    }
753
754    // commit final batch
755    provider_rw.commit()?;
756
757    info!(target: "reth::cli", total_accounts, "All accounts written to database");
758
759    Ok(())
760}
761
762fn dump_state_v2<PF>(
763    mut collector: Collector<Address, GenesisAccount>,
764    provider_factory: &PF,
765    block: u64,
766) -> Result<(), eyre::Error>
767where
768    PF: DatabaseProviderFactory<
769        ProviderRW: StaticFileProviderFactory
770                        + DBProvider<Tx: DbTxMut>
771                        + StorageSettingsCache
772                        + RocksDBProviderFactory
773                        + NodePrimitivesProvider,
774    >,
775{
776    let accounts_len = collector.len();
777    let mut total_accounts: usize = 0;
778    let mut storage_units: usize = 0;
779
780    // pre-allocate the history list once — every entry uses the same single-block bitmap
781    let history_list = IntegerList::new([block])?;
782
783    // track seen bytecode hashes to avoid re-hashing and re-writing duplicates
784    let mut seen_bytecodes: B256Set = B256Set::default();
785
786    let mut provider_rw = provider_factory.database_provider_rw()?;
787    let static_file_provider = provider_rw.static_file_provider();
788    let rocksdb_provider = provider_rw.rocksdb_provider();
789    let mut history_batch = rocksdb_provider.batch_with_auto_commit();
790    if snapshot_state_tables_empty(provider_rw.tx_ref())? {
791        reset_pre_snapshot_changeset_segment(
792            &static_file_provider,
793            StaticFileSegment::AccountChangeSets,
794            block,
795        )?;
796        reset_pre_snapshot_changeset_segment(
797            &static_file_provider,
798            StaticFileSegment::StorageChangeSets,
799            block,
800        )?;
801    }
802
803    {
804        let mut account_changeset_writer =
805            static_file_provider.get_writer(block, StaticFileSegment::AccountChangeSets)?;
806        let mut storage_changeset_writer =
807            static_file_provider.get_writer(block, StaticFileSegment::StorageChangeSets)?;
808        prepare_account_changeset_writer(&mut account_changeset_writer, block)?;
809        prepare_storage_changeset_writer(&mut storage_changeset_writer, block)?;
810
811        for entry in collector.iter()? {
812            let (address_raw, account_raw) = entry?;
813            let (address, _) = Address::from_compact(address_raw.as_slice(), address_raw.len());
814            let (account, _) =
815                GenesisAccount::from_compact(account_raw.as_slice(), account_raw.len());
816
817            let account_storage_len = account.storage.as_ref().map_or(0, |s| s.len());
818            let account_units = 1 + account_storage_len;
819
820            // commit before this account would push us over the threshold
821            if storage_units > 0 && storage_units + account_units > STORAGE_COMMIT_THRESHOLD {
822                history_batch.commit()?;
823                commit_mdbx_only(provider_rw)?;
824                provider_rw = provider_factory.database_provider_rw()?;
825                history_batch = rocksdb_provider.batch_with_auto_commit();
826                info!(target: "reth::cli",
827                    total_accounts,
828                    accounts_len,
829                    storage_units,
830                    "Committed chunk"
831                );
832                storage_units = 0;
833                seen_bytecodes = B256Set::default();
834            }
835
836            write_account_to_db_v2(
837                provider_rw.tx_ref(),
838                (&mut account_changeset_writer, &mut storage_changeset_writer),
839                &mut history_batch,
840                &address,
841                &account,
842                &history_list,
843                &mut seen_bytecodes,
844            )?;
845
846            total_accounts += 1;
847            storage_units += account_units;
848
849            if total_accounts.is_multiple_of(100_000) {
850                info!(target: "reth::cli", total_accounts, accounts_len, "Writing accounts...");
851            }
852        }
853    }
854
855    history_batch.commit()?;
856    commit_mdbx_only(provider_rw)?;
857    static_file_provider.finalize()?;
858
859    info!(target: "reth::cli", total_accounts, "All accounts written to database");
860
861    Ok(())
862}
863
864fn prepare_account_changeset_writer<N: NodePrimitives>(
865    writer: &mut reth_provider::providers::StaticFileProviderRWRefMut<'_, N>,
866    block: u64,
867) -> ProviderResult<()> {
868    let next_block = writer.next_block_number();
869    if next_block < block {
870        info!(
871            target: "reth::cli",
872            from_block = next_block,
873            to_block = block - 1,
874            "Padding empty account changesets before state import"
875        );
876        for empty_block in next_block..block {
877            writer.append_account_changeset(Vec::new(), empty_block)?;
878            if empty_block > next_block && empty_block.is_multiple_of(1_000_000) {
879                info!(
880                    target: "reth::cli",
881                    padded_to_block = empty_block,
882                    "Padded empty account changesets"
883                );
884            }
885        }
886    }
887
888    writer.begin_account_changeset(block)
889}
890
891fn prepare_storage_changeset_writer<N: NodePrimitives>(
892    writer: &mut reth_provider::providers::StaticFileProviderRWRefMut<'_, N>,
893    block: u64,
894) -> ProviderResult<()> {
895    let next_block = writer.next_block_number();
896    if next_block < block {
897        info!(
898            target: "reth::cli",
899            from_block = next_block,
900            to_block = block - 1,
901            "Padding empty storage changesets before state import"
902        );
903        for empty_block in next_block..block {
904            writer.append_storage_changeset(Vec::new(), empty_block)?;
905            if empty_block > next_block && empty_block.is_multiple_of(1_000_000) {
906                info!(
907                    target: "reth::cli",
908                    padded_to_block = empty_block,
909                    "Padded empty storage changesets"
910                );
911            }
912        }
913    }
914
915    writer.begin_storage_changeset(block)
916}
917
918fn snapshot_state_tables_empty<TX: reth_db_api::transaction::DbTx>(
919    tx: &TX,
920) -> ProviderResult<bool> {
921    Ok(tx.entries::<tables::PlainAccountState>()? == 0 &&
922        tx.entries::<tables::PlainStorageState>()? == 0 &&
923        tx.entries::<tables::HashedAccounts>()? == 0 &&
924        tx.entries::<tables::HashedStorages>()? == 0 &&
925        tx.entries::<tables::AccountChangeSets>()? == 0 &&
926        tx.entries::<tables::StorageChangeSets>()? == 0 &&
927        tx.entries::<tables::Bytecodes>()? == 0)
928}
929
930fn reset_pre_snapshot_changeset_segment<N: NodePrimitives>(
931    static_file_provider: &reth_provider::providers::StaticFileProvider<N>,
932    segment: StaticFileSegment,
933    block: u64,
934) -> ProviderResult<()> {
935    if block == 0 {
936        return Ok(())
937    }
938
939    let Some(highest_block) = static_file_provider.get_highest_static_file_block(segment) else {
940        return Ok(())
941    };
942
943    if highest_block >= block {
944        return Ok(())
945    }
946
947    let file_start = static_file_provider.find_fixed_range(segment, block).start();
948    info!(
949        target: "reth::cli",
950        ?segment,
951        highest_block,
952        import_block = block,
953        file_start,
954        "Resetting pre-snapshot changeset static files before state import"
955    );
956    static_file_provider.delete_segment(segment)?;
957
958    Ok(())
959}
960
961fn commit_mdbx_only<Provider>(provider: Provider) -> ProviderResult<()>
962where
963    Provider: DBProvider<Tx: DbTxMut>,
964{
965    reth_db_api::transaction::DbTx::commit(provider.into_tx()).map_err(ProviderError::from)
966}
967
968/// Writes a single account and all its storage to every required DB table directly,
969/// without building intermediary structures.
970///
971/// Uses `append_dup` for `DupSort` tables where insertion order matches key order (the ETL
972/// collector sorts by address, so `AccountChangeSets`, `PlainStorageState`, and
973/// `StorageChangeSets` receive data in sorted order within each account). For `HashedAccounts`
974/// and `HashedStorages`, insertion order is unsorted (keccak scrambles address order), so we
975/// use `put`/`upsert` which do a full B-tree lookup.
976fn write_account_to_db<TX: DbTxMut>(
977    tx: &TX,
978    address: &Address,
979    genesis_account: &GenesisAccount,
980    block: u64,
981    history_list: &IntegerList,
982    seen_bytecodes: &mut B256Set,
983) -> Result<(), eyre::Error> {
984    let bytecode_hash = if let Some(code) = &genesis_account.code {
985        let bytecode = Bytecode::new_raw_checked(code.clone())
986            .map_err(|e| eyre::eyre!("Invalid bytecode for {address}: {e}"))?;
987        let hash = bytecode.hash_slow();
988        if seen_bytecodes.insert(hash) {
989            tx.put::<tables::Bytecodes>(hash, bytecode)?;
990        }
991        Some(hash)
992    } else {
993        None
994    };
995
996    let account = Account {
997        nonce: genesis_account.nonce.unwrap_or_default(),
998        balance: genesis_account.balance,
999        bytecode_hash,
1000    };
1001
1002    let hashed_address = keccak256(address);
1003
1004    // plain state — sorted by address (ETL order), use append
1005    tx.put::<tables::PlainAccountState>(*address, account)?;
1006
1007    // hashed state — unsorted (keccak scrambles order), must use put
1008    tx.put::<tables::HashedAccounts>(hashed_address, account)?;
1009
1010    // account changeset — DupSort keyed by block, subkey sorted by address (ETL order)
1011    let mut acct_cs_cursor = tx.cursor_dup_write::<tables::AccountChangeSets>()?;
1012    acct_cs_cursor.append_dup(block, AccountBeforeTx { address: *address, info: None })?;
1013
1014    // account history
1015    tx.put::<tables::AccountsHistory>(ShardedKey::new(*address, u64::MAX), history_list.clone())?;
1016
1017    // storage entries
1018    if let Some(storage) = &genesis_account.storage {
1019        let mut hashed_storage_cursor = tx.cursor_dup_write::<tables::HashedStorages>()?;
1020        let mut plain_storage_cursor = tx.cursor_dup_write::<tables::PlainStorageState>()?;
1021        let mut storage_cs_cursor = tx.cursor_dup_write::<tables::StorageChangeSets>()?;
1022
1023        for (&key, &value) in storage {
1024            let value_u256 = U256::from_be_bytes(value.0);
1025
1026            // plain storage — sorted by (address, key), use append_dup
1027            plain_storage_cursor.append_dup(*address, StorageEntry { key, value: value_u256 })?;
1028
1029            // hashed storage — unsorted keccak order, use upsert
1030            let hashed_key = keccak256(key);
1031            hashed_storage_cursor
1032                .upsert(hashed_address, &StorageEntry { key: hashed_key, value: value_u256 })?;
1033
1034            // storage changeset — sorted by (block, address), then by key via append_dup
1035            storage_cs_cursor.append_dup(
1036                BlockNumberAddress((block, *address)),
1037                StorageEntry { key, value: U256::ZERO },
1038            )?;
1039
1040            // storage history
1041            tx.put::<tables::StoragesHistory>(
1042                StorageShardedKey::new(*address, key, u64::MAX),
1043                history_list.clone(),
1044            )?;
1045        }
1046    }
1047
1048    Ok(())
1049}
1050
1051/// Writes a single account to the v2 storage destinations.
1052///
1053/// Storage v2 uses hashed state as the canonical state, static-file change sets, and `RocksDB`
1054/// history indices. The ETL collector yields accounts sorted by address and genesis storage is a
1055/// `BTreeMap`, so the streaming static-file writes preserve the required order.
1056fn write_account_to_db_v2<TX, N>(
1057    tx: &TX,
1058    changeset_writers: (
1059        &mut reth_provider::providers::StaticFileProviderRWRefMut<'_, N>,
1060        &mut reth_provider::providers::StaticFileProviderRWRefMut<'_, N>,
1061    ),
1062    history_batch: &mut reth_provider::providers::RocksDBBatch<'_>,
1063    address: &Address,
1064    genesis_account: &GenesisAccount,
1065    history_list: &IntegerList,
1066    seen_bytecodes: &mut B256Set,
1067) -> Result<(), eyre::Error>
1068where
1069    TX: DbTxMut,
1070    N: NodePrimitives,
1071{
1072    let bytecode_hash = if let Some(code) = &genesis_account.code {
1073        let bytecode = Bytecode::new_raw_checked(code.clone())
1074            .map_err(|e| eyre::eyre!("Invalid bytecode for {address}: {e}"))?;
1075        let hash = bytecode.hash_slow();
1076        if seen_bytecodes.insert(hash) {
1077            tx.put::<tables::Bytecodes>(hash, bytecode)?;
1078        }
1079        Some(hash)
1080    } else {
1081        None
1082    };
1083
1084    let account = Account {
1085        nonce: genesis_account.nonce.unwrap_or_default(),
1086        balance: genesis_account.balance,
1087        bytecode_hash,
1088    };
1089
1090    let hashed_address = keccak256(address);
1091    let (account_changeset_writer, storage_changeset_writer) = changeset_writers;
1092
1093    tx.put::<tables::HashedAccounts>(hashed_address, account)?;
1094    account_changeset_writer
1095        .append_account_changeset_entry(AccountBeforeTx { address: *address, info: None })?;
1096    history_batch
1097        .put::<tables::AccountsHistory>(ShardedKey::new(*address, u64::MAX), history_list)?;
1098
1099    if let Some(storage) = &genesis_account.storage {
1100        let mut hashed_storage_cursor = tx.cursor_dup_write::<tables::HashedStorages>()?;
1101
1102        for (&key, &value) in storage {
1103            let value_u256 = U256::from_be_bytes(value.0);
1104
1105            let hashed_key = keccak256(key);
1106            hashed_storage_cursor
1107                .upsert(hashed_address, &StorageEntry { key: hashed_key, value: value_u256 })?;
1108
1109            storage_changeset_writer.append_storage_changeset_entry(
1110                reth_db_api::models::StorageBeforeTx { address: *address, key, value: U256::ZERO },
1111            )?;
1112
1113            history_batch.put::<tables::StoragesHistory>(
1114                StorageShardedKey::new(*address, key, u64::MAX),
1115                history_list,
1116            )?;
1117        }
1118    }
1119
1120    Ok(())
1121}
1122
/// Computes the state root (from scratch) based on the accounts and storages present in the
/// database.
///
/// `prefix_sets` is forwarded unchanged to `compute_state_root_inner`, which does the actual
/// work for the adapter type `A` supplied by `reth_trie_db::with_adapter!`.
fn compute_state_root<Provider>(
    provider: &Provider,
    prefix_sets: Option<TriePrefixSets>,
) -> Result<B256, InitStorageError>
where
    Provider: DBProvider<Tx: DbTxMut> + TrieWriter + StorageSettingsCache,
{
    // NOTE(review): `with_adapter!` binds the adapter type `A` based on `provider` — presumably
    // from its cached storage settings; confirm against the macro definition.
    reth_trie_db::with_adapter!(provider, |A| {
        compute_state_root_inner::<_, A>(provider, prefix_sets)
    })
}
1136
1137fn compute_state_root_inner<Provider, A>(
1138    provider: &Provider,
1139    prefix_sets: Option<TriePrefixSets>,
1140) -> Result<B256, InitStorageError>
1141where
1142    Provider: DBProvider<Tx: DbTxMut> + TrieWriter + StorageSettingsCache,
1143    A: reth_trie_db::TrieTableAdapter,
1144{
1145    trace!(target: "reth::cli", "Computing state root");
1146
1147    let tx = provider.tx_ref();
1148    let mut intermediate_state: Option<IntermediateStateRootState> = None;
1149    let mut total_flushed_updates = 0;
1150
1151    loop {
1152        let mut state_root =
1153            DbStateRoot::<_, A>::from_tx(tx).with_intermediate_state(intermediate_state);
1154
1155        if let Some(sets) = prefix_sets.clone() {
1156            state_root = state_root.with_prefix_sets(sets);
1157        }
1158
1159        match state_root.root_with_progress()? {
1160            StateRootProgress::Progress(state, _, updates) => {
1161                let updated_len = provider.write_trie_updates(updates)?;
1162                total_flushed_updates += updated_len;
1163
1164                trace!(target: "reth::cli",
1165                    last_account_key = %state.account_root_state.last_hashed_key,
1166                    updated_len,
1167                    total_flushed_updates,
1168                    "Flushing trie updates"
1169                );
1170
1171                intermediate_state = Some(*state);
1172
1173                if total_flushed_updates.is_multiple_of(SOFT_LIMIT_COUNT_FLUSHED_UPDATES) {
1174                    info!(target: "reth::cli",
1175                        total_flushed_updates,
1176                        "Flushing trie updates"
1177                    );
1178                }
1179            }
1180            StateRootProgress::Complete(root, _, updates) => {
1181                let updated_len = provider.write_trie_updates(updates)?;
1182                total_flushed_updates += updated_len;
1183
1184                trace!(target: "reth::cli",
1185                    %root,
1186                    updated_len,
1187                    total_flushed_updates,
1188                    "State root has been computed"
1189                );
1190
1191                return Ok(root)
1192            }
1193        }
1194    }
1195}
1196
/// Computes the state root (from scratch) with periodic commits to free MDBX dirty pages.
///
/// Opens a fresh transaction each iteration to release dirty pages, preventing OOM on large
/// states where trie updates accumulate gigabytes of MDBX dirty pages.
///
/// The looping itself lives in `compute_state_root_chunked_inner`; this wrapper only resolves
/// the adapter type `A` through `reth_trie_db::with_adapter!`.
fn compute_state_root_chunked<PF>(provider_factory: &PF) -> Result<B256, InitStorageError>
where
    PF: DatabaseProviderFactory<
        ProviderRW: DBProvider<Tx: DbTxMut> + TrieWriter + StorageSettingsCache,
    >,
{
    // This provider is opened only so `with_adapter!` has something to select `A` from.
    let provider_rw = provider_factory.database_provider_rw().map_err(provider_db_err)?;

    reth_trie_db::with_adapter!(&provider_rw, |A| {
        // Release it before the chunked computation opens its own read-write providers.
        drop(provider_rw);
        compute_state_root_chunked_inner::<PF, A>(provider_factory)
    })
}
1214
1215fn compute_state_root_chunked_inner<PF, A>(provider_factory: &PF) -> Result<B256, InitStorageError>
1216where
1217    PF: DatabaseProviderFactory<
1218        ProviderRW: DBProvider<Tx: DbTxMut> + TrieWriter + StorageSettingsCache,
1219    >,
1220    A: reth_trie_db::TrieTableAdapter,
1221{
1222    trace!(target: "reth::cli", "Computing state root");
1223
1224    let mut intermediate_state: Option<IntermediateStateRootState> = None;
1225    let mut total_flushed_updates = 0;
1226
1227    loop {
1228        let provider_rw = provider_factory.database_provider_rw().map_err(provider_db_err)?;
1229        let tx = provider_rw.tx_ref();
1230
1231        let state_root = DbStateRoot::<_, A>::from_tx(tx)
1232            .with_intermediate_state(intermediate_state.take())
1233            .with_threshold(STATE_ROOT_COMMIT_THRESHOLD);
1234
1235        match state_root.root_with_progress()? {
1236            StateRootProgress::Progress(state, _, updates) => {
1237                let updated_len = provider_rw.write_trie_updates(updates)?;
1238                total_flushed_updates += updated_len;
1239
1240                info!(target: "reth::cli",
1241                    last_account_key = %state.account_root_state.last_hashed_key,
1242                    updated_len,
1243                    total_flushed_updates,
1244                    "Flushing trie updates (committing to free memory)"
1245                );
1246
1247                intermediate_state = Some(*state);
1248                provider_rw.commit().map_err(provider_db_err)?;
1249            }
1250            StateRootProgress::Complete(root, _, updates) => {
1251                let updated_len = provider_rw.write_trie_updates(updates)?;
1252                total_flushed_updates += updated_len;
1253
1254                info!(target: "reth::cli",
1255                    %root,
1256                    updated_len,
1257                    total_flushed_updates,
1258                    "State root computation complete"
1259                );
1260
1261                provider_rw.commit().map_err(provider_db_err)?;
1262                return Ok(root)
1263            }
1264        }
1265    }
1266}
1267
1268/// Converts a provider error into an [`InitStorageError`].
1269fn provider_db_err(e: impl std::fmt::Display) -> InitStorageError {
1270    InitStorageError::from(StateRootError::Database(DatabaseError::Other(e.to_string())))
1271}
1272
/// Type to deserialize state root from state dump file.
#[derive(Debug, Serialize, Deserialize, PartialEq, Eq)]
struct StateRoot {
    /// The state root hash, (de)serialized under the `root` field name.
    root: B256,
}
1278
/// An account as in the state dump file. This contains a [`GenesisAccount`] and the account's
/// address.
///
/// Because the account is flattened, its fields sit next to `address` in the same serialized
/// object rather than under a nested key.
#[derive(Debug, Serialize, Deserialize)]
struct GenesisAccountWithAddress {
    /// The account's balance, nonce, code, and storage.
    #[serde(flatten)]
    genesis_account: GenesisAccount,
    /// The account's address.
    address: Address,
}
1289
1290#[cfg(test)]
1291mod tests {
1292    use super::*;
1293    use alloy_consensus::constants::{
1294        HOLESKY_GENESIS_HASH, MAINNET_GENESIS_HASH, SEPOLIA_GENESIS_HASH,
1295    };
1296    use alloy_genesis::Genesis;
1297    use reth_chainspec::{Chain, ChainSpec, HOLESKY, MAINNET, SEPOLIA};
1298    use reth_db::DatabaseEnv;
1299    use reth_db_api::{
1300        cursor::DbCursorRO,
1301        models::{storage_sharded_key::StorageShardedKey, IntegerList, ShardedKey},
1302        table::{Table, TableRow},
1303        transaction::DbTx,
1304        Database,
1305    };
1306    use reth_provider::{
1307        test_utils::{create_test_provider_factory_with_chain_spec, MockNodeTypesWithDB},
1308        ProviderFactory, RocksDBProviderFactory,
1309    };
1310    use std::{collections::BTreeMap, sync::Arc};
1311
1312    fn collect_table_entries<DB, T>(
1313        tx: &<DB as Database>::TX,
1314    ) -> Result<Vec<TableRow<T>>, InitStorageError>
1315    where
1316        DB: Database,
1317        T: Table,
1318    {
1319        Ok(tx.cursor_read::<T>()?.walk_range(..)?.collect::<Result<Vec<_>, _>>()?)
1320    }
1321
1322    #[test]
1323    fn parse_accounts_streams_jsonl_accounts() {
1324        let input = br#"{"address":"0x0000000000000000000000000000000000000002","balance":"0x2"}
1325{"address":"0x0000000000000000000000000000000000000001","balance":"0x1"}
1326"#;
1327
1328        let mut collector =
1329            parse_accounts(&input[..], EtlConfig::new(None, 128)).expect("parse succeeds");
1330
1331        let accounts = collector
1332            .iter()
1333            .unwrap()
1334            .map(|entry| {
1335                let (address_raw, account_raw) = entry.unwrap();
1336                let (address, _) = Address::from_compact(address_raw.as_slice(), address_raw.len());
1337                let (account, _) =
1338                    GenesisAccount::from_compact(account_raw.as_slice(), account_raw.len());
1339                (address, account.balance)
1340            })
1341            .collect::<Vec<_>>();
1342
1343        assert_eq!(
1344            accounts,
1345            vec![
1346                (Address::with_last_byte(1), U256::from(1)),
1347                (Address::with_last_byte(2), U256::from(2))
1348            ]
1349        );
1350    }
1351
    /// With storage settings set to v2, `dump_state` must leave the plain-state and change-set
    /// MDBX tables empty and instead populate hashed state in MDBX, change sets readable
    /// through the provider's change-set readers, and history indices in `RocksDB`.
    #[test]
    fn dump_state_uses_storage_v2_destinations() {
        let storage_key = B256::with_last_byte(3);
        // Two accounts: one with only a balance, one with a single storage slot.
        let input = br#"{"address":"0x0000000000000000000000000000000000000001","balance":"0x1"}
{"address":"0x0000000000000000000000000000000000000002","balance":"0x0","storage":{"0x0000000000000000000000000000000000000000000000000000000000000003":"0x0000000000000000000000000000000000000000000000000000000000000004"}}
"#;

        let collector = parse_accounts(&input[..], EtlConfig::new(None, 128)).unwrap();
        let factory = create_test_provider_factory_with_chain_spec(MAINNET.clone());
        // Select the v2 storage settings before importing.
        factory.set_storage_settings_cache(StorageSettings::v2());
        let block = 10;

        dump_state(collector, &factory, block).unwrap();

        // MDBX: only the hashed tables receive entries.
        let provider = factory.provider().unwrap();
        let tx = provider.tx_ref();
        assert_eq!(tx.entries::<tables::PlainAccountState>().unwrap(), 0);
        assert_eq!(tx.entries::<tables::PlainStorageState>().unwrap(), 0);
        assert_eq!(tx.entries::<tables::AccountChangeSets>().unwrap(), 0);
        assert_eq!(tx.entries::<tables::StorageChangeSets>().unwrap(), 0);
        assert_eq!(tx.entries::<tables::HashedAccounts>().unwrap(), 2);
        assert_eq!(tx.entries::<tables::HashedStorages>().unwrap(), 1);

        // Change sets for the import block, read back through the provider.
        let address_with_balance = Address::with_last_byte(1);
        let address_with_storage = Address::with_last_byte(2);
        assert_eq!(
            reth_provider::ChangeSetReader::account_block_changeset(&provider, block).unwrap(),
            vec![
                AccountBeforeTx { address: address_with_balance, info: None },
                AccountBeforeTx { address: address_with_storage, info: None }
            ]
        );
        assert_eq!(
            reth_provider::StorageChangeSetReader::storage_changeset(&provider, block).unwrap(),
            vec![(
                BlockNumberAddress((block, address_with_storage)),
                StorageEntry { key: storage_key, value: U256::ZERO }
            )]
        );

        // History indices, read back from RocksDB.
        let rocksdb = factory.rocksdb_provider();
        let accounts = rocksdb
            .iter::<tables::AccountsHistory>()
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();
        let storages = rocksdb
            .iter::<tables::StoragesHistory>()
            .unwrap()
            .collect::<Result<Vec<_>, _>>()
            .unwrap();

        // Each account and storage slot has one history shard containing only the import block.
        assert_eq!(
            accounts,
            vec![
                (
                    ShardedKey::new(address_with_balance, u64::MAX),
                    IntegerList::new([block]).unwrap()
                ),
                (
                    ShardedKey::new(address_with_storage, u64::MAX),
                    IntegerList::new([block]).unwrap()
                )
            ]
        );
        assert_eq!(
            storages,
            vec![(
                StorageShardedKey::new(address_with_storage, storage_key, u64::MAX),
                IntegerList::new([block]).unwrap()
            )]
        );
    }
1425
    /// Seeds both change-set static-file segments with an empty entry for block 0, imports
    /// state at block 500_010 under v2 storage settings, and checks that change sets are empty
    /// before the import block, populated at the import block, and that the static file
    /// covering the import block holds one offset per block from its start up to `block`.
    #[test]
    fn dump_state_v2_resets_presnapshot_changeset_static_files() {
        let storage_key = B256::with_last_byte(3);
        // A single account with one storage slot.
        let input = br#"{"address":"0x0000000000000000000000000000000000000002","balance":"0x0","storage":{"0x0000000000000000000000000000000000000000000000000000000000000003":"0x0000000000000000000000000000000000000000000000000000000000000004"}}
"#;

        let collector = parse_accounts(&input[..], EtlConfig::new(None, 128)).unwrap();
        let factory = create_test_provider_factory_with_chain_spec(MAINNET.clone());
        factory.set_storage_settings_cache(StorageSettings::v2());
        let static_files = factory.static_file_provider();

        // Pre-existing change-set data: an empty change set for block 0 in each segment.
        {
            let mut writer =
                static_files.get_writer(0, StaticFileSegment::AccountChangeSets).unwrap();
            writer.append_account_changeset(Vec::new(), 0).unwrap();
        }
        {
            let mut writer =
                static_files.get_writer(0, StaticFileSegment::StorageChangeSets).unwrap();
            writer.append_storage_changeset(Vec::new(), 0).unwrap();
        }
        static_files.commit().unwrap();

        // Import far beyond the seeded block.
        let block = 500_010;
        dump_state(collector, &factory, block).unwrap();
        // NOTE(review): presumably refreshes the provider's view of the files on disk after the
        // import — confirm against `initialize_index`.
        static_files.initialize_index().unwrap();

        // A block well before the import has no change-set entries.
        let provider = factory.provider().unwrap();
        let address = Address::with_last_byte(2);
        assert!(reth_provider::ChangeSetReader::account_block_changeset(&provider, 5)
            .unwrap()
            .is_empty());
        assert!(reth_provider::StorageChangeSetReader::storage_changeset(&provider, 5)
            .unwrap()
            .is_empty());

        // The fixed range containing the import block starts at 500_000, and that first block
        // has no change-set entries either.
        let account_file_start =
            static_files.find_fixed_range(StaticFileSegment::AccountChangeSets, block).start();
        let storage_file_start =
            static_files.find_fixed_range(StaticFileSegment::StorageChangeSets, block).start();
        assert_eq!(account_file_start, 500_000);
        assert_eq!(storage_file_start, 500_000);
        assert!(reth_provider::ChangeSetReader::account_block_changeset(
            &provider,
            account_file_start
        )
        .unwrap()
        .is_empty());
        assert!(reth_provider::StorageChangeSetReader::storage_changeset(
            &provider,
            storage_file_start
        )
        .unwrap()
        .is_empty());

        // The import block itself carries the imported account and storage slot.
        assert_eq!(
            reth_provider::ChangeSetReader::account_block_changeset(&provider, block).unwrap(),
            vec![AccountBeforeTx { address, info: None }]
        );
        assert_eq!(
            reth_provider::StorageChangeSetReader::storage_changeset(&provider, block).unwrap(),
            vec![(
                BlockNumberAddress((block, address)),
                StorageEntry { key: storage_key, value: U256::ZERO }
            )]
        );

        // One change-set offset per block from the file start through the import block.
        let account_offsets = static_files
            .get_segment_provider_for_block(StaticFileSegment::AccountChangeSets, block, None)
            .unwrap()
            .read_changeset_offsets()
            .unwrap()
            .unwrap();
        let storage_offsets = static_files
            .get_segment_provider_for_block(StaticFileSegment::StorageChangeSets, block, None)
            .unwrap()
            .read_changeset_offsets()
            .unwrap()
            .unwrap();
        assert_eq!(account_offsets.len() as u64, block - account_file_start + 1);
        assert_eq!(storage_offsets.len() as u64, block - storage_file_start + 1);
    }
1508
1509    #[test]
1510    fn success_init_genesis_mainnet() {
1511        let genesis_hash =
1512            init_genesis(&create_test_provider_factory_with_chain_spec(MAINNET.clone())).unwrap();
1513
1514        // actual, expected
1515        assert_eq!(genesis_hash, MAINNET_GENESIS_HASH);
1516    }
1517
1518    #[test]
1519    fn success_init_genesis_sepolia() {
1520        let genesis_hash =
1521            init_genesis(&create_test_provider_factory_with_chain_spec(SEPOLIA.clone())).unwrap();
1522
1523        // actual, expected
1524        assert_eq!(genesis_hash, SEPOLIA_GENESIS_HASH);
1525    }
1526
1527    #[test]
1528    fn success_init_genesis_holesky() {
1529        let genesis_hash =
1530            init_genesis(&create_test_provider_factory_with_chain_spec(HOLESKY.clone())).unwrap();
1531
1532        // actual, expected
1533        assert_eq!(genesis_hash, HOLESKY_GENESIS_HASH);
1534    }
1535
1536    #[test]
1537    fn fail_init_inconsistent_db() {
1538        let factory = create_test_provider_factory_with_chain_spec(SEPOLIA.clone());
1539        let static_file_provider = factory.static_file_provider();
1540        let rocksdb_provider = factory.rocksdb_provider();
1541        init_genesis(&factory).unwrap();
1542
1543        // Try to init db with a different genesis block
1544        let genesis_hash = init_genesis(
1545            &ProviderFactory::<MockNodeTypesWithDB>::new(
1546                factory.into_db(),
1547                MAINNET.clone(),
1548                static_file_provider,
1549                rocksdb_provider,
1550                reth_tasks::Runtime::test(),
1551            )
1552            .unwrap(),
1553        );
1554
1555        assert!(matches!(
1556            genesis_hash.unwrap_err(),
1557            InitStorageError::GenesisHashMismatch {
1558                chainspec_hash: MAINNET_GENESIS_HASH,
1559                storage_hash: SEPOLIA_GENESIS_HASH
1560            }
1561        ))
1562    }
1563
1564    #[test]
1565    fn skip_genesis_hash_validation_accepts_mismatched_db() {
1566        let factory = create_test_provider_factory_with_chain_spec(SEPOLIA.clone());
1567        let static_file_provider = factory.static_file_provider();
1568        let rocksdb_provider = factory.rocksdb_provider();
1569        init_genesis(&factory).unwrap();
1570
1571        let result = init_genesis_with_settings_and_validate(
1572            &ProviderFactory::<MockNodeTypesWithDB>::new(
1573                factory.into_db(),
1574                MAINNET.clone(),
1575                static_file_provider,
1576                rocksdb_provider,
1577                reth_tasks::Runtime::test(),
1578            )
1579            .unwrap(),
1580            StorageSettings::base(),
1581            false,
1582        );
1583
1584        let returned = result.expect("skip_genesis_validation should suppress mismatch error");
1585        assert_eq!(
1586            returned, SEPOLIA_GENESIS_HASH,
1587            "bypass returns the DB-resident hash, not the chainspec hash",
1588        );
1589    }
1590
1591    #[test]
1592    fn init_genesis_history() {
1593        let address_with_balance = Address::with_last_byte(1);
1594        let address_with_storage = Address::with_last_byte(2);
1595        let storage_key = B256::with_last_byte(1);
1596        let chain_spec = Arc::new(ChainSpec {
1597            chain: Chain::from_id(1),
1598            genesis: Genesis {
1599                alloc: BTreeMap::from([
1600                    (
1601                        address_with_balance,
1602                        GenesisAccount { balance: U256::from(1), ..Default::default() },
1603                    ),
1604                    (
1605                        address_with_storage,
1606                        GenesisAccount {
1607                            storage: Some(BTreeMap::from([(storage_key, B256::random())])),
1608                            ..Default::default()
1609                        },
1610                    ),
1611                ]),
1612                ..Default::default()
1613            },
1614            hardforks: Default::default(),
1615            paris_block_and_final_difficulty: None,
1616            deposit_contract: None,
1617            ..Default::default()
1618        });
1619
1620        let factory = create_test_provider_factory_with_chain_spec(chain_spec);
1621        init_genesis(&factory).unwrap();
1622
1623        let expected_accounts = vec![
1624            (ShardedKey::new(address_with_balance, u64::MAX), IntegerList::new([0]).unwrap()),
1625            (ShardedKey::new(address_with_storage, u64::MAX), IntegerList::new([0]).unwrap()),
1626        ];
1627        let expected_storages = vec![(
1628            StorageShardedKey::new(address_with_storage, storage_key, u64::MAX),
1629            IntegerList::new([0]).unwrap(),
1630        )];
1631
1632        let collect_from_mdbx = |factory: &ProviderFactory<MockNodeTypesWithDB>| {
1633            let provider = factory.provider().unwrap();
1634            let tx = provider.tx_ref();
1635            (
1636                collect_table_entries::<DatabaseEnv, tables::AccountsHistory>(tx).unwrap(),
1637                collect_table_entries::<DatabaseEnv, tables::StoragesHistory>(tx).unwrap(),
1638            )
1639        };
1640
1641        {
1642            let settings = factory.cached_storage_settings();
1643            let rocksdb = factory.rocksdb_provider();
1644
1645            let collect_rocksdb = |rocksdb: &reth_provider::providers::RocksDBProvider| {
1646                (
1647                    rocksdb
1648                        .iter::<tables::AccountsHistory>()
1649                        .unwrap()
1650                        .collect::<Result<Vec<_>, _>>()
1651                        .unwrap(),
1652                    rocksdb
1653                        .iter::<tables::StoragesHistory>()
1654                        .unwrap()
1655                        .collect::<Result<Vec<_>, _>>()
1656                        .unwrap(),
1657                )
1658            };
1659
1660            let (accounts, storages) = if settings.storage_v2 {
1661                collect_rocksdb(&rocksdb)
1662            } else {
1663                collect_from_mdbx(&factory)
1664            };
1665            assert_eq!(accounts, expected_accounts);
1666            assert_eq!(storages, expected_storages);
1667        }
1668    }
1669
1670    #[test]
1671    fn warn_storage_settings_mismatch() {
1672        let factory = create_test_provider_factory_with_chain_spec(MAINNET.clone());
1673        init_genesis_with_settings(&factory, StorageSettings::v1()).unwrap();
1674
1675        // Request different settings - should warn but succeed
1676        let result = init_genesis_with_settings(&factory, StorageSettings::v2());
1677
1678        // Should succeed (warning is logged, not an error)
1679        assert!(result.is_ok());
1680    }
1681
1682    #[test]
1683    fn allow_same_storage_settings() {
1684        let factory = create_test_provider_factory_with_chain_spec(MAINNET.clone());
1685        let settings = StorageSettings::v2();
1686        init_genesis_with_settings(&factory, settings).unwrap();
1687
1688        let result = init_genesis_with_settings(&factory, settings);
1689
1690        assert!(result.is_ok());
1691    }
1692}