// reth-bench/bench/generate_big_block.rs
1//! Command for generating large blocks by merging transactions from consecutive real blocks.
2//!
3//! This command fetches consecutive blocks from an RPC until a target gas usage is reached,
4//! takes block 0 as the "base" payload, concatenates transactions from subsequent blocks,
5//! and saves the result to disk as a [`BigBlockData`] JSON file containing the merged
6//! [`ExecutionData`] and environment switches at each block boundary.
7
8use alloy_consensus::TxEnvelope;
9use alloy_eips::{
10    eip7928::{AccountChanges, BlockAccessList, SlotChanges},
11    Typed2718,
12};
13use alloy_primitives::{Bytes, B256};
14use alloy_provider::{
15    network::{AnyNetwork, AnyRpcBlock},
16    Provider, RootProvider,
17};
18use alloy_rpc_client::ClientBuilder;
19use alloy_rpc_types_engine::{ExecutionData, ExecutionPayload};
20use clap::Parser;
21use eyre::Context;
22use futures::{stream, Stream, StreamExt};
23use reth_chainspec::EthChainSpec;
24use reth_cli::chainspec::ChainSpecParser;
25use reth_cli_runner::CliContext;
26use reth_engine_primitives::{BigBlockData, ExecutionPayload as _};
27use reth_ethereum_cli::chainspec::EthereumChainSpecParser;
28use std::{
29    collections::{HashMap, HashSet},
30    future::Future,
31};
32use tracing::{info, warn};
33
/// A single transaction with its gas used and raw encoded bytes.
///
/// Produced by [`TransactionSource`] implementations. `gas_used` is the *actual*
/// execution cost taken from the receipt (not the tx gas limit), so summing it over a
/// block reproduces the block's gas usage.
#[derive(Debug, Clone)]
pub struct RawTransaction {
    /// The actual gas used by the transaction (from receipt).
    pub gas_used: u64,
    /// The transaction type (e.g., 3 for EIP-4844 blob txs).
    pub tx_type: u8,
    /// The raw RLP-encoded transaction bytes.
    pub raw: Bytes,
}
44
/// Abstraction over sources of transactions for big block generation.
///
/// Implementors provide transactions from different sources (RPC, database, files, etc.)
pub trait TransactionSource {
    /// Fetch transactions from a specific block number.
    ///
    /// Returns `Ok(None)` if the block doesn't exist.
    /// Returns `Ok(Some((transactions, gas_used)))` with the block's transactions and the
    /// block's total gas used (the header value, not a recomputed sum).
    fn fetch_block_transactions(
        &self,
        block_number: u64,
    ) -> impl Future<Output = eyre::Result<Option<(Vec<RawTransaction>, u64)>>> + Send;
}
58
/// RPC-based transaction source that fetches from a remote node.
#[derive(Debug)]
pub struct RpcTransactionSource {
    // Provider over `AnyNetwork` so non-Ethereum-mainnet response shapes still decode.
    provider: RootProvider<AnyNetwork>,
}
64
65impl RpcTransactionSource {
66    /// Create a new RPC transaction source.
67    pub const fn new(provider: RootProvider<AnyNetwork>) -> Self {
68        Self { provider }
69    }
70
71    /// Create from an RPC URL with retry backoff.
72    pub fn from_url(rpc_url: &str) -> eyre::Result<Self> {
73        let client = ClientBuilder::default()
74            .layer(alloy_transport::layers::RetryBackoffLayer::new(10, 800, u64::MAX))
75            .http(rpc_url.parse()?);
76        let provider = RootProvider::<AnyNetwork>::new(client);
77        Ok(Self { provider })
78    }
79}
80
81impl TransactionSource for RpcTransactionSource {
82    async fn fetch_block_transactions(
83        &self,
84        block_number: u64,
85    ) -> eyre::Result<Option<(Vec<RawTransaction>, u64)>> {
86        // Fetch block and receipts in parallel
87        let (block, receipts) = tokio::try_join!(
88            self.provider.get_block_by_number(block_number.into()).full(),
89            self.provider.get_block_receipts(block_number.into())
90        )?;
91
92        let Some(block) = block else {
93            return Ok(None);
94        };
95
96        let Some(receipts) = receipts else {
97            return Err(eyre::eyre!("Receipts not found for block {}", block_number));
98        };
99
100        let block_gas_used = block.header.gas_used;
101
102        // Convert cumulative gas from receipts to per-tx gas_used
103        let mut prev_cumulative = 0u64;
104        let transactions: Vec<RawTransaction> = block
105            .transactions
106            .txns()
107            .zip(receipts.iter())
108            .map(|(tx, receipt)| {
109                let cumulative = receipt.inner.inner.inner.receipt.cumulative_gas_used;
110                let gas_used = cumulative - prev_cumulative;
111                prev_cumulative = cumulative;
112
113                let with_encoded = tx.inner.inner.clone().into_encoded();
114                RawTransaction {
115                    gas_used,
116                    tx_type: tx.inner.ty(),
117                    raw: with_encoded.encoded_bytes().clone(),
118                }
119            })
120            .collect();
121
122        Ok(Some((transactions, block_gas_used)))
123    }
124}
125
/// Collects transactions from a source up to a target gas usage.
#[derive(Debug)]
pub struct TransactionCollector<S> {
    // The backing source (RPC, file, ...) that yields per-block transactions.
    source: S,
    // Default gas target used by [`Self::collect`].
    target_gas: u64,
}
132
133impl<S: TransactionSource> TransactionCollector<S> {
134    /// Create a new transaction collector.
135    pub const fn new(source: S, target_gas: u64) -> Self {
136        Self { source, target_gas }
137    }
138
139    /// Collect transactions starting from the given block number.
140    ///
141    /// Skips blob transactions (type 3) and collects until target gas is reached.
142    /// Returns a `CollectionResult` with transactions, gas info, and next block.
143    pub async fn collect(&self, start_block: u64) -> eyre::Result<CollectionResult> {
144        self.collect_gas(start_block, self.target_gas).await
145    }
146
147    /// Collect transactions up to a specific gas target.
148    ///
149    /// This is used both for initial collection and for retry top-ups.
150    pub async fn collect_gas(
151        &self,
152        start_block: u64,
153        gas_target: u64,
154    ) -> eyre::Result<CollectionResult> {
155        let mut transactions: Vec<RawTransaction> = Vec::new();
156        let mut total_gas: u64 = 0;
157        let mut current_block = start_block;
158
159        while total_gas < gas_target {
160            let Some((block_txs, _)) = self.source.fetch_block_transactions(current_block).await?
161            else {
162                tracing::warn!(target: "reth-bench", block = current_block, "Block not found, stopping");
163                break;
164            };
165
166            for tx in block_txs {
167                // Skip blob transactions (EIP-4844, type 3)
168                if tx.tx_type == 3 {
169                    continue;
170                }
171
172                if total_gas + tx.gas_used <= gas_target {
173                    total_gas += tx.gas_used;
174                    transactions.push(tx);
175                }
176
177                if total_gas >= gas_target {
178                    break;
179                }
180            }
181
182            current_block += 1;
183
184            // Stop early if remaining gas is under 1M (close enough to target)
185            let remaining_gas = gas_target.saturating_sub(total_gas);
186            if remaining_gas < 1_000_000 {
187                break;
188            }
189        }
190
191        info!(
192            target: "reth-bench",
193            total_txs = transactions.len(),
194            gas_sent = total_gas,
195            next_block = current_block,
196            "Finished collecting transactions"
197        );
198
199        Ok(CollectionResult { transactions, gas_sent: total_gas, next_block: current_block })
200    }
201}
202
/// Result of collecting transactions from blocks.
#[derive(Debug)]
pub struct CollectionResult {
    /// Collected transactions with their gas info.
    pub transactions: Vec<RawTransaction>,
    /// Total gas sent (sum of historical `gas_used` for all collected txs).
    pub gas_sent: u64,
    /// Next block number to continue collecting from.
    pub next_block: u64,
}
213
/// State used to continue generated big block replay from the benchmark node's current tip.
#[derive(Debug, Clone, Default)]
pub(crate) struct BigBlocksInitialState {
    /// Real block hashes from previous big blocks, used for BLOCKHASH lookups.
    pub prior_block_hashes: Vec<(u64, B256)>,
    /// Block number to assign to the first generated synthetic block.
    pub next_synthetic_block_number: u64,
}
222
/// `reth bench generate-big-block` command
///
/// Generates a large block by fetching consecutive blocks from an RPC, merging their
/// transactions into a single payload, and saving the result to disk.
#[derive(Debug, Parser)]
pub struct Command {
    /// The RPC URL to use for fetching blocks.
    #[arg(long, value_name = "RPC_URL")]
    rpc_url: String,

    /// The chain name or path to a chain spec JSON file.
    #[arg(long, value_name = "CHAIN", default_value = "mainnet")]
    chain: String,

    /// Block number to start from.
    #[arg(long, value_name = "FROM_BLOCK")]
    from_block: u64,

    /// Target gas usage per big block. Consecutive real blocks are merged until
    /// this gas target is reached (or exceeded by the last included block).
    /// Accepts optional suffixes: K (thousand), M (million), G (billion).
    #[arg(long, value_name = "TARGET_GAS", value_parser = super::helpers::parse_gas_limit)]
    target_gas: u64,

    /// Number of sequential big blocks to generate.
    ///
    /// Each big block merges real blocks until `--target-gas` is reached.
    /// Sequential big blocks are chained: block N+1's `parent_hash` is set to
    /// block N's computed hash.
    #[arg(long, value_name = "NUM_BIG_BLOCKS", default_value = "1")]
    num_big_blocks: u64,

    /// Output directory for generated payloads.
    #[arg(long, value_name = "OUTPUT_DIR")]
    output_dir: std::path::PathBuf,

    /// Query `eth_getBlockAccessListByBlockNumber` for each fetched block and persist
    /// the flattened BAL on the stored payload.
    #[arg(long, default_value_t = false)]
    bal: bool,

    /// Maximum number of in-flight RPC fetches to keep buffered ahead of the merger.
    ///
    /// Each entry is one full per-block fetch (block + receipts, plus BAL when `--bal` is
    /// set). Larger values absorb RPC latency at the cost of more concurrent connections
    /// and memory; the buffer persists across `--num-big-blocks` so prefetching continues
    /// across big-block boundaries.
    #[arg(long, value_name = "PREFETCH_BUFFER", default_value_t = 32)]
    prefetch_buffer: usize,
}
273
impl Command {
    /// Execute the `generate-big-block` command.
    ///
    /// Validates arguments, resolves the chain spec, sets up a retrying RPC provider,
    /// then drives the big-block stream, writing each generated payload as a pretty
    /// JSON file named `big_block_{start}_to_{end}.json` in the output directory.
    pub async fn execute(self, _ctx: CliContext) -> eyre::Result<()> {
        // Reject degenerate configurations up front.
        if self.target_gas == 0 {
            return Err(eyre::eyre!("--target-gas must be greater than 0"));
        }
        if self.num_big_blocks == 0 {
            return Err(eyre::eyre!("--num-big-blocks must be at least 1"));
        }

        // Resolve chain spec for blob params lookup
        let chain_spec = EthereumChainSpecParser::parse(&self.chain)
            .wrap_err_with(|| format!("Failed to parse chain spec: {}", self.chain))?;

        info!(
            target: "reth-bench",
            from_block = self.from_block,
            target_gas = self.target_gas,
            num_big_blocks = self.num_big_blocks,
            include_bal = self.bal,
            chain = %chain_spec.chain(),
            output_dir = %self.output_dir.display(),
            "Generating big block payloads"
        );

        // Create output directory
        std::fs::create_dir_all(&self.output_dir).wrap_err_with(|| {
            format!("Failed to create output directory: {:?}", self.output_dir)
        })?;

        // Set up RPC provider (10 retries, 800ms backoff, unbounded CU budget)
        let client = ClientBuilder::default()
            .layer(alloy_transport::layers::RetryBackoffLayer::new(10, 800, u64::MAX))
            .http(self.rpc_url.parse()?);
        let provider = RootProvider::<AnyNetwork>::new(client);

        // Persistent prefetch stream: keeps `prefetch_buffer` per-block fetches in flight
        // ahead of the merger across all big blocks. Each item is a fully materialized
        // `FetchedBlock` (or `None` once the chain tip is reached on this fetch).
        let prefetch_buffer = self.prefetch_buffer.max(1);
        let bal_enabled = self.bal;
        let block_stream = stream::iter(self.from_block..)
            .map(|block_number| {
                let provider = provider.clone();
                async move { fetch_one_block(provider, block_number, bal_enabled).await }
            })
            .buffered(prefetch_buffer);

        let mut big_blocks_stream =
            Box::pin(big_blocks_stream(self.num_big_blocks, self.target_gas, block_stream, None));

        while let Some(big_block) = big_blocks_stream.next().await {
            let big_block = big_block?;
            // Save to disk; filename encodes the real-block range that was merged.
            let range_start = big_block.env_switches[0].block_number();
            let range_end = big_block.env_switches.last().unwrap().block_number();
            let block_hash = big_block.block_hash();
            let filename = format!("big_block_{range_start}_to_{range_end}.json");
            let filepath = self.output_dir.join(&filename);
            let json = serde_json::to_string_pretty(&big_block)?;
            std::fs::write(&filepath, &json)
                .wrap_err_with(|| format!("Failed to write payload to {:?}", filepath))?;

            info!(
                target: "reth-bench",
                path = %filepath.display(),
                block_hash = %block_hash,
                "Big block payload saved"
            );
        }

        Ok(())
    }
}
348
349/// Produces a stream of big block payloads given a stream of regular blocks.
350pub(crate) fn big_blocks_stream(
351    num_big_blocks: u64,
352    target_gas: u64,
353    block_stream: impl Stream<Item = eyre::Result<Option<(AnyRpcBlock, Option<BlockAccessList>)>>>
354        + Unpin,
355    initial_state: Option<BigBlocksInitialState>,
356) -> impl Stream<Item = eyre::Result<BigBlockData<ExecutionData>>> {
357    futures::stream::try_unfold(
358        (
359            block_stream,
360            initial_state.as_ref().map(|s| s.prior_block_hashes.clone()).unwrap_or_default(),
361            0,
362            initial_state.as_ref().map(|s| s.next_synthetic_block_number),
363            false,
364            0,
365        ),
366        move |(
367            mut block_stream,
368            mut accumulated_block_hashes,
369            mut big_block_idx,
370            next_synthetic_block_number,
371            mut reached_chain_tip,
372            mut first_block,
373        )| async move {
374            if reached_chain_tip || num_big_blocks == big_block_idx {
375                warn!(
376                    target: "reth-bench",
377                    generated = big_block_idx + 1,
378                    requested = num_big_blocks,
379                    "Reached chain tip, stopping generation early"
380                );
381                return Ok(None);
382            }
383
384            // Drain the prefetch stream until the gas target is reached for this big block.
385            let mut blocks = Vec::new();
386            let mut block_access_lists: Vec<Option<BlockAccessList>> = Vec::new();
387            let mut accumulated_block_gas: u64 = 0;
388
389            while accumulated_block_gas < target_gas {
390                let (block, block_access_list) = match block_stream.next().await {
391                    Some(Ok(Some(fetched))) => fetched,
392                    Some(Ok(None)) => {
393                        warn!(
394                            target: "reth-bench",
395                            "Block not found — reached chain tip"
396                        );
397                        reached_chain_tip = true;
398                        break;
399                    }
400                    Some(Err(e)) => return Err(e),
401                    // The block-number stream is open-ended; this only fires if the
402                    // upstream `iter(from..)` is somehow exhausted.
403                    None => {
404                        reached_chain_tip = true;
405                        break;
406                    }
407                };
408
409                if first_block == 0 {
410                    first_block = block.header.number;
411                }
412
413                let block = block
414                    .into_inner()
415                    .map_header(|header| header.map(|h| h.into_header_with_defaults()))
416                    .try_map_transactions(|tx| -> eyre::Result<TxEnvelope> {
417                        tx.try_into().map_err(|_| eyre::eyre!("unsupported tx type"))
418                    })?
419                    .into_consensus();
420
421                let (payload, sidecar) = ExecutionPayload::from_block_slow(&block);
422                let execution_data = ExecutionData { payload, sidecar };
423
424                let block_gas = execution_data.payload.as_v1().gas_used;
425
426                accumulated_block_gas += block_gas;
427                blocks.push(execution_data);
428                block_access_lists.push(block_access_list);
429            }
430
431            // If we hit the chain tip without fetching any blocks, stop generating.
432            if blocks.is_empty() {
433                warn!(
434                    target: "reth-bench",
435                    big_block = big_block_idx,
436                    requested = num_big_blocks,
437                    "No blocks available, stopping generation early"
438                );
439                return Ok(None);
440            }
441
442            let mut merged_block_access_list = None;
443            let mut cumulative_tx_count = 0;
444
445            for (block_idx, (block_data, block_access_list)) in
446                blocks.iter().zip(block_access_lists).enumerate()
447            {
448                if let Some(block_access_list) = block_access_list {
449                    merge_block_access_list(
450                        merged_block_access_list.get_or_insert_with(Default::default),
451                        block_access_list,
452                        cumulative_tx_count as u64,
453                        block_idx as u64,
454                    );
455                }
456
457                cumulative_tx_count += block_data.transaction_count();
458            }
459
460            if let Some(block_access_list) = &mut merged_block_access_list {
461                block_access_list.sort_unstable_by_key(|account| account.address);
462                for account in block_access_list {
463                    account.sort();
464                }
465            }
466
467            let big_block = BigBlockData {
468                env_switches: blocks,
469                prior_block_hashes: accumulated_block_hashes.clone(),
470                block_number: next_synthetic_block_number.unwrap_or(first_block) + big_block_idx,
471                merged_block_access_list: merged_block_access_list
472                    .as_ref()
473                    .map(|list| alloy_rlp::encode(list).into()),
474            };
475
476            // Accumulate real block hashes from this big block's env_switches for
477            // subsequent big blocks' BLOCKHASH lookups. Cap at 256 entries since the
478            // BLOCKHASH opcode only looks back 256 blocks.
479            for switch_data in &big_block.env_switches {
480                let block_number = switch_data.block_number();
481                let block_hash = switch_data.block_hash();
482                accumulated_block_hashes.push((block_number, block_hash));
483            }
484            if accumulated_block_hashes.len() > 256 {
485                let excess = accumulated_block_hashes.len() - 256;
486                accumulated_block_hashes.drain(..excess);
487            }
488
489            info!(
490                target: "reth-bench",
491                block_hash = %big_block.block_hash(),
492                total_txs = %big_block.transaction_count(),
493                total_gas_used = %big_block.gas_used(),
494                env_switches = %big_block.env_switches.len(),
495                prior_block_hashes = %big_block.prior_block_hashes.len(),
496                bal_accounts = %merged_block_access_list.as_ref().map_or(0, Vec::len),
497                "Generated big block"
498            );
499
500            big_block_idx += 1;
501            Ok(Some((
502                big_block,
503                (
504                    block_stream,
505                    accumulated_block_hashes,
506                    big_block_idx,
507                    next_synthetic_block_number,
508                    reached_chain_tip,
509                    first_block,
510                ),
511            )))
512        },
513    )
514}
515
516/// Fetches one block + receipts (and optionally its BAL) from the RPC. Returns `Ok(None)`
517/// when the block doesn't exist yet (chain-tip reached).
518async fn fetch_one_block(
519    provider: RootProvider<AnyNetwork>,
520    block_number: u64,
521    bal_enabled: bool,
522) -> eyre::Result<Option<(AnyRpcBlock, Option<BlockAccessList>)>> {
523    let (rpc_block, block_access_list) =
524        tokio::try_join!(provider.get_block_by_number(block_number.into()).full(), async {
525            if bal_enabled {
526                provider.get_block_access_list_by_number(block_number.into()).await
527            } else {
528                Ok(None)
529            }
530        })?;
531    let Some(rpc_block) = rpc_block else {
532        return Ok(None);
533    };
534
535    Ok(Some((rpc_block, block_access_list)))
536}
537
538fn merge_block_access_list(
539    merged: &mut BlockAccessList,
540    incoming: BlockAccessList,
541    tx_index_offset: u64,
542    segment_idx: u64,
543) {
544    let mut account_positions = merged
545        .iter()
546        .enumerate()
547        .map(|(idx, account)| (account.address, idx))
548        .collect::<HashMap<_, _>>();
549
550    for mut account_changes in incoming {
551        shift_account_changes(&mut account_changes, tx_index_offset, segment_idx);
552
553        if let Some(&idx) = account_positions.get(&account_changes.address) {
554            merge_account_changes(&mut merged[idx], account_changes);
555        } else {
556            account_positions.insert(account_changes.address, merged.len());
557            merged.push(account_changes);
558        }
559    }
560}
561
562fn shift_account_changes(
563    account_changes: &mut AccountChanges,
564    tx_index_offset: u64,
565    segment_idx: u64,
566) {
567    // Per-block BALs use block_access_index = 0 for pre-execution writes
568    // (system contract calls before any tx), 1..tx_count for tx commits, and
569    // tx_count+1 for post-execution.
570    //
571    // Renumbering: each segment boundary reserves two distinct bal_indexes —
572    // one for the prior segment's `finish()` (post-execution withdrawals +
573    // EIP-7002/7251 system calls) and one for the new segment's
574    // `apply_pre_execution_changes()` (EIP-2935/EIP-4788). The renumbered
575    // bal_index for a block-local idx in segment `k` is
576    // `idx + tx_index_offset + 2*k`. This ensures BAL workers reading via
577    // `BalWrites::get` (strict less-than) see all prior segments' boundary
578    // writes.
579    let shift = tx_index_offset + 2 * segment_idx;
580    for slot_changes in &mut account_changes.storage_changes {
581        for change in &mut slot_changes.changes {
582            change.block_access_index += shift;
583        }
584    }
585    for change in &mut account_changes.balance_changes {
586        change.block_access_index += shift;
587    }
588    for change in &mut account_changes.nonce_changes {
589        change.block_access_index += shift;
590    }
591    for change in &mut account_changes.code_changes {
592        change.block_access_index += shift;
593    }
594}
595
596fn merge_account_changes(existing: &mut AccountChanges, incoming: AccountChanges) {
597    merge_slot_changes(&mut existing.storage_changes, incoming.storage_changes);
598    existing.storage_reads.extend(incoming.storage_reads);
599    existing.balance_changes.extend(incoming.balance_changes);
600    existing.nonce_changes.extend(incoming.nonce_changes);
601    existing.code_changes.extend(incoming.code_changes);
602
603    // EIP-7928 invariant: a slot must appear in either storage_changes or storage_reads,
604    // not both. Per-block BALs respect this, but merging blocks can produce a slot
605    // that is read in one block and changed in another. Without this normalization,
606    // an empty read entry can shadow the real writes during BAL deserialization,
607    // making reads of that slot fall through to stale snapshot state.
608    let written: HashSet<_> =
609        existing.storage_changes.iter().map(|slot_changes| slot_changes.slot).collect();
610    existing.storage_reads.retain(|slot| !written.contains(slot));
611    let mut seen = HashSet::with_capacity(existing.storage_reads.len());
612    existing.storage_reads.retain(|slot| seen.insert(*slot));
613}
614
615fn merge_slot_changes(existing: &mut Vec<SlotChanges>, incoming: Vec<SlotChanges>) {
616    let mut slot_positions = existing
617        .iter()
618        .enumerate()
619        .map(|(idx, slot_changes)| (slot_changes.slot, idx))
620        .collect::<HashMap<_, _>>();
621
622    for slot_changes in incoming {
623        if let Some(&idx) = slot_positions.get(&slot_changes.slot) {
624            existing[idx].changes.extend(slot_changes.changes);
625        } else {
626            slot_positions.insert(slot_changes.slot, existing.len());
627            existing.push(slot_changes);
628        }
629    }
630}
631
/// Computes the block hash for an [`ExecutionData`] by converting it to a raw block
/// and hashing the header.
///
/// # Errors
/// Returns an error if the payload cannot be converted back into a block (e.g. the
/// sidecar is inconsistent with the payload).
pub fn compute_payload_block_hash(data: &ExecutionData) -> eyre::Result<B256> {
    // Clone is required: the conversion consumes the payload.
    let block = data
        .payload
        .clone()
        .into_block_with_sidecar_raw(&data.sidecar)
        .wrap_err("failed to convert payload to block for hash computation")?;
    Ok(block.header.hash_slow())
}
642
#[cfg(test)]
mod tests {
    use super::*;
    use alloy_eips::eip7928::{BalanceChange, CodeChange, NonceChange, StorageChange};
    use alloy_primitives::{Address, U256};

    // Verifies the merge path: incoming indexes are shifted by tx_index_offset (+2*segment),
    // same-address accounts are combined, and new addresses are appended.
    #[test]
    fn merge_block_access_list_offsets_and_merges_accounts() {
        let shared = Address::repeat_byte(0x11);
        let other = Address::repeat_byte(0x22);

        let mut merged = vec![AccountChanges {
            address: shared,
            storage_changes: vec![SlotChanges::new(
                U256::from(1),
                vec![StorageChange::new(0, U256::from(10))],
            )],
            storage_reads: vec![U256::from(3)],
            balance_changes: vec![BalanceChange::new(1, U256::from(100))],
            nonce_changes: vec![NonceChange::new(2, 7)],
            code_changes: vec![],
        }];

        let incoming = vec![
            AccountChanges {
                address: shared,
                storage_changes: vec![
                    SlotChanges::new(U256::from(1), vec![StorageChange::new(1, U256::from(20))]),
                    SlotChanges::new(U256::from(2), vec![StorageChange::new(2, U256::from(30))]),
                ],
                storage_reads: vec![U256::from(4)],
                balance_changes: vec![BalanceChange::new(0, U256::from(150))],
                nonce_changes: vec![NonceChange::new(2, 8)],
                code_changes: vec![CodeChange::new(1, Bytes::from_static(&[0xaa]))],
            },
            AccountChanges {
                address: other,
                storage_changes: vec![SlotChanges::new(
                    U256::from(9),
                    vec![StorageChange::new(0, U256::from(90))],
                )],
                storage_reads: vec![],
                balance_changes: vec![],
                nonce_changes: vec![],
                code_changes: vec![],
            },
        ];

        // tx_index_offset = 3, segment_idx = 0 → every incoming index shifts by 3.
        merge_block_access_list(&mut merged, incoming, 3, 0);

        assert_eq!(merged.len(), 2);

        let shared = &merged[0];
        assert_eq!(shared.storage_reads, vec![U256::from(3), U256::from(4)]);
        assert_eq!(
            shared
                .balance_changes
                .iter()
                .map(|change| change.block_access_index)
                .collect::<Vec<_>>(),
            vec![1, 3]
        );
        assert_eq!(
            shared.nonce_changes.iter().map(|change| change.block_access_index).collect::<Vec<_>>(),
            vec![2, 5]
        );
        assert_eq!(shared.code_changes[0].block_access_index, 4);

        let slot_one = shared
            .storage_changes
            .iter()
            .find(|slot_changes| slot_changes.slot == U256::from(1))
            .unwrap();
        assert_eq!(
            slot_one.changes.iter().map(|change| change.block_access_index).collect::<Vec<_>>(),
            vec![0, 4]
        );

        let slot_two = shared
            .storage_changes
            .iter()
            .find(|slot_changes| slot_changes.slot == U256::from(2))
            .unwrap();
        assert_eq!(slot_two.changes[0].block_access_index, 5);

        let other = &merged[1];
        assert_eq!(other.address, Address::repeat_byte(0x22));
        assert_eq!(other.storage_changes[0].changes[0].block_access_index, 3);
    }

    // Verifies the EIP-7928 read-set normalization after merging two blocks' entries
    // for the same account (written slots leave storage_reads; reads are deduped).
    #[test]
    fn merge_account_changes_normalizes_storage_reads_after_cross_block_merge() {
        let address = Address::repeat_byte(0x33);
        const A: U256 = U256::from_limbs([1, 0, 0, 0]);
        const B: U256 = U256::from_limbs([2, 0, 0, 0]);
        const C: U256 = U256::from_limbs([3, 0, 0, 0]);
        const D: U256 = U256::from_limbs([4, 0, 0, 0]);

        // Each AccountChanges value is valid on its own: storage slots only appear in
        // either reads or changes. The invalid read/change overlap is introduced when
        // these per-block BAL entries are merged for a standalone big block.
        let mut existing = AccountChanges {
            address,
            storage_changes: vec![SlotChanges::new(A, vec![StorageChange::new(0, U256::from(10))])],
            storage_reads: vec![B, C],
            balance_changes: vec![],
            nonce_changes: vec![],
            code_changes: vec![],
        };

        // B is read before it is written by the incoming block, and A is written before
        // it appears as a read in the incoming block. C is read in both blocks, so the
        // merge should also dedupe it. D remains read-only.
        let incoming = AccountChanges {
            address,
            storage_changes: vec![SlotChanges::new(B, vec![StorageChange::new(1, U256::from(20))])],
            storage_reads: vec![A, C, D],
            balance_changes: vec![],
            nonce_changes: vec![],
            code_changes: vec![],
        };

        merge_account_changes(&mut existing, incoming);

        // Written slots remain represented by storage_changes, while storage_reads only
        // keeps unique read-only slots in first-seen order.
        assert_eq!(
            existing
                .storage_changes
                .iter()
                .map(|slot_changes| slot_changes.slot)
                .collect::<Vec<_>>(),
            vec![A, B]
        );
        assert_eq!(existing.storage_reads, vec![C, D]);
        assert!(existing.storage_reads.iter().all(|read_slot| {
            !existing.storage_changes.iter().any(|slot_changes| slot_changes.slot == *read_slot)
        }));
    }
}