reth_engine_tree/tree/payload_processor/
bal_prewarm_pool.rs

1use alloy_primitives::{Address, StorageKey};
2use reth_execution_cache::{CachedStateProvider, ExecutionCache, TxPoolPrewarmCacheSnapshot};
3use reth_provider::{
4    AccountReader, BytecodeReader, ProviderResult, StateProvider, StateProviderBox,
5};
6use std::{
7    sync::{
8        atomic::{AtomicUsize, Ordering},
9        Arc,
10    },
11    thread::JoinHandle,
12};
13use tokio::sync::oneshot;
14use tracing::trace;
15
16/// Builds a fresh `StateProviderBox` over the block's parent state. Type-erased so the pool is not
17/// generic over the provider factory; each worker builds its own per block.
18type BuildProviderFn = dyn Fn() -> ProviderResult<StateProviderBox> + Send + Sync;
19
20/// A single warm request: a whole account (basic account + its bytecode) or one storage slot.
21enum PrewarmTarget {
22    Account(Address),
23    Storage(Address, StorageKey),
24}
25
26/// A message in a worker's queue. The per-block lifecycle is explicit and ordered (the queue is
27/// FIFO): one `BeginBlock`, then the worker's share of `Warm`s, then one `EndBlock`.
28enum PrewarmMsg {
29    /// Open a read txn for the new block: build a provider over the parent state and hold it.
30    BeginBlock {
31        build: Arc<BuildProviderFn>,
32        caches: ExecutionCache,
33        txpool_snapshot: Option<TxPoolPrewarmCacheSnapshot>,
34    },
35    /// Warm one target into the held provider's cache. Ignored if no provider is held.
36    Warm(PrewarmTarget),
37    /// Drop the held provider (and its read txn).
38    EndBlock(Arc<SendOnDrop>),
39}
40
41/// Long-lived pool of blocking threads that warm the BAL read-set into the shared execution cache.
42#[derive(Debug)]
43pub(crate) struct BalPrewarmPool {
44    /// One queue per worker. `BeginBlock`/`EndBlock` are broadcast to all; `Warm`s round-robin.
45    workers: Vec<crossbeam_channel::Sender<PrewarmMsg>>,
46    /// Round-robin cursor for distributing warm requests across workers.
47    next: AtomicUsize,
48    _handles: Vec<JoinHandle<()>>,
49}
50
51impl BalPrewarmPool {
52    /// Spawns `num_threads` long-lived blocking worker threads. Owned by the
53    /// [`PayloadProcessor`](super::PayloadProcessor); the threads exit when the pool is dropped.
54    pub(crate) fn new(num_threads: usize) -> Arc<Self> {
55        let mut workers = Vec::with_capacity(num_threads);
56        let mut handles = Vec::with_capacity(num_threads);
57        for i in 0..num_threads {
58            let (tx, rx) = crossbeam_channel::unbounded::<PrewarmMsg>();
59            workers.push(tx);
60            handles.push(
61                std::thread::Builder::new()
62                    .name(format!("bal-prewarm-{i:03}"))
63                    .spawn(move || prewarm_loop(rx))
64                    .expect("spawn bal-prewarm thread"),
65            );
66        }
67        trace!(target: "engine::tree::bal_prewarm_pool", num_threads, "BalPrewarmPool spawned");
68        Arc::new(Self { workers, next: AtomicUsize::new(0), _handles: handles })
69    }
70
71    /// Begins a block: hands every worker the provider builder and shared cache so each opens its
72    /// own read txn over the parent state. Pair with [`end_block`](Self::end_block).
73    pub(crate) fn begin_block(
74        &self,
75        build: Arc<BuildProviderFn>,
76        caches: ExecutionCache,
77        txpool_snapshot: Option<TxPoolPrewarmCacheSnapshot>,
78    ) {
79        for worker in &self.workers {
80            let _ = worker.send(PrewarmMsg::BeginBlock {
81                build: build.clone(),
82                caches: caches.clone(),
83                txpool_snapshot: txpool_snapshot.clone(),
84            });
85        }
86    }
87
88    /// Fire-and-forget: warm an account (basic account + bytecode) on some worker.
89    pub(crate) fn warm_account(&self, addr: Address) {
90        self.send_warm(PrewarmTarget::Account(addr));
91    }
92
93    /// Fire-and-forget: warm one storage slot on some worker.
94    pub(crate) fn warm_storage(&self, addr: Address, slot: StorageKey) {
95        self.send_warm(PrewarmTarget::Storage(addr, slot));
96    }
97
98    /// Ends the block: every worker drops its provider (and read txn) once it has drained the warm
99    /// requests queued ahead of this message.
100    ///
101    /// Blocks until all workers processed the end block message.
102    pub(crate) fn end_block(&self) {
103        let (tx, rx) = oneshot::channel();
104        let tx = Arc::new(SendOnDrop { sender: Some(tx) });
105
106        for worker in &self.workers {
107            let _ = worker.send(PrewarmMsg::EndBlock(tx.clone()));
108        }
109
110        drop(tx);
111        rx.blocking_recv().expect("BAL prewarm pool dropped without signaling completion");
112    }
113
114    fn send_warm(&self, target: PrewarmTarget) {
115        let i = self.next.fetch_add(1, Ordering::Relaxed) % self.workers.len();
116        let _ = self.workers[i].send(PrewarmMsg::Warm(target));
117    }
118}
119
/// Default number of warming threads.
///
/// The work done on these threads is almost entirely MDBX reads. Each read is a tree traversal
/// that may take major page faults and therefore hit the disk.
///
/// An `NVMe` device only reaches its peak throughput when it is given enough parallel work, i.e.
/// when the queue depth is kept high: modern devices need roughly 64-128 requests in flight.
/// Going beyond that is acceptable; the surplus requests simply wait in the `NVMe` queue rather
/// than in memory.
///
/// MDBX relies on the OS page cache for its buffers, and the hit rate often reaches 90-99%. At
/// that point the workload is effectively CPU-bound, and a large thread count is
/// counterproductive because of context switching, core migration, contention, etc.
///
/// That overhead is nevertheless considered negligible next to the benefit of fully utilizing the
/// `NVMe` device. For example, with a request latency of 100µs, 100k IO requests are expected to
/// finish in 312.5ms at QD=32 and in 156.25ms at QD=64.
///
/// This is why this particular value was picked.
pub(crate) const DEFAULT_BAL_PREWARM_THREADS: usize = 128;
141
142fn prewarm_loop(rx: crossbeam_channel::Receiver<PrewarmMsg>) {
143    // The provider (and its MDBX read txn) held for the current block, between `BeginBlock` and
144    // `EndBlock`. `None` while idle, so no read txn is pinned across the inter-block gap.
145    let mut provider: Option<CachedStateProvider<StateProviderBox>> = None;
146
147    // Blocks when idle; the channel disconnects (and the loop ends) when the pool is dropped.
148    while let Ok(msg) = rx.recv() {
149        match msg {
150            PrewarmMsg::BeginBlock { build, caches, txpool_snapshot } => {
151                provider = match (build)() {
152                    Ok(inner) => Some(
153                        CachedStateProvider::new_prewarm(inner, caches)
154                            .with_txpool_snapshot(txpool_snapshot),
155                    ),
156                    Err(err) => {
157                        trace!(target: "engine::tree::bal_prewarm_pool", %err, "failed to build provider");
158                        None
159                    }
160                };
161            }
162            PrewarmMsg::Warm(target) => {
163                let Some(provider) = provider.as_ref() else { continue };
164                match target {
165                    PrewarmTarget::Account(addr) => {
166                        if let Ok(Some(account)) = provider.basic_account(&addr) &&
167                            let Some(code_hash) = account.bytecode_hash &&
168                            code_hash != alloy_consensus::constants::KECCAK_EMPTY
169                        {
170                            let _ = provider.bytecode_by_hash(&code_hash);
171                        }
172                    }
173                    PrewarmTarget::Storage(addr, slot) => {
174                        let _ = provider.storage(addr, slot);
175                    }
176                }
177            }
178            PrewarmMsg::EndBlock(end_tx) => {
179                provider = None;
180                drop(end_tx);
181            }
182        }
183    }
184}
185
186struct SendOnDrop {
187    sender: Option<oneshot::Sender<()>>,
188}
189
190impl Drop for SendOnDrop {
191    fn drop(&mut self) {
192        if let Some(sender) = self.sender.take() {
193            let _ = sender.send(());
194        }
195    }
196}
reth_engine_tree/tree/payload_processor/bal_prewarm_pool.rs

reth_engine_tree/tree/payload_processor/
bal_prewarm_pool.rs