reth_engine_tree/tree/cached_state.rs

1//! Implements a state provider that has a shared cache in front of it.
2use alloy_primitives::{Address, StorageKey, StorageValue, B256};
3use metrics::Gauge;
4use mini_moka::sync::CacheBuilder;
5use reth_errors::ProviderResult;
6use reth_metrics::Metrics;
7use reth_primitives_traits::{Account, Bytecode};
8use reth_provider::{
9    AccountReader, BlockHashReader, HashedPostStateProvider, StateProofProvider, StateProvider,
10    StateRootProvider, StorageRootProvider,
11};
12use reth_revm::db::BundleState;
13use reth_trie::{
14    updates::TrieUpdates, AccountProof, HashedPostState, HashedStorage, MultiProof,
15    MultiProofTargets, StorageMultiProof, StorageProof, TrieInput,
16};
17use revm_primitives::map::DefaultHashBuilder;
18use std::time::Duration;
19use tracing::trace;
20
/// Concurrent cache type used throughout this module, backed by `mini_moka`.
///
/// NOTE(review): the hasher here comes from `alloy_primitives::map`, while the
/// `build_with_hasher` calls below use `revm_primitives::map::DefaultHashBuilder` —
/// presumably the same re-exported type; confirm they stay in sync.
pub(crate) type Cache<K, V> =
    mini_moka::sync::Cache<K, V, alloy_primitives::map::DefaultHashBuilder>;
23
/// A wrapper of a state provider and a shared cache.
///
/// Reads consult the [`ProviderCaches`] first and fall back to the inner provider on a miss;
/// hits and misses are recorded in the [`CachedStateMetrics`].
pub(crate) struct CachedStateProvider<S> {
    /// The state provider that is queried on a cache miss
    state_provider: S,

    /// The caches used for the provider
    caches: ProviderCaches,

    /// Metrics (hits / misses / sizes) for the cached state provider
    metrics: CachedStateMetrics,
}
35
36impl<S> CachedStateProvider<S>
37where
38    S: StateProvider,
39{
40    /// Creates a new [`CachedStateProvider`] from a [`ProviderCaches`], state provider, and
41    /// [`CachedStateMetrics`].
42    pub(crate) const fn new_with_caches(
43        state_provider: S,
44        caches: ProviderCaches,
45        metrics: CachedStateMetrics,
46    ) -> Self {
47        Self { state_provider, caches, metrics }
48    }
49}
50
/// Metrics for the cached state provider, showing hits / misses for each cache
///
/// Hit / miss counters are reset via [`Self::reset`] when execution of a new block begins.
#[derive(Metrics, Clone)]
#[metrics(scope = "sync.caching")]
pub(crate) struct CachedStateMetrics {
    /// Code cache hits
    code_cache_hits: Gauge,

    /// Code cache misses
    code_cache_misses: Gauge,

    /// Code cache size
    ///
    /// NOTE: this uses the moka caches' `entry_count`, NOT the `weighted_size` method to calculate
    /// size.
    code_cache_size: Gauge,

    /// Storage cache hits
    storage_cache_hits: Gauge,

    /// Storage cache misses
    storage_cache_misses: Gauge,

    /// Storage cache size
    ///
    /// NOTE: this uses the moka caches' `entry_count`, NOT the `weighted_size` method to calculate
    /// size.
    storage_cache_size: Gauge,

    /// Account cache hits
    account_cache_hits: Gauge,

    /// Account cache misses
    account_cache_misses: Gauge,

    /// Account cache size
    ///
    /// NOTE: this uses the moka caches' `entry_count`, NOT the `weighted_size` method to calculate
    /// size.
    account_cache_size: Gauge,
}
91
92impl CachedStateMetrics {
93    /// Sets all values to zero, indicating that a new block is being executed.
94    pub(crate) fn reset(&self) {
95        // code cache
96        self.code_cache_hits.set(0);
97        self.code_cache_misses.set(0);
98
99        // storage cache
100        self.storage_cache_hits.set(0);
101        self.storage_cache_misses.set(0);
102
103        // account cache
104        self.account_cache_hits.set(0);
105        self.account_cache_misses.set(0);
106    }
107
108    /// Returns a new zeroed-out instance of [`CachedStateMetrics`].
109    pub(crate) fn zeroed() -> Self {
110        let zeroed = Self::default();
111        zeroed.reset();
112        zeroed
113    }
114}
115
116impl<S: AccountReader> AccountReader for CachedStateProvider<S> {
117    fn basic_account(&self, address: &Address) -> ProviderResult<Option<Account>> {
118        if let Some(res) = self.caches.account_cache.get(address) {
119            self.metrics.account_cache_hits.increment(1);
120            return Ok(res)
121        }
122
123        self.metrics.account_cache_misses.increment(1);
124
125        let res = self.state_provider.basic_account(address)?;
126        self.caches.account_cache.insert(*address, res);
127        Ok(res)
128    }
129}
130
131impl<S: StateProvider> StateProvider for CachedStateProvider<S> {
132    fn storage(
133        &self,
134        account: Address,
135        storage_key: StorageKey,
136    ) -> ProviderResult<Option<StorageValue>> {
137        if let Some(res) = self.caches.get_storage(&account, &storage_key) {
138            self.metrics.storage_cache_hits.increment(1);
139            return Ok(res)
140        }
141
142        self.metrics.storage_cache_misses.increment(1);
143
144        let final_res = self.state_provider.storage(account, storage_key)?;
145        self.caches.insert_storage(account, storage_key, final_res);
146        Ok(final_res)
147    }
148
149    fn bytecode_by_hash(&self, code_hash: &B256) -> ProviderResult<Option<Bytecode>> {
150        if let Some(res) = self.caches.code_cache.get(code_hash) {
151            self.metrics.code_cache_hits.increment(1);
152            return Ok(res)
153        }
154
155        self.metrics.code_cache_misses.increment(1);
156
157        let final_res = self.state_provider.bytecode_by_hash(code_hash)?;
158        self.caches.code_cache.insert(*code_hash, final_res.clone());
159        Ok(final_res)
160    }
161}
162
// State-root computations are not cached here; every call is delegated verbatim
// to the wrapped provider.
impl<S: StateRootProvider> StateRootProvider for CachedStateProvider<S> {
    fn state_root(&self, hashed_state: HashedPostState) -> ProviderResult<B256> {
        self.state_provider.state_root(hashed_state)
    }

    fn state_root_from_nodes(&self, input: TrieInput) -> ProviderResult<B256> {
        self.state_provider.state_root_from_nodes(input)
    }

    fn state_root_with_updates(
        &self,
        hashed_state: HashedPostState,
    ) -> ProviderResult<(B256, TrieUpdates)> {
        self.state_provider.state_root_with_updates(hashed_state)
    }

    fn state_root_from_nodes_with_updates(
        &self,
        input: TrieInput,
    ) -> ProviderResult<(B256, TrieUpdates)> {
        self.state_provider.state_root_from_nodes_with_updates(input)
    }
}
186
// Proof generation bypasses the caches entirely and is delegated to the wrapped
// provider.
impl<S: StateProofProvider> StateProofProvider for CachedStateProvider<S> {
    fn proof(
        &self,
        input: TrieInput,
        address: Address,
        slots: &[B256],
    ) -> ProviderResult<AccountProof> {
        self.state_provider.proof(input, address, slots)
    }

    fn multiproof(
        &self,
        input: TrieInput,
        targets: MultiProofTargets,
    ) -> ProviderResult<MultiProof> {
        self.state_provider.multiproof(input, targets)
    }

    fn witness(
        &self,
        input: TrieInput,
        target: HashedPostState,
    ) -> ProviderResult<Vec<alloy_primitives::Bytes>> {
        self.state_provider.witness(input, target)
    }
}
213
// Storage-root and storage-proof computations are not cached; calls are
// delegated verbatim to the wrapped provider.
impl<S: StorageRootProvider> StorageRootProvider for CachedStateProvider<S> {
    fn storage_root(
        &self,
        address: Address,
        hashed_storage: HashedStorage,
    ) -> ProviderResult<B256> {
        self.state_provider.storage_root(address, hashed_storage)
    }

    fn storage_proof(
        &self,
        address: Address,
        slot: B256,
        hashed_storage: HashedStorage,
    ) -> ProviderResult<StorageProof> {
        self.state_provider.storage_proof(address, slot, hashed_storage)
    }

    fn storage_multiproof(
        &self,
        address: Address,
        slots: &[B256],
        hashed_storage: HashedStorage,
    ) -> ProviderResult<StorageMultiProof> {
        self.state_provider.storage_multiproof(address, slots, hashed_storage)
    }
}
241
// Block-hash lookups are not cached by this wrapper; they are delegated to the
// wrapped provider.
impl<S: BlockHashReader> BlockHashReader for CachedStateProvider<S> {
    fn block_hash(&self, number: alloy_primitives::BlockNumber) -> ProviderResult<Option<B256>> {
        self.state_provider.block_hash(number)
    }

    fn canonical_hashes_range(
        &self,
        start: alloy_primitives::BlockNumber,
        end: alloy_primitives::BlockNumber,
    ) -> ProviderResult<Vec<B256>> {
        self.state_provider.canonical_hashes_range(start, end)
    }
}
255
256impl<S: HashedPostStateProvider> HashedPostStateProvider for CachedStateProvider<S> {
257    fn hashed_post_state(&self, bundle_state: &reth_revm::db::BundleState) -> HashedPostState {
258        self.state_provider.hashed_post_state(bundle_state)
259    }
260}
261
/// The set of caches that are used in the [`CachedStateProvider`].
///
/// Cloning this struct is cheap: mini-moka caches are handles to shared state,
/// so clones observe the same cached entries.
#[derive(Debug, Clone)]
pub(crate) struct ProviderCaches {
    /// The cache for bytecode, keyed by code hash
    code_cache: Cache<B256, Option<Bytecode>>,

    /// The cache for storage, organized hierarchically by account: each entry is
    /// itself a per-account cache of storage slots
    storage_cache: Cache<Address, AccountStorageCache>,

    /// The cache for basic accounts
    account_cache: Cache<Address, Option<Account>>,
}
274
impl ProviderCaches {
    /// Get storage value from hierarchical cache
    ///
    /// The outer `Option` indicates whether the slot is cached at all; the inner
    /// `Option` is the cached (possibly absent) on-chain value.
    pub(crate) fn get_storage(
        &self,
        address: &Address,
        key: &StorageKey,
    ) -> Option<Option<StorageValue>> {
        self.storage_cache.get(address).and_then(|account_cache| account_cache.get_storage(key))
    }

    /// Insert storage value into hierarchical cache
    ///
    /// NOTE(review): the get-or-insert below is not atomic — two concurrent
    /// writers that both miss on the same address can each create and insert a
    /// fresh [`AccountStorageCache`], and slots written into the losing instance
    /// are dropped. Confirm whether callers serialize writes per block.
    pub(crate) fn insert_storage(
        &self,
        address: Address,
        key: StorageKey,
        value: Option<StorageValue>,
    ) {
        let account_cache = self.storage_cache.get(&address).unwrap_or_else(|| {
            let account_cache = AccountStorageCache::default();
            self.storage_cache.insert(address, account_cache.clone());
            account_cache
        });
        account_cache.insert_storage(key, value);
    }

    /// Invalidate storage for specific account
    pub(crate) fn invalidate_account_storage(&self, address: &Address) {
        self.storage_cache.invalidate(address);
    }

    /// Returns the total number of storage slots cached across all accounts
    ///
    /// NOTE: this walks every per-account cache, so it is proportional to the
    /// number of cached accounts — intended for metrics, not hot paths.
    pub(crate) fn total_storage_slots(&self) -> usize {
        self.storage_cache.iter().map(|addr| addr.len()).sum()
    }

    /// Inserts the [`BundleState`] entries into the cache.
    ///
    /// Returns an error if the state can't be cached, in which case the caches
    /// should be discarded.
    pub(crate) fn insert_state(&self, state_updates: &BundleState) -> Result<(), ()> {
        for (addr, account) in &state_updates.state {
            // If the account was not modified, as in not changed and not destroyed, then we have
            // nothing to do w.r.t. this particular account and can move on
            if account.status.is_not_modified() {
                continue
            }

            // if the account was destroyed, invalidate from the account / storage caches
            if account.was_destroyed() {
                // invalidate the account cache entry if destroyed
                self.account_cache.invalidate(addr);

                self.invalidate_account_storage(addr);
                continue
            }

            // if we have an account that was modified, but it has a `None` account info, some wild
            // error has occurred because this state should be unrepresentable. An account with
            // `None` current info, should be destroyed.
            let Some(ref account_info) = account.info else {
                trace!(target: "engine::caching", ?account, "Account with None account info found in state updates");
                return Err(())
            };

            // insert will update if present, so we just use the new account info as the new value
            // for the account cache
            self.account_cache.insert(*addr, Some(Account::from(account_info)));

            // now we iterate over all storage and make updates to the cached storage values
            for (storage_key, slot) in &account.storage {
                // we convert the storage key from U256 to B256 because that is how it's represented
                // in the cache
                self.insert_storage(*addr, (*storage_key).into(), Some(slot.present_value));
            }
        }

        Ok(())
    }
}
353
/// A builder for [`ProviderCaches`].
///
/// The entry counts below bound the *number* of entries per cache; the memory
/// footprint is bounded separately via weighers and `max_capacity` in
/// [`Self::build_caches`].
#[derive(Debug)]
pub(crate) struct ProviderCacheBuilder {
    /// Code cache entries
    code_cache_entries: u64,

    /// Storage cache entries
    storage_cache_entries: u64,

    /// Account cache entries
    account_cache_entries: u64,
}
366
impl ProviderCacheBuilder {
    /// Build a [`ProviderCaches`] struct, so that provider caches can be easily cloned.
    ///
    /// `total_cache_size` is a weight budget (approximately bytes, per the
    /// weighers below) that is split across the three caches:
    /// ~88.88% storage, ~5.56% accounts, ~5.56% code (sums to 99.99% due to
    /// integer basis-point math).
    pub(crate) fn build_caches(self, total_cache_size: u64) -> ProviderCaches {
        let storage_cache_size = (total_cache_size * 8888) / 10000; // 88.88% of total
        let account_cache_size = (total_cache_size * 556) / 10000; // 5.56% of total
        let code_cache_size = (total_cache_size * 556) / 10000; // 5.56% of total

        const EXPIRY_TIME: Duration = Duration::from_secs(7200); // 2 hours
        const TIME_TO_IDLE: Duration = Duration::from_secs(3600); // 1 hour

        let storage_cache = CacheBuilder::new(self.storage_cache_entries)
            .weigher(|_key: &Address, value: &AccountStorageCache| -> u32 {
                // values based on results from measure_storage_cache_overhead test
                let base_weight = 39_000;
                let slots_weight = value.len() * 218;
                // NOTE(review): usize -> u32 cast; would wrap only for a
                // pathologically large per-account slot count (~19.7M slots)
                (base_weight + slots_weight) as u32
            })
            .max_capacity(storage_cache_size)
            .time_to_live(EXPIRY_TIME)
            .time_to_idle(TIME_TO_IDLE)
            .build_with_hasher(DefaultHashBuilder::default());

        let account_cache = CacheBuilder::new(self.account_cache_entries)
            .weigher(|_key: &Address, value: &Option<Account>| -> u32 {
                // approximate in-memory size of the account entry, in bytes
                match value {
                    Some(account) => {
                        let mut weight = 40;
                        if account.nonce != 0 {
                            weight += 32;
                        }
                        if !account.balance.is_zero() {
                            weight += 32;
                        }
                        if account.bytecode_hash.is_some() {
                            weight += 33; // size of Option<B256>
                        } else {
                            weight += 8; // size of None variant
                        }
                        weight as u32
                    }
                    None => 8, // size of None variant
                }
            })
            .max_capacity(account_cache_size)
            .time_to_live(EXPIRY_TIME)
            .time_to_idle(TIME_TO_IDLE)
            .build_with_hasher(DefaultHashBuilder::default());

        let code_cache = CacheBuilder::new(self.code_cache_entries)
            .weigher(|_key: &B256, value: &Option<Bytecode>| -> u32 {
                match value {
                    Some(bytecode) => {
                        // base weight + actual bytecode size
                        (40 + bytecode.len()) as u32
                    }
                    None => 8, // size of None variant
                }
            })
            .max_capacity(code_cache_size)
            .time_to_live(EXPIRY_TIME)
            .time_to_idle(TIME_TO_IDLE)
            .build_with_hasher(DefaultHashBuilder::default());

        ProviderCaches { code_cache, storage_cache, account_cache }
    }
}
433
impl Default for ProviderCacheBuilder {
    /// Default entry bounds for each cache.
    fn default() -> Self {
        // With weigher and max_capacity in place, these numbers represent
        // the maximum number of entries that can be stored, not the actual
        // memory usage which is controlled by max_capacity.
        //
        // Code cache: up to 10M entries but limited to 0.5GB
        // Storage cache: up to 10M accounts but limited to 8GB
        // Account cache: up to 10M accounts but limited to 0.5GB
        //
        // NOTE(review): the GB limits above presumably describe the weight split
        // applied by `build_caches` for a particular `total_cache_size`; confirm
        // against the caller that supplies the budget.
        Self {
            code_cache_entries: 10_000_000,
            storage_cache_entries: 10_000_000,
            account_cache_entries: 10_000_000,
        }
    }
}
450
/// A saved cache that has been used for executing a specific block, which has been updated for its
/// execution.
#[derive(Debug, Clone)]
pub(crate) struct SavedCache {
    /// The hash of the block these caches were used to execute.
    hash: B256,

    /// The caches used for the provider.
    caches: ProviderCaches,

    /// Metrics for the cached state provider
    metrics: CachedStateMetrics,
}
464
465impl SavedCache {
466    /// Creates a new instance with the internals
467    pub(super) const fn new(
468        hash: B256,
469        caches: ProviderCaches,
470        metrics: CachedStateMetrics,
471    ) -> Self {
472        Self { hash, caches, metrics }
473    }
474
475    /// Returns the hash for this cache
476    pub(crate) const fn executed_block_hash(&self) -> B256 {
477        self.hash
478    }
479
480    /// Splits the cache into its caches and metrics, consuming it.
481    pub(crate) fn split(self) -> (ProviderCaches, CachedStateMetrics) {
482        (self.caches, self.metrics)
483    }
484
485    /// Returns the [`ProviderCaches`] belonging to the tracked hash.
486    pub(crate) fn cache(&self) -> &ProviderCaches {
487        &self.caches
488    }
489
490    /// Updates the metrics for the [`ProviderCaches`].
491    pub(crate) fn update_metrics(&self) {
492        self.metrics.storage_cache_size.set(self.caches.total_storage_slots() as f64);
493        self.metrics.account_cache_size.set(self.caches.account_cache.entry_count() as f64);
494        self.metrics.code_cache_size.set(self.caches.code_cache.entry_count() as f64);
495    }
496}
497
/// Cache for an account's storage slots
///
/// Used as the per-account value inside [`ProviderCaches::storage_cache`]; cloning
/// yields a handle to the same underlying slot cache.
#[derive(Debug, Clone)]
pub(crate) struct AccountStorageCache {
    /// The storage slots for this account
    slots: Cache<StorageKey, Option<StorageValue>>,
}
504
505impl AccountStorageCache {
506    /// Create a new [`AccountStorageCache`]
507    pub(crate) fn new(max_slots: u64) -> Self {
508        Self {
509            slots: CacheBuilder::new(max_slots).build_with_hasher(DefaultHashBuilder::default()),
510        }
511    }
512
513    /// Get a storage value
514    pub(crate) fn get_storage(&self, key: &StorageKey) -> Option<Option<StorageValue>> {
515        self.slots.get(key)
516    }
517
518    /// Insert a storage value
519    pub(crate) fn insert_storage(&self, key: StorageKey, value: Option<StorageValue>) {
520        self.slots.insert(key, value);
521    }
522
523    /// Returns the number of slots in the cache
524    pub(crate) fn len(&self) -> usize {
525        self.slots.entry_count() as usize
526    }
527}
528
impl Default for AccountStorageCache {
    /// Per-account slot cache with a 1M-entry bound.
    fn default() -> Self {
        // With weigher and max_capacity in place, this number represents
        // the maximum number of entries that can be stored, not the actual
        // memory usage which is controlled by storage cache's max_capacity.
        Self::new(1_000_000)
    }
}
537
#[cfg(test)]
mod tests {
    use super::*;
    use rand::Rng;
    use std::mem::size_of;

    /// A global allocator wrapper used to measure the heap overhead of cache
    /// structures in `measure_storage_cache_overhead`.
    mod tracking_allocator {
        use std::{
            alloc::{GlobalAlloc, Layout, System},
            sync::atomic::{AtomicUsize, Ordering},
        };

        #[derive(Debug)]
        pub(crate) struct TrackingAllocator {
            // bytes currently allocated (alloc minus dealloc)
            allocated: AtomicUsize,
            // cumulative bytes ever allocated since the last reset
            total_allocated: AtomicUsize,
            inner: System,
        }

        impl TrackingAllocator {
            pub(crate) const fn new() -> Self {
                Self {
                    allocated: AtomicUsize::new(0),
                    total_allocated: AtomicUsize::new(0),
                    inner: System,
                }
            }

            /// Zeroes both counters so a fresh measurement window can begin.
            pub(crate) fn reset(&self) {
                self.allocated.store(0, Ordering::SeqCst);
                self.total_allocated.store(0, Ordering::SeqCst);
            }

            /// Cumulative bytes allocated since the last [`Self::reset`].
            pub(crate) fn total_allocated(&self) -> usize {
                self.total_allocated.load(Ordering::SeqCst)
            }
        }

        // SAFETY: all allocation requests are forwarded unchanged to the `System`
        // allocator; this wrapper only adds atomic bookkeeping and never hands out
        // pointers of its own.
        unsafe impl GlobalAlloc for TrackingAllocator {
            unsafe fn alloc(&self, layout: Layout) -> *mut u8 {
                let ret = self.inner.alloc(layout);
                // only count successful allocations
                if !ret.is_null() {
                    self.allocated.fetch_add(layout.size(), Ordering::SeqCst);
                    self.total_allocated.fetch_add(layout.size(), Ordering::SeqCst);
                }
                ret
            }

            unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) {
                self.allocated.fetch_sub(layout.size(), Ordering::SeqCst);
                self.inner.dealloc(ptr, layout)
            }
        }
    }

    use tracking_allocator::TrackingAllocator;

    #[global_allocator]
    static ALLOCATOR: TrackingAllocator = TrackingAllocator::new();

    /// Runs `f` and returns the total bytes it allocated alongside its result.
    fn measure_allocation<T, F>(f: F) -> (usize, T)
    where
        F: FnOnce() -> T,
    {
        ALLOCATOR.reset();
        let result = f();
        let total = ALLOCATOR.total_allocated();
        (total, result)
    }

    /// Prints the measured allocation overhead of [`AccountStorageCache`]; these
    /// numbers are the source of the weigher constants in
    /// `ProviderCacheBuilder::build_caches`.
    #[test]
    fn measure_storage_cache_overhead() {
        let (base_overhead, cache) = measure_allocation(|| AccountStorageCache::new(1000));
        println!("Base AccountStorageCache overhead: {} bytes", base_overhead);
        let mut rng = rand::thread_rng();

        let key = StorageKey::random();
        let value = StorageValue::from(rng.gen::<u128>());
        let (first_slot, _) = measure_allocation(|| {
            cache.insert_storage(key, Some(value));
        });
        println!("First slot insertion overhead: {} bytes", first_slot);

        const TOTAL_SLOTS: usize = 10_000;
        let (test_slots, _) = measure_allocation(|| {
            for _ in 0..TOTAL_SLOTS {
                let key = StorageKey::random();
                let value = StorageValue::from(rng.gen::<u128>());
                cache.insert_storage(key, Some(value));
            }
        });
        println!("Average overhead over {} slots: {} bytes", TOTAL_SLOTS, test_slots / TOTAL_SLOTS);

        println!("\nTheoretical sizes:");
        println!("StorageKey size: {} bytes", size_of::<StorageKey>());
        println!("StorageValue size: {} bytes", size_of::<StorageValue>());
        println!("Option<StorageValue> size: {} bytes", size_of::<Option<StorageValue>>());
        println!("Option<B256> size: {} bytes", size_of::<Option<B256>>());
    }
}