Skip to main content

reth_engine_tree/tree/
metrics.rs

1use crate::tree::{error::InsertBlockFatalError, TreeOutcome};
2use alloy_rpc_types_engine::{PayloadStatus, PayloadStatusEnum};
3use reth_engine_primitives::{ForkchoiceStatus, OnForkChoiceUpdated};
4use reth_errors::ProviderError;
5use reth_evm::metrics::ExecutorMetrics;
6use reth_execution_types::BlockExecutionOutput;
7use reth_metrics::{
8    metrics::{Counter, Gauge, Histogram},
9    Metrics,
10};
11use reth_primitives_traits::constants::gas_units::MEGAGAS;
12use reth_trie::updates::TrieUpdates;
13use std::time::{Duration, Instant};
14
15/// Upper bounds for each gas bucket. The last bucket is a catch-all for
16/// everything above the final threshold: <5M, 5-10M, 10-20M, 20-30M, 30-40M, >40M.
17const GAS_BUCKET_THRESHOLDS: [u64; 5] =
18    [5 * MEGAGAS, 10 * MEGAGAS, 20 * MEGAGAS, 30 * MEGAGAS, 40 * MEGAGAS];
19
20/// Total number of gas buckets (thresholds + 1 catch-all).
21const NUM_GAS_BUCKETS: usize = GAS_BUCKET_THRESHOLDS.len() + 1;
22
23/// Metrics for the `EngineApi`.
24#[derive(Debug, Default)]
25pub struct EngineApiMetrics {
26    /// Engine API-specific metrics.
27    pub engine: EngineMetrics,
28    /// Block executor metrics.
29    pub executor: ExecutorMetrics,
30    /// Metrics for block validation
31    pub block_validation: BlockValidationMetrics,
32    /// Canonical chain and reorg related metrics
33    pub tree: TreeMetrics,
34    /// Metrics for EIP-7928 Block-Level Access Lists (BAL).
35    #[allow(dead_code)]
36    pub(crate) bal: BalMetrics,
37}
38
39impl EngineApiMetrics {
40    /// Records metrics for block execution.
41    ///
42    /// This method updates metrics for execution time, gas usage, and the number
43    /// of accounts, storage slots and bytecodes updated.
44    pub fn record_block_execution<R>(
45        &self,
46        output: &BlockExecutionOutput<R>,
47        execution_duration: Duration,
48    ) {
49        let execution_secs = execution_duration.as_secs_f64();
50        let gas_used = output.result.gas_used;
51
52        // Update gas metrics
53        self.executor.gas_processed_total.increment(gas_used);
54        self.executor.gas_per_second.set(gas_used as f64 / execution_secs);
55        self.executor.gas_used_histogram.record(gas_used as f64);
56        self.executor.execution_histogram.record(execution_secs);
57        self.executor.execution_duration.set(execution_secs);
58
59        // Update the metrics for the number of accounts, storage slots and bytecodes
60        let accounts = output.state.state.len();
61        let storage_slots =
62            output.state.state.values().map(|account| account.storage.len()).sum::<usize>();
63        let bytecodes = output.state.contracts.len();
64
65        self.executor.accounts_updated_histogram.record(accounts as f64);
66        self.executor.storage_slots_updated_histogram.record(storage_slots as f64);
67        self.executor.bytecodes_updated_histogram.record(bytecodes as f64);
68    }
69
70    /// Returns a reference to the executor metrics for use in state hooks.
71    pub const fn executor_metrics(&self) -> &ExecutorMetrics {
72        &self.executor
73    }
74
75    /// Records the duration of block pre-execution changes (e.g., beacon root update).
76    pub fn record_pre_execution(&self, elapsed: Duration) {
77        self.executor.pre_execution_histogram.record(elapsed);
78    }
79
80    /// Records the duration of block post-execution changes (e.g., finalization).
81    pub fn record_post_execution(&self, elapsed: Duration) {
82        self.executor.post_execution_histogram.record(elapsed);
83    }
84
85    /// Records the time spent waiting for the next transaction from the iterator.
86    pub fn record_transaction_wait(&self, elapsed: Duration) {
87        self.executor.transaction_wait_histogram.record(elapsed);
88    }
89
90    /// Records the duration of a single transaction execution.
91    pub fn record_transaction_execution(&self, elapsed: Duration) {
92        self.executor.transaction_execution_histogram.record(elapsed);
93    }
94}
95
/// Metrics for the entire blockchain tree
///
/// Holds gauges for the canonical chain height, the latest reorg depth and the safe/finalized
/// block heights, plus the nested reorg counters in [`ReorgMetrics`].
// NOTE(review): the `Metrics` derive presumably exports each field's doc comment as the metric
// description — confirm before rewording field docs.
#[derive(Metrics)]
#[metrics(scope = "blockchain_tree")]
pub struct TreeMetrics {
    /// The highest block number in the canonical chain
    pub canonical_chain_height: Gauge,
    /// Metrics for reorgs.
    // Excluded from the derive via `skip`; `ReorgMetrics` builds its own counters in `Default`.
    #[metric(skip)]
    pub reorgs: ReorgMetrics,
    /// The latest reorg depth
    pub latest_reorg_depth: Gauge,
    /// The current safe block height (this is required by optimism)
    pub safe_block_height: Gauge,
    /// The current finalized block height (this is required by optimism)
    pub finalized_block_height: Gauge,
}
112
113/// Metrics for reorgs.
114#[derive(Debug)]
115pub struct ReorgMetrics {
116    /// The number of head block reorgs
117    pub head: Counter,
118    /// The number of safe block reorgs
119    pub safe: Counter,
120    /// The number of finalized block reorgs
121    pub finalized: Counter,
122}
123
124impl Default for ReorgMetrics {
125    fn default() -> Self {
126        Self {
127            head: metrics::counter!("blockchain_tree_reorgs", "commitment" => "head"),
128            safe: metrics::counter!("blockchain_tree_reorgs", "commitment" => "safe"),
129            finalized: metrics::counter!("blockchain_tree_reorgs", "commitment" => "finalized"),
130        }
131    }
132}
133
134/// Metrics for the `EngineApi`.
135#[derive(Metrics)]
136#[metrics(scope = "consensus.engine.beacon")]
137pub struct EngineMetrics {
138    /// Engine API forkchoiceUpdated response type metrics
139    #[metric(skip)]
140    pub(crate) forkchoice_updated: ForkchoiceUpdatedMetrics,
141    /// Engine API newPayload response type metrics
142    #[metric(skip)]
143    pub(crate) new_payload: NewPayloadStatusMetrics,
144    /// How many executed blocks are currently stored.
145    pub(crate) executed_blocks: Gauge,
146    /// How many already executed blocks were directly inserted into the tree.
147    pub(crate) inserted_already_executed_blocks: Counter,
148    /// The number of times the pipeline was run.
149    pub(crate) pipeline_runs: Counter,
150    /// Newly arriving block hash is not present in executed blocks cache storage
151    pub(crate) executed_new_block_cache_miss: Counter,
152    /// Histogram of persistence operation durations (in seconds)
153    pub(crate) persistence_duration: Histogram,
154    /// Tracks the how often we failed to deliver a newPayload response.
155    ///
156    /// This effectively tracks how often the message sender dropped the channel and indicates a CL
157    /// request timeout (e.g. it took more than 8s to send the response and the CL terminated the
158    /// request which resulted in a closed channel).
159    pub(crate) failed_new_payload_response_deliveries: Counter,
160    /// Tracks the how often we failed to deliver a forkchoice update response.
161    pub(crate) failed_forkchoice_updated_response_deliveries: Counter,
162    /// block insert duration
163    pub(crate) block_insert_total_duration: Histogram,
164}
165
/// Metrics for engine forkchoiceUpdated responses.
///
/// Besides the metric handles, this struct stores the start and finish [`Instant`] of the
/// latest call so that the idle time and the interval between consecutive calls can be
/// recorded.
// NOTE(review): the `Metrics` derive presumably exports each field's doc comment as the metric
// description — confirm before rewording field docs.
#[derive(Metrics)]
#[metrics(scope = "consensus.engine.beacon")]
pub(crate) struct ForkchoiceUpdatedMetrics {
    /// Finish time of the latest forkchoice updated call.
    // Bookkeeping state rather than a metric handle, hence `skip`.
    #[metric(skip)]
    pub(crate) latest_finish_at: Option<Instant>,
    /// Start time of the latest forkchoice updated call.
    // Bookkeeping state rather than a metric handle, hence `skip`.
    #[metric(skip)]
    pub(crate) latest_start_at: Option<Instant>,
    /// The total count of forkchoice updated messages received.
    pub(crate) forkchoice_updated_messages: Counter,
    /// The total count of forkchoice updated messages with payload received.
    pub(crate) forkchoice_with_attributes_updated_messages: Counter,
    /// The total count of forkchoice updated messages that we responded to with
    /// [`Valid`](ForkchoiceStatus::Valid).
    pub(crate) forkchoice_updated_valid: Counter,
    /// The total count of forkchoice updated messages that we responded to with
    /// [`Invalid`](ForkchoiceStatus::Invalid).
    pub(crate) forkchoice_updated_invalid: Counter,
    /// The total count of forkchoice updated messages that we responded to with
    /// [`Syncing`](ForkchoiceStatus::Syncing).
    pub(crate) forkchoice_updated_syncing: Counter,
    /// The total count of forkchoice updated messages that were unsuccessful, i.e. we responded
    /// with an error type that is not a [`PayloadStatusEnum`].
    pub(crate) forkchoice_updated_error: Counter,
    /// Latency for the forkchoice updated calls.
    pub(crate) forkchoice_updated_latency: Histogram,
    /// Latency for the last forkchoice updated call.
    pub(crate) forkchoice_updated_last: Gauge,
    /// Time diff between new payload call response and the next forkchoice updated call request.
    pub(crate) new_payload_forkchoice_updated_time_diff: Histogram,
    /// Time from previous forkchoice updated finish to current forkchoice updated start (idle
    /// time).
    pub(crate) time_between_forkchoice_updated: Histogram,
    /// Time from previous forkchoice updated start to current forkchoice updated start (total
    /// interval).
    pub(crate) forkchoice_updated_interval: Histogram,
}
205
206impl ForkchoiceUpdatedMetrics {
207    /// Increment the forkchoiceUpdated counter based on the given result
208    pub(crate) fn update_response_metrics(
209        &mut self,
210        start: Instant,
211        latest_new_payload_at: &mut Option<Instant>,
212        has_attrs: bool,
213        result: &Result<TreeOutcome<OnForkChoiceUpdated>, ProviderError>,
214    ) {
215        let finish = Instant::now();
216        let elapsed = finish - start;
217
218        if let Some(prev_finish) = self.latest_finish_at {
219            self.time_between_forkchoice_updated.record(start - prev_finish);
220        }
221        if let Some(prev_start) = self.latest_start_at {
222            self.forkchoice_updated_interval.record(start - prev_start);
223        }
224        self.latest_finish_at = Some(finish);
225        self.latest_start_at = Some(start);
226
227        match result {
228            Ok(outcome) => match outcome.outcome.forkchoice_status() {
229                ForkchoiceStatus::Valid => self.forkchoice_updated_valid.increment(1),
230                ForkchoiceStatus::Invalid => self.forkchoice_updated_invalid.increment(1),
231                ForkchoiceStatus::Syncing => self.forkchoice_updated_syncing.increment(1),
232            },
233            Err(_) => self.forkchoice_updated_error.increment(1),
234        }
235        self.forkchoice_updated_messages.increment(1);
236        if has_attrs {
237            self.forkchoice_with_attributes_updated_messages.increment(1);
238        }
239        self.forkchoice_updated_latency.record(elapsed);
240        self.forkchoice_updated_last.set(elapsed);
241        if let Some(latest_new_payload_at) = latest_new_payload_at.take() {
242            self.new_payload_forkchoice_updated_time_diff.record(start - latest_new_payload_at);
243        }
244    }
245}
246
/// Per-gas-bucket newPayload metrics, initialized once via [`Self::new_with_labels`].
///
/// `GasBucketMetrics::default` creates one instance per bucket and passes that bucket's label
/// under the `gas_bucket` key.
// NOTE(review): the `Metrics` derive presumably exports each field's doc comment as the metric
// description — confirm before rewording field docs.
#[derive(Clone, Metrics)]
#[metrics(scope = "consensus.engine.beacon")]
pub(crate) struct NewPayloadGasBucketMetrics {
    /// Latency for new payload calls in this gas bucket.
    pub(crate) new_payload_gas_bucket_latency: Histogram,
    /// Gas per second for new payload calls in this gas bucket.
    pub(crate) new_payload_gas_bucket_gas_per_second: Histogram,
}
256
257/// Holds pre-initialized [`NewPayloadGasBucketMetrics`] instances, one per gas bucket.
258#[derive(Debug)]
259pub(crate) struct GasBucketMetrics {
260    buckets: [NewPayloadGasBucketMetrics; NUM_GAS_BUCKETS],
261}
262
263impl Default for GasBucketMetrics {
264    fn default() -> Self {
265        Self {
266            buckets: std::array::from_fn(|i| {
267                let label = Self::bucket_label(i);
268                NewPayloadGasBucketMetrics::new_with_labels(&[("gas_bucket", label)])
269            }),
270        }
271    }
272}
273
274impl GasBucketMetrics {
275    fn record(&self, gas_used: u64, elapsed: Duration) {
276        let idx = Self::bucket_index(gas_used);
277        self.buckets[idx].new_payload_gas_bucket_latency.record(elapsed);
278        self.buckets[idx]
279            .new_payload_gas_bucket_gas_per_second
280            .record(gas_used as f64 / elapsed.as_secs_f64());
281    }
282
283    fn bucket_index(gas_used: u64) -> usize {
284        GAS_BUCKET_THRESHOLDS
285            .iter()
286            .position(|&threshold| gas_used < threshold)
287            .unwrap_or(GAS_BUCKET_THRESHOLDS.len())
288    }
289
290    /// Returns a human-readable label like `<5M`, `5-10M`, … `>40M`.
291    fn bucket_label(index: usize) -> String {
292        if index == 0 {
293            let hi = GAS_BUCKET_THRESHOLDS[0] / MEGAGAS;
294            format!("<{hi}M")
295        } else if index < GAS_BUCKET_THRESHOLDS.len() {
296            let lo = GAS_BUCKET_THRESHOLDS[index - 1] / MEGAGAS;
297            let hi = GAS_BUCKET_THRESHOLDS[index] / MEGAGAS;
298            format!("{lo}-{hi}M")
299        } else {
300            let lo = GAS_BUCKET_THRESHOLDS[GAS_BUCKET_THRESHOLDS.len() - 1] / MEGAGAS;
301            format!(">{lo}M")
302        }
303    }
304}
305
/// Metrics for engine newPayload responses.
///
/// Besides the metric handles, this struct stores the start and finish [`Instant`] of the
/// latest call and the per-gas-bucket metrics in [`GasBucketMetrics`].
// NOTE(review): the `Metrics` derive presumably exports each field's doc comment as the metric
// description — confirm before rewording field docs.
#[derive(Metrics)]
#[metrics(scope = "consensus.engine.beacon")]
pub(crate) struct NewPayloadStatusMetrics {
    /// Finish time of the latest new payload call.
    // Bookkeeping state rather than a metric handle, hence `skip`.
    #[metric(skip)]
    pub(crate) latest_finish_at: Option<Instant>,
    /// Start time of the latest new payload call.
    // Bookkeeping state rather than a metric handle, hence `skip`.
    #[metric(skip)]
    pub(crate) latest_start_at: Option<Instant>,
    /// Gas-bucket-labeled latency and gas/s histograms.
    // Nested container that builds its own labeled metrics in `Default`, hence `skip`.
    #[metric(skip)]
    pub(crate) gas_bucket: GasBucketMetrics,
    /// The total count of new payload messages received.
    pub(crate) new_payload_messages: Counter,
    /// The total count of new payload messages that we responded to with
    /// [Valid](PayloadStatusEnum::Valid).
    pub(crate) new_payload_valid: Counter,
    /// The total count of new payload messages that we responded to with
    /// [Invalid](PayloadStatusEnum::Invalid).
    pub(crate) new_payload_invalid: Counter,
    /// The total count of new payload messages that we responded to with
    /// [Syncing](PayloadStatusEnum::Syncing).
    pub(crate) new_payload_syncing: Counter,
    /// The total count of new payload messages that we responded to with
    /// [Accepted](PayloadStatusEnum::Accepted).
    pub(crate) new_payload_accepted: Counter,
    /// The total count of new payload messages that were unsuccessful, i.e. we responded with an
    /// error type that is not a [`PayloadStatusEnum`].
    pub(crate) new_payload_error: Counter,
    /// The total gas of valid new payload messages received.
    pub(crate) new_payload_total_gas: Histogram,
    /// The gas used for the last valid new payload.
    pub(crate) new_payload_total_gas_last: Gauge,
    /// The gas per second of valid new payload messages received.
    pub(crate) new_payload_gas_per_second: Histogram,
    /// The gas per second for the last new payload call.
    pub(crate) new_payload_gas_per_second_last: Gauge,
    /// Latency for the new payload calls.
    pub(crate) new_payload_latency: Histogram,
    /// Latency for the last new payload call.
    pub(crate) new_payload_last: Gauge,
    /// Time from previous payload finish to current payload start (idle time).
    pub(crate) time_between_new_payloads: Histogram,
    /// Time from previous payload start to current payload start (total interval).
    pub(crate) new_payload_interval: Histogram,
    /// Time diff between forkchoice updated call response and the next new payload call request.
    pub(crate) forkchoice_updated_new_payload_time_diff: Histogram,
}
355
356impl NewPayloadStatusMetrics {
357    /// Increment the newPayload counter based on the given result
358    pub(crate) fn update_response_metrics(
359        &mut self,
360        start: Instant,
361        latest_forkchoice_updated_at: &mut Option<Instant>,
362        result: &Result<TreeOutcome<PayloadStatus>, InsertBlockFatalError>,
363        gas_used: u64,
364    ) {
365        let finish = Instant::now();
366        let elapsed = finish - start;
367
368        if let Some(prev_finish) = self.latest_finish_at {
369            self.time_between_new_payloads.record(start - prev_finish);
370        }
371        if let Some(prev_start) = self.latest_start_at {
372            self.new_payload_interval.record(start - prev_start);
373        }
374        self.latest_finish_at = Some(finish);
375        self.latest_start_at = Some(start);
376        match result {
377            Ok(outcome) => match outcome.outcome.status {
378                PayloadStatusEnum::Valid => {
379                    self.new_payload_valid.increment(1);
380                    self.new_payload_total_gas.record(gas_used as f64);
381                    self.new_payload_total_gas_last.set(gas_used as f64);
382                    let gas_per_second = gas_used as f64 / elapsed.as_secs_f64();
383                    self.new_payload_gas_per_second.record(gas_per_second);
384                    self.new_payload_gas_per_second_last.set(gas_per_second);
385                }
386                PayloadStatusEnum::Syncing => self.new_payload_syncing.increment(1),
387                PayloadStatusEnum::Accepted => self.new_payload_accepted.increment(1),
388                PayloadStatusEnum::Invalid { .. } => self.new_payload_invalid.increment(1),
389            },
390            Err(_) => self.new_payload_error.increment(1),
391        }
392        self.new_payload_messages.increment(1);
393        self.new_payload_latency.record(elapsed);
394        self.new_payload_last.set(elapsed);
395        self.gas_bucket.record(gas_used, elapsed);
396        if let Some(latest_forkchoice_updated_at) = latest_forkchoice_updated_at.take() {
397            self.forkchoice_updated_new_payload_time_diff
398                .record(start - latest_forkchoice_updated_at);
399        }
400    }
401}
402
/// Metrics for EIP-7928 Block-Level Access Lists (BAL).
///
/// See also <https://github.com/ethereum/execution-metrics/issues/5>
// NOTE(review): presumably not recorded anywhere yet, hence the `dead_code` allowance — confirm.
// NOTE(review): the `Metrics` derive presumably exports each field's doc comment as the metric
// description — confirm before rewording field docs.
#[allow(dead_code)]
#[derive(Metrics, Clone)]
#[metrics(scope = "execution.block_access_list")]
pub(crate) struct BalMetrics {
    /// Size of the BAL in bytes for the current block.
    pub(crate) size_bytes: Gauge,
    /// Total number of blocks with valid BALs.
    pub(crate) valid_total: Counter,
    /// Total number of blocks with invalid BALs.
    pub(crate) invalid_total: Counter,
    /// Time taken to validate the BAL against actual execution.
    pub(crate) validation_time_seconds: Histogram,
    /// Number of account changes in the BAL.
    pub(crate) account_changes: Gauge,
    /// Number of storage changes in the BAL.
    pub(crate) storage_changes: Gauge,
    /// Number of balance changes in the BAL.
    pub(crate) balance_changes: Gauge,
    /// Number of nonce changes in the BAL.
    pub(crate) nonce_changes: Gauge,
    /// Number of code changes in the BAL.
    pub(crate) code_changes: Gauge,
}
429
/// Metrics for non-execution related block validation.
///
/// Groups the state-root counters and timings, payload validation timings, and size
/// histograms for the sorted state/trie structures named in the field docs.
// NOTE(review): the `Metrics` derive presumably exports each field's doc comment as the metric
// description — confirm before rewording field docs.
#[derive(Metrics, Clone)]
#[metrics(scope = "sync.block_validation")]
pub struct BlockValidationMetrics {
    /// Total number of storage tries updated in the state root calculation
    pub state_root_storage_tries_updated_total: Counter,
    /// Total number of times the parallel state root computation fell back to regular.
    pub state_root_parallel_fallback_total: Counter,
    /// Total number of times the state root task failed but the fallback succeeded.
    pub state_root_task_fallback_success_total: Counter,
    /// Total number of times the state root task timed out and a sequential fallback was spawned.
    pub state_root_task_timeout_total: Counter,
    /// Latest state root duration, ie the time spent blocked waiting for the state root.
    pub state_root_duration: Gauge,
    /// Histogram for state root duration ie the time spent blocked waiting for the state root
    pub state_root_histogram: Histogram,
    /// Histogram of deferred trie computation duration.
    pub deferred_trie_compute_duration: Histogram,
    /// Payload conversion and validation latency
    pub payload_validation_duration: Gauge,
    /// Histogram of payload validation latency
    pub payload_validation_histogram: Histogram,
    /// Payload processor spawning duration
    pub spawn_payload_processor: Histogram,
    /// Post-execution validation duration
    pub post_execution_validation_duration: Histogram,
    /// Total duration of the new payload call
    pub total_duration: Histogram,
    /// Size of `HashedPostStateSorted` (`total_len`)
    pub hashed_post_state_size: Histogram,
    /// Size of `TrieUpdatesSorted` (`total_len`)
    pub trie_updates_sorted_size: Histogram,
    /// Size of `AnchoredTrieInput` overlay `TrieUpdatesSorted` (`total_len`)
    pub anchored_overlay_trie_updates_size: Histogram,
    /// Size of `AnchoredTrieInput` overlay `HashedPostStateSorted` (`total_len`)
    pub anchored_overlay_hashed_state_size: Histogram,
}
467
468impl BlockValidationMetrics {
469    /// Records a new state root time, updating both the histogram and state root gauge
470    pub fn record_state_root(&self, trie_output: &TrieUpdates, elapsed_as_secs: f64) {
471        self.state_root_storage_tries_updated_total
472            .increment(trie_output.storage_tries_ref().len() as u64);
473        self.state_root_duration.set(elapsed_as_secs);
474        self.state_root_histogram.record(elapsed_as_secs);
475    }
476
477    /// Records a new payload validation time, updating both the histogram and the payload
478    /// validation gauge
479    pub fn record_payload_validation(&self, elapsed_as_secs: f64) {
480        self.payload_validation_duration.set(elapsed_as_secs);
481        self.payload_validation_histogram.record(elapsed_as_secs);
482    }
483}
484
/// Metrics for the blockchain tree block buffer
// NOTE(review): the `Metrics` derive presumably exports the field's doc comment as the metric
// description — confirm before rewording it.
#[derive(Metrics)]
#[metrics(scope = "blockchain_tree.block_buffer")]
pub(crate) struct BlockBufferMetrics {
    /// Total blocks in the block buffer
    pub blocks: Gauge,
}
492
#[cfg(test)]
mod tests {
    use super::*;
    use alloy_eips::eip7685::Requests;
    use metrics_util::debugging::{DebuggingRecorder, Snapshotter};
    use reth_ethereum_primitives::Receipt;
    use reth_execution_types::BlockExecutionResult;
    use reth_revm::db::BundleState;

    /// Installs a fresh [`DebuggingRecorder`] and hands back its snapshotter.
    ///
    /// Panics if the recorder cannot be installed.
    fn setup_test_recorder() -> Snapshotter {
        let recorder = DebuggingRecorder::new();
        let snapshotter = recorder.snapshotter();
        recorder.install().unwrap();
        snapshotter
    }

    /// Recording a block execution must leave at least one `sync.execution*` metric in the
    /// recorder snapshot.
    #[test]
    fn test_record_block_execution_metrics() {
        let snapshotter = setup_test_recorder();
        let metrics = EngineApiMetrics::default();

        // Touch a few metrics up front so that they are guaranteed to exist
        metrics.executor.gas_processed_total.increment(0);
        metrics.executor.gas_per_second.set(0.0);
        metrics.executor.gas_used_histogram.record(0.0);

        let result = BlockExecutionResult {
            receipts: vec![],
            requests: Requests::default(),
            gas_used: 21000,
            blob_gas_used: 0,
        };
        let output = BlockExecutionOutput::<Receipt> { state: BundleState::default(), result };

        metrics.record_block_execution(&output, Duration::from_millis(100));

        // Look for any metric registered under the executor scope
        let found_metrics = snapshotter
            .snapshot()
            .into_vec()
            .into_iter()
            .any(|(key, _unit, _desc, _value)| key.key().name().starts_with("sync.execution"));

        assert!(found_metrics, "Expected to find sync.execution metrics");
    }
}