reth_bench_compare/
comparison.rs

1//! Results comparison and report generation.
2
3use crate::cli::Args;
4use chrono::{DateTime, Utc};
5use csv::Reader;
6use eyre::{eyre, Result, WrapErr};
7use serde::{Deserialize, Serialize};
8use std::{
9    collections::HashMap,
10    fs,
11    path::{Path, PathBuf},
12};
13use tracing::{info, warn};
14
/// Manages comparison between baseline and feature reference results.
///
/// Holds the run-wide output location, the two reference names and, once loaded, the
/// benchmark results for each side. Both result slots start out as `None`.
pub(crate) struct ComparisonGenerator {
    /// Root output directory; report paths are built as `<output_dir>/results/<timestamp>/...`.
    output_dir: PathBuf,
    /// Run identifier, formatted as `%Y%m%d_%H%M%S` from the UTC time at construction.
    timestamp: String,
    /// Name of the baseline reference, copied from the CLI arguments.
    baseline_ref_name: String,
    /// Name of the feature reference, copied from the CLI arguments.
    feature_ref_name: String,
    /// Loaded baseline results; `None` until `add_ref_results("baseline", ..)` succeeds.
    baseline_results: Option<BenchmarkResults>,
    /// Loaded feature results; `None` until `add_ref_results("feature", ..)` succeeds.
    feature_results: Option<BenchmarkResults>,
}
24
/// Represents the results from a single benchmark run.
#[derive(Debug, Clone)]
pub(crate) struct BenchmarkResults {
    /// Name of the reference these results belong to.
    pub ref_name: String,
    /// Rows parsed from the run's `combined_latency.csv`, in file order.
    pub combined_latency_data: Vec<CombinedLatencyRow>,
    /// Aggregate statistics derived from the CSV data at load time.
    pub summary: BenchmarkSummary,
    /// When the run started; `None` right after loading, filled in by `set_ref_timestamps`.
    pub start_timestamp: Option<DateTime<Utc>>,
    /// When the run ended; `None` right after loading, filled in by `set_ref_timestamps`.
    pub end_timestamp: Option<DateTime<Utc>>,
}
34
/// Combined latency CSV row structure.
///
/// One row is deserialized per record of `combined_latency.csv`; field names double as the
/// expected CSV column names.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub(crate) struct CombinedLatencyRow {
    /// Block number; used as the key when matching baseline and feature rows.
    pub block_number: u64,
    /// Transaction count, carried through unchanged into the per-block comparison.
    pub transaction_count: u64,
    /// Gas used by the block; summed over all rows for the run's total gas.
    pub gas_used: u64,
    /// New-payload latency. The summary divides this by 1000 to report milliseconds, so the
    /// raw value is presumably in microseconds (confirm against the CSV producer).
    pub new_payload_latency: u128,
}
43
/// Total gas CSV row structure.
///
/// One row is deserialized per record of `total_gas.csv`; field names double as the expected
/// CSV column names.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub(crate) struct TotalGasRow {
    /// Block number of the record.
    pub block_number: u64,
    /// Transaction count of the record.
    pub transaction_count: u64,
    /// Gas used value of the record.
    pub gas_used: u64,
    /// Time value treated as microseconds; the last row's value (divided by 1000) is used as
    /// the total run duration in milliseconds.
    pub time: u128,
}
52
/// Summary statistics for a benchmark run.
#[derive(Debug, Clone, Serialize)]
pub(crate) struct BenchmarkSummary {
    /// Number of rows in the combined latency data.
    pub total_blocks: u64,
    /// Sum of `gas_used` over the combined latency rows.
    pub total_gas_used: u64,
    /// Total run duration in milliseconds, taken from the last total-gas row.
    pub total_duration_ms: u128,
    /// Mean new-payload latency across all blocks, in milliseconds.
    pub avg_new_payload_latency_ms: f64,
    /// `total_gas_used` divided by the duration in seconds; 0.0 when the duration is ~zero.
    pub gas_per_second: f64,
    /// `total_blocks` divided by the duration in seconds; 0.0 when the duration is ~zero.
    pub blocks_per_second: f64,
    /// Starting block number of the benchmarked range.
    pub min_block_number: u64,
    /// Ending block number of the benchmarked range.
    pub max_block_number: u64,
}
65
/// Comparison report between two benchmark runs.
///
/// This is the structure serialized to `comparison_report.json`.
#[derive(Debug, Serialize)]
pub(crate) struct ComparisonReport {
    /// Run identifier in `%Y%m%d_%H%M%S` form, copied from the generator.
    pub timestamp: String,
    /// Name, summary and run window of the baseline reference.
    pub baseline: RefInfo,
    /// Name, summary and run window of the feature reference.
    pub feature: RefInfo,
    /// Aggregate percentage changes of feature relative to baseline.
    pub comparison_summary: ComparisonSummary,
    /// One entry per feature block that also appears in the baseline data; these rows are
    /// additionally written to `per_block_comparison.csv`.
    pub per_block_comparisons: Vec<BlockComparison>,
}
75
/// Information about a reference in the comparison.
#[derive(Debug, Serialize)]
pub(crate) struct RefInfo {
    /// Name of the reference.
    pub ref_name: String,
    /// Summary statistics of the reference's benchmark run.
    pub summary: BenchmarkSummary,
    /// Start of the benchmark run, if it was recorded.
    pub start_timestamp: Option<DateTime<Utc>>,
    /// End of the benchmark run, if it was recorded.
    pub end_timestamp: Option<DateTime<Utc>>,
}
84
/// Summary of the comparison between references.
///
/// Every field is `(feature - baseline) / baseline * 100`, or `0.0` when the baseline value
/// is (nearly) zero.
#[derive(Debug, Serialize)]
pub(crate) struct ComparisonSummary {
    /// Percentage change of the average new-payload latency.
    pub new_payload_latency_change_percent: f64,
    /// Percentage change of gas processed per second.
    pub gas_per_second_change_percent: f64,
    /// Percentage change of blocks processed per second.
    pub blocks_per_second_change_percent: f64,
}
92
/// Per-block comparison data.
///
/// Serialized both into the JSON report and as one row of `per_block_comparison.csv`.
#[derive(Debug, Serialize)]
pub(crate) struct BlockComparison {
    /// Block number shared by the baseline and feature rows.
    pub block_number: u64,
    /// Transaction count, taken from the feature row.
    pub transaction_count: u64,
    /// Gas used, taken from the feature row.
    pub gas_used: u64,
    /// Raw new-payload latency from the baseline row (same unit as the CSV).
    pub baseline_new_payload_latency: u128,
    /// Raw new-payload latency from the feature row (same unit as the CSV).
    pub feature_new_payload_latency: u128,
    /// `(feature - baseline) / baseline * 100`, or `0.0` when the baseline latency is zero.
    pub new_payload_latency_change_percent: f64,
}
103
104impl ComparisonGenerator {
105    /// Create a new comparison generator
106    pub(crate) fn new(args: &Args) -> Self {
107        let now: DateTime<Utc> = Utc::now();
108        let timestamp = now.format("%Y%m%d_%H%M%S").to_string();
109
110        Self {
111            output_dir: args.output_dir_path(),
112            timestamp,
113            baseline_ref_name: args.baseline_ref.clone(),
114            feature_ref_name: args.feature_ref.clone(),
115            baseline_results: None,
116            feature_results: None,
117        }
118    }
119
120    /// Get the output directory for a specific reference
121    pub(crate) fn get_ref_output_dir(&self, ref_type: &str) -> PathBuf {
122        self.output_dir.join("results").join(&self.timestamp).join(ref_type)
123    }
124
125    /// Get the main output directory for this comparison run
126    pub(crate) fn get_output_dir(&self) -> PathBuf {
127        self.output_dir.join("results").join(&self.timestamp)
128    }
129
130    /// Add benchmark results for a reference
131    pub(crate) fn add_ref_results(&mut self, ref_type: &str, output_path: &Path) -> Result<()> {
132        let ref_name = match ref_type {
133            "baseline" => &self.baseline_ref_name,
134            "feature" => &self.feature_ref_name,
135            _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
136        };
137
138        let results = self.load_benchmark_results(ref_name, output_path)?;
139
140        match ref_type {
141            "baseline" => self.baseline_results = Some(results),
142            "feature" => self.feature_results = Some(results),
143            _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
144        }
145
146        info!("Loaded benchmark results for {} reference", ref_type);
147
148        Ok(())
149    }
150
151    /// Set the benchmark run timestamps for a reference
152    pub(crate) fn set_ref_timestamps(
153        &mut self,
154        ref_type: &str,
155        start: DateTime<Utc>,
156        end: DateTime<Utc>,
157    ) -> Result<()> {
158        match ref_type {
159            "baseline" => {
160                if let Some(ref mut results) = self.baseline_results {
161                    results.start_timestamp = Some(start);
162                    results.end_timestamp = Some(end);
163                } else {
164                    return Err(eyre!("Baseline results not loaded yet"));
165                }
166            }
167            "feature" => {
168                if let Some(ref mut results) = self.feature_results {
169                    results.start_timestamp = Some(start);
170                    results.end_timestamp = Some(end);
171                } else {
172                    return Err(eyre!("Feature results not loaded yet"));
173                }
174            }
175            _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
176        }
177
178        Ok(())
179    }
180
181    /// Generate the final comparison report
182    pub(crate) async fn generate_comparison_report(&self) -> Result<()> {
183        info!("Generating comparison report...");
184
185        let baseline =
186            self.baseline_results.as_ref().ok_or_else(|| eyre!("Baseline results not loaded"))?;
187
188        let feature =
189            self.feature_results.as_ref().ok_or_else(|| eyre!("Feature results not loaded"))?;
190
191        // Generate comparison
192        let comparison_summary =
193            self.calculate_comparison_summary(&baseline.summary, &feature.summary)?;
194        let per_block_comparisons = self.calculate_per_block_comparisons(baseline, feature)?;
195
196        let report = ComparisonReport {
197            timestamp: self.timestamp.clone(),
198            baseline: RefInfo {
199                ref_name: baseline.ref_name.clone(),
200                summary: baseline.summary.clone(),
201                start_timestamp: baseline.start_timestamp,
202                end_timestamp: baseline.end_timestamp,
203            },
204            feature: RefInfo {
205                ref_name: feature.ref_name.clone(),
206                summary: feature.summary.clone(),
207                start_timestamp: feature.start_timestamp,
208                end_timestamp: feature.end_timestamp,
209            },
210            comparison_summary,
211            per_block_comparisons,
212        };
213
214        // Write reports
215        self.write_comparison_reports(&report).await?;
216
217        // Print summary to console
218        self.print_comparison_summary(&report);
219
220        Ok(())
221    }
222
223    /// Load benchmark results from CSV files
224    fn load_benchmark_results(
225        &self,
226        ref_name: &str,
227        output_path: &Path,
228    ) -> Result<BenchmarkResults> {
229        let combined_latency_path = output_path.join("combined_latency.csv");
230        let total_gas_path = output_path.join("total_gas.csv");
231
232        let combined_latency_data = self.load_combined_latency_csv(&combined_latency_path)?;
233        let total_gas_data = self.load_total_gas_csv(&total_gas_path)?;
234
235        let summary = self.calculate_summary(&combined_latency_data, &total_gas_data)?;
236
237        Ok(BenchmarkResults {
238            ref_name: ref_name.to_string(),
239            combined_latency_data,
240            summary,
241            start_timestamp: None,
242            end_timestamp: None,
243        })
244    }
245
246    /// Load combined latency CSV data
247    fn load_combined_latency_csv(&self, path: &Path) -> Result<Vec<CombinedLatencyRow>> {
248        let mut reader = Reader::from_path(path)
249            .wrap_err_with(|| format!("Failed to open combined latency CSV: {path:?}"))?;
250
251        let mut rows = Vec::new();
252        for result in reader.deserialize() {
253            let row: CombinedLatencyRow = result
254                .wrap_err_with(|| format!("Failed to parse combined latency row in {path:?}"))?;
255            rows.push(row);
256        }
257
258        if rows.is_empty() {
259            return Err(eyre!("No data found in combined latency CSV: {:?}", path));
260        }
261
262        Ok(rows)
263    }
264
265    /// Load total gas CSV data
266    fn load_total_gas_csv(&self, path: &Path) -> Result<Vec<TotalGasRow>> {
267        let mut reader = Reader::from_path(path)
268            .wrap_err_with(|| format!("Failed to open total gas CSV: {path:?}"))?;
269
270        let mut rows = Vec::new();
271        for result in reader.deserialize() {
272            let row: TotalGasRow =
273                result.wrap_err_with(|| format!("Failed to parse total gas row in {path:?}"))?;
274            rows.push(row);
275        }
276
277        if rows.is_empty() {
278            return Err(eyre!("No data found in total gas CSV: {:?}", path));
279        }
280
281        Ok(rows)
282    }
283
284    /// Calculate summary statistics for a benchmark run
285    fn calculate_summary(
286        &self,
287        combined_data: &[CombinedLatencyRow],
288        total_gas_data: &[TotalGasRow],
289    ) -> Result<BenchmarkSummary> {
290        if combined_data.is_empty() || total_gas_data.is_empty() {
291            return Err(eyre!("Cannot calculate summary for empty data"));
292        }
293
294        let total_blocks = combined_data.len() as u64;
295        let total_gas_used: u64 = combined_data.iter().map(|r| r.gas_used).sum();
296
297        let total_duration_ms = total_gas_data.last().unwrap().time / 1000; // Convert microseconds to milliseconds
298
299        let avg_new_payload_latency_ms: f64 =
300            combined_data.iter().map(|r| r.new_payload_latency as f64 / 1000.0).sum::<f64>() /
301                total_blocks as f64;
302
303        let total_duration_seconds = total_duration_ms as f64 / 1000.0;
304        let gas_per_second = if total_duration_seconds > f64::EPSILON {
305            total_gas_used as f64 / total_duration_seconds
306        } else {
307            0.0
308        };
309
310        let blocks_per_second = if total_duration_seconds > f64::EPSILON {
311            total_blocks as f64 / total_duration_seconds
312        } else {
313            0.0
314        };
315
316        let min_block_number = combined_data.first().unwrap().block_number;
317        let max_block_number = combined_data.last().unwrap().block_number;
318
319        Ok(BenchmarkSummary {
320            total_blocks,
321            total_gas_used,
322            total_duration_ms,
323            avg_new_payload_latency_ms,
324            gas_per_second,
325            blocks_per_second,
326            min_block_number,
327            max_block_number,
328        })
329    }
330
331    /// Calculate comparison summary between baseline and feature
332    fn calculate_comparison_summary(
333        &self,
334        baseline: &BenchmarkSummary,
335        feature: &BenchmarkSummary,
336    ) -> Result<ComparisonSummary> {
337        let calc_percent_change = |baseline: f64, feature: f64| -> f64 {
338            if baseline.abs() > f64::EPSILON {
339                ((feature - baseline) / baseline) * 100.0
340            } else {
341                0.0
342            }
343        };
344
345        Ok(ComparisonSummary {
346            new_payload_latency_change_percent: calc_percent_change(
347                baseline.avg_new_payload_latency_ms,
348                feature.avg_new_payload_latency_ms,
349            ),
350            gas_per_second_change_percent: calc_percent_change(
351                baseline.gas_per_second,
352                feature.gas_per_second,
353            ),
354            blocks_per_second_change_percent: calc_percent_change(
355                baseline.blocks_per_second,
356                feature.blocks_per_second,
357            ),
358        })
359    }
360
361    /// Calculate per-block comparisons
362    fn calculate_per_block_comparisons(
363        &self,
364        baseline: &BenchmarkResults,
365        feature: &BenchmarkResults,
366    ) -> Result<Vec<BlockComparison>> {
367        let mut baseline_map: HashMap<u64, &CombinedLatencyRow> = HashMap::new();
368        for row in &baseline.combined_latency_data {
369            baseline_map.insert(row.block_number, row);
370        }
371
372        let mut comparisons = Vec::new();
373        for feature_row in &feature.combined_latency_data {
374            if let Some(baseline_row) = baseline_map.get(&feature_row.block_number) {
375                let calc_percent_change = |baseline: u128, feature: u128| -> f64 {
376                    if baseline > 0 {
377                        ((feature as f64 - baseline as f64) / baseline as f64) * 100.0
378                    } else {
379                        0.0
380                    }
381                };
382
383                let comparison = BlockComparison {
384                    block_number: feature_row.block_number,
385                    transaction_count: feature_row.transaction_count,
386                    gas_used: feature_row.gas_used,
387                    baseline_new_payload_latency: baseline_row.new_payload_latency,
388                    feature_new_payload_latency: feature_row.new_payload_latency,
389                    new_payload_latency_change_percent: calc_percent_change(
390                        baseline_row.new_payload_latency,
391                        feature_row.new_payload_latency,
392                    ),
393                };
394                comparisons.push(comparison);
395            } else {
396                warn!("Block {} not found in baseline data", feature_row.block_number);
397            }
398        }
399
400        Ok(comparisons)
401    }
402
403    /// Write comparison reports to files
404    async fn write_comparison_reports(&self, report: &ComparisonReport) -> Result<()> {
405        let report_dir = self.output_dir.join("results").join(&self.timestamp);
406        fs::create_dir_all(&report_dir)
407            .wrap_err_with(|| format!("Failed to create report directory: {report_dir:?}"))?;
408
409        // Write JSON report
410        let json_path = report_dir.join("comparison_report.json");
411        let json_content = serde_json::to_string_pretty(report)
412            .wrap_err("Failed to serialize comparison report to JSON")?;
413        fs::write(&json_path, json_content)
414            .wrap_err_with(|| format!("Failed to write JSON report: {json_path:?}"))?;
415
416        // Write CSV report for per-block comparisons
417        let csv_path = report_dir.join("per_block_comparison.csv");
418        let mut writer = csv::Writer::from_path(&csv_path)
419            .wrap_err_with(|| format!("Failed to create CSV writer: {csv_path:?}"))?;
420
421        for comparison in &report.per_block_comparisons {
422            writer.serialize(comparison).wrap_err("Failed to write comparison row to CSV")?;
423        }
424        writer.flush().wrap_err("Failed to flush CSV writer")?;
425
426        info!("Comparison reports written to: {:?}", report_dir);
427        Ok(())
428    }
429
430    /// Print comparison summary to console
431    fn print_comparison_summary(&self, report: &ComparisonReport) {
432        // Parse and format timestamp nicely
433        let formatted_timestamp = if let Ok(dt) = chrono::DateTime::parse_from_str(
434            &format!("{} +0000", report.timestamp.replace('_', " ")),
435            "%Y%m%d %H%M%S %z",
436        ) {
437            dt.format("%Y-%m-%d %H:%M:%S UTC").to_string()
438        } else {
439            // Fallback to original if parsing fails
440            report.timestamp.clone()
441        };
442
443        println!("\n=== BENCHMARK COMPARISON SUMMARY ===");
444        println!("Timestamp: {formatted_timestamp}");
445        println!("Baseline: {}", report.baseline.ref_name);
446        println!("Feature:  {}", report.feature.ref_name);
447        println!();
448
449        let summary = &report.comparison_summary;
450
451        println!("Performance Changes:");
452        println!(
453            "  NewPayload Latency: {:+.2}% (total avg change)",
454            summary.new_payload_latency_change_percent
455        );
456        println!(
457            "  Gas/Second:         {:+.2}% (total avg change)",
458            summary.gas_per_second_change_percent
459        );
460        println!(
461            "  Blocks/Second:      {:+.2}% (total avg change)",
462            summary.blocks_per_second_change_percent
463        );
464        println!();
465
466        println!("Baseline Summary:");
467        let baseline = &report.baseline.summary;
468        println!(
469            "  Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
470            baseline.total_blocks,
471            baseline.min_block_number,
472            baseline.max_block_number,
473            baseline.total_gas_used,
474            baseline.total_duration_ms as f64 / 1000.0
475        );
476        println!("  Avg NewPayload: {:.2}ms", baseline.avg_new_payload_latency_ms);
477        if let (Some(start), Some(end)) =
478            (&report.baseline.start_timestamp, &report.baseline.end_timestamp)
479        {
480            println!(
481                "  Started: {}, Ended: {}",
482                start.format("%Y-%m-%d %H:%M:%S UTC"),
483                end.format("%Y-%m-%d %H:%M:%S UTC")
484            );
485        }
486        println!();
487
488        println!("Feature Summary:");
489        let feature = &report.feature.summary;
490        println!(
491            "  Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
492            feature.total_blocks,
493            feature.min_block_number,
494            feature.max_block_number,
495            feature.total_gas_used,
496            feature.total_duration_ms as f64 / 1000.0
497        );
498        println!("  Avg NewPayload: {:.2}ms", feature.avg_new_payload_latency_ms);
499        if let (Some(start), Some(end)) =
500            (&report.feature.start_timestamp, &report.feature.end_timestamp)
501        {
502            println!(
503                "  Started: {}, Ended: {}",
504                start.format("%Y-%m-%d %H:%M:%S UTC"),
505                end.format("%Y-%m-%d %H:%M:%S UTC")
506            );
507        }
508        println!();
509    }
510}