1use crate::cli::Args;
4use chrono::{DateTime, Utc};
5use csv::Reader;
6use eyre::{eyre, Result, WrapErr};
7use serde::{Deserialize, Serialize};
8use std::{
9 cmp::Ordering,
10 collections::HashMap,
11 fs,
12 path::{Path, PathBuf},
13};
14use tracing::{info, warn};
15
16pub(crate) struct ComparisonGenerator {
18 output_dir: PathBuf,
19 timestamp: String,
20 baseline_ref_name: String,
21 feature_ref_name: String,
22 baseline_results: Option<BenchmarkResults>,
23 feature_results: Option<BenchmarkResults>,
24 baseline_command: Option<String>,
25 feature_command: Option<String>,
26}
27
28#[derive(Debug, Clone)]
30pub(crate) struct BenchmarkResults {
31 pub ref_name: String,
32 pub combined_latency_data: Vec<CombinedLatencyRow>,
33 pub summary: BenchmarkSummary,
34 pub start_timestamp: Option<DateTime<Utc>>,
35 pub end_timestamp: Option<DateTime<Utc>>,
36}
37
38#[derive(Debug, Clone, Deserialize, Serialize)]
40pub(crate) struct CombinedLatencyRow {
41 pub block_number: u64,
42 pub transaction_count: u64,
43 pub gas_used: u64,
44 pub new_payload_latency: u128,
45}
46
47#[derive(Debug, Clone, Deserialize, Serialize)]
49pub(crate) struct TotalGasRow {
50 pub block_number: u64,
51 pub transaction_count: u64,
52 pub gas_used: u64,
53 pub time: u128,
54}
55
56#[derive(Debug, Clone, Serialize)]
63pub(crate) struct BenchmarkSummary {
64 pub total_blocks: u64,
65 pub total_gas_used: u64,
66 pub total_duration_ms: u128,
67 pub mean_new_payload_latency_ms: f64,
68 pub median_new_payload_latency_ms: f64,
69 pub p90_new_payload_latency_ms: f64,
70 pub p99_new_payload_latency_ms: f64,
71 pub gas_per_second: f64,
72 pub blocks_per_second: f64,
73 pub min_block_number: u64,
74 pub max_block_number: u64,
75}
76
77#[derive(Debug, Serialize)]
79pub(crate) struct ComparisonReport {
80 pub timestamp: String,
81 pub baseline: RefInfo,
82 pub feature: RefInfo,
83 pub comparison_summary: ComparisonSummary,
84 pub per_block_comparisons: Vec<BlockComparison>,
85}
86
87#[derive(Debug, Serialize)]
89pub(crate) struct RefInfo {
90 pub ref_name: String,
91 pub summary: BenchmarkSummary,
92 pub start_timestamp: Option<DateTime<Utc>>,
93 pub end_timestamp: Option<DateTime<Utc>>,
94 pub reth_command: Option<String>,
95}
96
97#[derive(Debug, Serialize)]
112pub(crate) struct ComparisonSummary {
113 pub per_block_latency_change_mean_percent: f64,
114 pub per_block_latency_change_median_percent: f64,
115 pub per_block_latency_change_std_dev_percent: f64,
116 pub new_payload_total_latency_change_percent: f64,
117 pub new_payload_latency_p50_change_percent: f64,
118 pub new_payload_latency_p90_change_percent: f64,
119 pub new_payload_latency_p99_change_percent: f64,
120 pub gas_per_second_change_percent: f64,
121 pub blocks_per_second_change_percent: f64,
122}
123
124#[derive(Debug, Serialize)]
126pub(crate) struct BlockComparison {
127 pub block_number: u64,
128 pub transaction_count: u64,
129 pub gas_used: u64,
130 pub baseline_new_payload_latency: u128,
131 pub feature_new_payload_latency: u128,
132 pub new_payload_latency_change_percent: f64,
133}
134
135impl ComparisonGenerator {
136 pub(crate) fn new(args: &Args) -> Self {
138 let now: DateTime<Utc> = Utc::now();
139 let timestamp = now.format("%Y%m%d_%H%M%S").to_string();
140
141 Self {
142 output_dir: args.output_dir_path(),
143 timestamp,
144 baseline_ref_name: args.baseline_ref.clone(),
145 feature_ref_name: args.feature_ref.clone(),
146 baseline_results: None,
147 feature_results: None,
148 baseline_command: None,
149 feature_command: None,
150 }
151 }
152
153 pub(crate) fn get_ref_output_dir(&self, ref_type: &str) -> PathBuf {
155 self.output_dir.join("results").join(&self.timestamp).join(ref_type)
156 }
157
158 pub(crate) fn get_output_dir(&self) -> PathBuf {
160 self.output_dir.join("results").join(&self.timestamp)
161 }
162
163 pub(crate) fn add_ref_results(&mut self, ref_type: &str, output_path: &Path) -> Result<()> {
165 let ref_name = match ref_type {
166 "baseline" => &self.baseline_ref_name,
167 "feature" => &self.feature_ref_name,
168 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
169 };
170
171 let results = self.load_benchmark_results(ref_name, output_path)?;
172
173 match ref_type {
174 "baseline" => self.baseline_results = Some(results),
175 "feature" => self.feature_results = Some(results),
176 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
177 }
178
179 info!("Loaded benchmark results for {} reference", ref_type);
180
181 Ok(())
182 }
183
184 pub(crate) fn set_ref_timestamps(
186 &mut self,
187 ref_type: &str,
188 start: DateTime<Utc>,
189 end: DateTime<Utc>,
190 ) -> Result<()> {
191 match ref_type {
192 "baseline" => {
193 if let Some(ref mut results) = self.baseline_results {
194 results.start_timestamp = Some(start);
195 results.end_timestamp = Some(end);
196 } else {
197 return Err(eyre!("Baseline results not loaded yet"));
198 }
199 }
200 "feature" => {
201 if let Some(ref mut results) = self.feature_results {
202 results.start_timestamp = Some(start);
203 results.end_timestamp = Some(end);
204 } else {
205 return Err(eyre!("Feature results not loaded yet"));
206 }
207 }
208 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
209 }
210
211 Ok(())
212 }
213
214 pub(crate) fn set_ref_command(&mut self, ref_type: &str, command: String) -> Result<()> {
216 match ref_type {
217 "baseline" => {
218 self.baseline_command = Some(command);
219 }
220 "feature" => {
221 self.feature_command = Some(command);
222 }
223 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
224 }
225
226 Ok(())
227 }
228
229 pub(crate) async fn generate_comparison_report(&self) -> Result<()> {
231 info!("Generating comparison report...");
232
233 let baseline =
234 self.baseline_results.as_ref().ok_or_else(|| eyre!("Baseline results not loaded"))?;
235
236 let feature =
237 self.feature_results.as_ref().ok_or_else(|| eyre!("Feature results not loaded"))?;
238
239 let per_block_comparisons = self.calculate_per_block_comparisons(baseline, feature)?;
240 let comparison_summary = self.calculate_comparison_summary(
241 &baseline.summary,
242 &feature.summary,
243 &per_block_comparisons,
244 )?;
245
246 let report = ComparisonReport {
247 timestamp: self.timestamp.clone(),
248 baseline: RefInfo {
249 ref_name: baseline.ref_name.clone(),
250 summary: baseline.summary.clone(),
251 start_timestamp: baseline.start_timestamp,
252 end_timestamp: baseline.end_timestamp,
253 reth_command: self.baseline_command.clone(),
254 },
255 feature: RefInfo {
256 ref_name: feature.ref_name.clone(),
257 summary: feature.summary.clone(),
258 start_timestamp: feature.start_timestamp,
259 end_timestamp: feature.end_timestamp,
260 reth_command: self.feature_command.clone(),
261 },
262 comparison_summary,
263 per_block_comparisons,
264 };
265
266 self.write_comparison_reports(&report).await?;
268
269 self.print_comparison_summary(&report);
271
272 Ok(())
273 }
274
275 fn load_benchmark_results(
277 &self,
278 ref_name: &str,
279 output_path: &Path,
280 ) -> Result<BenchmarkResults> {
281 let combined_latency_path = output_path.join("combined_latency.csv");
282 let total_gas_path = output_path.join("total_gas.csv");
283
284 let combined_latency_data = self.load_combined_latency_csv(&combined_latency_path)?;
285 let total_gas_data = self.load_total_gas_csv(&total_gas_path)?;
286
287 let summary = self.calculate_summary(&combined_latency_data, &total_gas_data)?;
288
289 Ok(BenchmarkResults {
290 ref_name: ref_name.to_string(),
291 combined_latency_data,
292 summary,
293 start_timestamp: None,
294 end_timestamp: None,
295 })
296 }
297
298 fn load_combined_latency_csv(&self, path: &Path) -> Result<Vec<CombinedLatencyRow>> {
300 let mut reader = Reader::from_path(path)
301 .wrap_err_with(|| format!("Failed to open combined latency CSV: {path:?}"))?;
302
303 let mut rows = Vec::new();
304 for result in reader.deserialize() {
305 let row: CombinedLatencyRow = result
306 .wrap_err_with(|| format!("Failed to parse combined latency row in {path:?}"))?;
307 rows.push(row);
308 }
309
310 if rows.is_empty() {
311 return Err(eyre!("No data found in combined latency CSV: {:?}", path));
312 }
313
314 Ok(rows)
315 }
316
317 fn load_total_gas_csv(&self, path: &Path) -> Result<Vec<TotalGasRow>> {
319 let mut reader = Reader::from_path(path)
320 .wrap_err_with(|| format!("Failed to open total gas CSV: {path:?}"))?;
321
322 let mut rows = Vec::new();
323 for result in reader.deserialize() {
324 let row: TotalGasRow =
325 result.wrap_err_with(|| format!("Failed to parse total gas row in {path:?}"))?;
326 rows.push(row);
327 }
328
329 if rows.is_empty() {
330 return Err(eyre!("No data found in total gas CSV: {:?}", path));
331 }
332
333 Ok(rows)
334 }
335
336 fn calculate_summary(
342 &self,
343 combined_data: &[CombinedLatencyRow],
344 total_gas_data: &[TotalGasRow],
345 ) -> Result<BenchmarkSummary> {
346 if combined_data.is_empty() || total_gas_data.is_empty() {
347 return Err(eyre!("Cannot calculate summary for empty data"));
348 }
349
350 let total_blocks = combined_data.len() as u64;
351 let total_gas_used: u64 = combined_data.iter().map(|r| r.gas_used).sum();
352
353 let total_duration_ms = total_gas_data.last().unwrap().time / 1000; let latencies_ms: Vec<f64> =
356 combined_data.iter().map(|r| r.new_payload_latency as f64 / 1000.0).collect();
357 let mean_new_payload_latency_ms: f64 =
358 latencies_ms.iter().sum::<f64>() / total_blocks as f64;
359
360 let mut sorted_latencies_ms = latencies_ms;
361 sorted_latencies_ms.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
362 let median_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.5);
363 let p90_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.9);
364 let p99_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.99);
365
366 let total_duration_seconds = total_duration_ms as f64 / 1000.0;
367 let gas_per_second = if total_duration_seconds > f64::EPSILON {
368 total_gas_used as f64 / total_duration_seconds
369 } else {
370 0.0
371 };
372
373 let blocks_per_second = if total_duration_seconds > f64::EPSILON {
374 total_blocks as f64 / total_duration_seconds
375 } else {
376 0.0
377 };
378
379 let min_block_number = combined_data.first().unwrap().block_number;
380 let max_block_number = combined_data.last().unwrap().block_number;
381
382 Ok(BenchmarkSummary {
383 total_blocks,
384 total_gas_used,
385 total_duration_ms,
386 mean_new_payload_latency_ms,
387 median_new_payload_latency_ms,
388 p90_new_payload_latency_ms,
389 p99_new_payload_latency_ms,
390 gas_per_second,
391 blocks_per_second,
392 min_block_number,
393 max_block_number,
394 })
395 }
396
397 fn calculate_comparison_summary(
399 &self,
400 baseline: &BenchmarkSummary,
401 feature: &BenchmarkSummary,
402 per_block_comparisons: &[BlockComparison],
403 ) -> Result<ComparisonSummary> {
404 let calc_percent_change = |baseline: f64, feature: f64| -> f64 {
405 if baseline.abs() > f64::EPSILON {
406 ((feature - baseline) / baseline) * 100.0
407 } else {
408 0.0
409 }
410 };
411
412 let per_block_percent_changes: Vec<f64> =
417 per_block_comparisons.iter().map(|c| c.new_payload_latency_change_percent).collect();
418 let per_block_latency_change_mean_percent = if per_block_percent_changes.is_empty() {
419 0.0
420 } else {
421 per_block_percent_changes.iter().sum::<f64>() / per_block_percent_changes.len() as f64
422 };
423 let per_block_latency_change_median_percent = if per_block_percent_changes.is_empty() {
424 0.0
425 } else {
426 let mut sorted = per_block_percent_changes.clone();
427 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
428 percentile(&sorted, 0.5)
429 };
430 let per_block_latency_change_std_dev_percent =
431 calculate_std_dev(&per_block_percent_changes, per_block_latency_change_mean_percent);
432
433 let baseline_total_latency_ms =
434 baseline.mean_new_payload_latency_ms * baseline.total_blocks as f64;
435 let feature_total_latency_ms =
436 feature.mean_new_payload_latency_ms * feature.total_blocks as f64;
437 let new_payload_total_latency_change_percent =
438 calc_percent_change(baseline_total_latency_ms, feature_total_latency_ms);
439
440 Ok(ComparisonSummary {
441 per_block_latency_change_mean_percent,
442 per_block_latency_change_median_percent,
443 per_block_latency_change_std_dev_percent,
444 new_payload_total_latency_change_percent,
445 new_payload_latency_p50_change_percent: calc_percent_change(
446 baseline.median_new_payload_latency_ms,
447 feature.median_new_payload_latency_ms,
448 ),
449 new_payload_latency_p90_change_percent: calc_percent_change(
450 baseline.p90_new_payload_latency_ms,
451 feature.p90_new_payload_latency_ms,
452 ),
453 new_payload_latency_p99_change_percent: calc_percent_change(
454 baseline.p99_new_payload_latency_ms,
455 feature.p99_new_payload_latency_ms,
456 ),
457 gas_per_second_change_percent: calc_percent_change(
458 baseline.gas_per_second,
459 feature.gas_per_second,
460 ),
461 blocks_per_second_change_percent: calc_percent_change(
462 baseline.blocks_per_second,
463 feature.blocks_per_second,
464 ),
465 })
466 }
467
468 fn calculate_per_block_comparisons(
470 &self,
471 baseline: &BenchmarkResults,
472 feature: &BenchmarkResults,
473 ) -> Result<Vec<BlockComparison>> {
474 let mut baseline_map: HashMap<u64, &CombinedLatencyRow> = HashMap::new();
475 for row in &baseline.combined_latency_data {
476 baseline_map.insert(row.block_number, row);
477 }
478
479 let mut comparisons = Vec::new();
480 for feature_row in &feature.combined_latency_data {
481 if let Some(baseline_row) = baseline_map.get(&feature_row.block_number) {
482 let calc_percent_change = |baseline: u128, feature: u128| -> f64 {
483 if baseline > 0 {
484 ((feature as f64 - baseline as f64) / baseline as f64) * 100.0
485 } else {
486 0.0
487 }
488 };
489
490 let comparison = BlockComparison {
491 block_number: feature_row.block_number,
492 transaction_count: feature_row.transaction_count,
493 gas_used: feature_row.gas_used,
494 baseline_new_payload_latency: baseline_row.new_payload_latency,
495 feature_new_payload_latency: feature_row.new_payload_latency,
496 new_payload_latency_change_percent: calc_percent_change(
497 baseline_row.new_payload_latency,
498 feature_row.new_payload_latency,
499 ),
500 };
501 comparisons.push(comparison);
502 } else {
503 warn!("Block {} not found in baseline data", feature_row.block_number);
504 }
505 }
506
507 Ok(comparisons)
508 }
509
510 async fn write_comparison_reports(&self, report: &ComparisonReport) -> Result<()> {
512 let report_dir = self.output_dir.join("results").join(&self.timestamp);
513 fs::create_dir_all(&report_dir)
514 .wrap_err_with(|| format!("Failed to create report directory: {report_dir:?}"))?;
515
516 let json_path = report_dir.join("comparison_report.json");
518 let json_content = serde_json::to_string_pretty(report)
519 .wrap_err("Failed to serialize comparison report to JSON")?;
520 fs::write(&json_path, json_content)
521 .wrap_err_with(|| format!("Failed to write JSON report: {json_path:?}"))?;
522
523 let csv_path = report_dir.join("per_block_comparison.csv");
525 let mut writer = csv::Writer::from_path(&csv_path)
526 .wrap_err_with(|| format!("Failed to create CSV writer: {csv_path:?}"))?;
527
528 for comparison in &report.per_block_comparisons {
529 writer.serialize(comparison).wrap_err("Failed to write comparison row to CSV")?;
530 }
531 writer.flush().wrap_err("Failed to flush CSV writer")?;
532
533 info!("Comparison reports written to: {:?}", report_dir);
534 Ok(())
535 }
536
537 fn print_comparison_summary(&self, report: &ComparisonReport) {
539 let formatted_timestamp = if let Ok(dt) = chrono::DateTime::parse_from_str(
541 &format!("{} +0000", report.timestamp.replace('_', " ")),
542 "%Y%m%d %H%M%S %z",
543 ) {
544 dt.format("%Y-%m-%d %H:%M:%S UTC").to_string()
545 } else {
546 report.timestamp.clone()
548 };
549
550 println!("\n=== BENCHMARK COMPARISON SUMMARY ===");
551 println!("Timestamp: {formatted_timestamp}");
552 println!("Baseline: {}", report.baseline.ref_name);
553 println!("Feature: {}", report.feature.ref_name);
554 println!();
555
556 let summary = &report.comparison_summary;
557
558 println!("Performance Changes:");
559 println!(
560 " NewPayload Latency per-block mean change: {:+.2}%",
561 summary.per_block_latency_change_mean_percent
562 );
563 println!(
564 " NewPayload Latency per-block median change: {:+.2}%",
565 summary.per_block_latency_change_median_percent
566 );
567 println!(
568 " NewPayload Latency per-block std dev: {:.2}%",
569 summary.per_block_latency_change_std_dev_percent
570 );
571 println!(
572 " Total newPayload time change: {:+.2}%",
573 summary.new_payload_total_latency_change_percent
574 );
575 println!(
576 " NewPayload Latency p50: {:+.2}%",
577 summary.new_payload_latency_p50_change_percent
578 );
579 println!(
580 " NewPayload Latency p90: {:+.2}%",
581 summary.new_payload_latency_p90_change_percent
582 );
583 println!(
584 " NewPayload Latency p99: {:+.2}%",
585 summary.new_payload_latency_p99_change_percent
586 );
587 println!(
588 " Gas/Second: {:+.2}%",
589 summary.gas_per_second_change_percent
590 );
591 println!(
592 " Blocks/Second: {:+.2}%",
593 summary.blocks_per_second_change_percent
594 );
595 println!();
596
597 println!("Baseline Summary:");
598 let baseline = &report.baseline.summary;
599 println!(
600 " Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
601 baseline.total_blocks,
602 baseline.min_block_number,
603 baseline.max_block_number,
604 baseline.total_gas_used,
605 baseline.total_duration_ms as f64 / 1000.0
606 );
607 println!(" NewPayload latency (ms):");
608 println!(
609 " mean: {:.2}, p50: {:.2}, p90: {:.2}, p99: {:.2}",
610 baseline.mean_new_payload_latency_ms,
611 baseline.median_new_payload_latency_ms,
612 baseline.p90_new_payload_latency_ms,
613 baseline.p99_new_payload_latency_ms
614 );
615 if let (Some(start), Some(end)) =
616 (&report.baseline.start_timestamp, &report.baseline.end_timestamp)
617 {
618 println!(
619 " Started: {}, Ended: {}",
620 start.format("%Y-%m-%d %H:%M:%S UTC"),
621 end.format("%Y-%m-%d %H:%M:%S UTC")
622 );
623 }
624 if let Some(ref cmd) = report.baseline.reth_command {
625 println!(" Command: {}", cmd);
626 }
627 println!();
628
629 println!("Feature Summary:");
630 let feature = &report.feature.summary;
631 println!(
632 " Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
633 feature.total_blocks,
634 feature.min_block_number,
635 feature.max_block_number,
636 feature.total_gas_used,
637 feature.total_duration_ms as f64 / 1000.0
638 );
639 println!(" NewPayload latency (ms):");
640 println!(
641 " mean: {:.2}, p50: {:.2}, p90: {:.2}, p99: {:.2}",
642 feature.mean_new_payload_latency_ms,
643 feature.median_new_payload_latency_ms,
644 feature.p90_new_payload_latency_ms,
645 feature.p99_new_payload_latency_ms
646 );
647 if let (Some(start), Some(end)) =
648 (&report.feature.start_timestamp, &report.feature.end_timestamp)
649 {
650 println!(
651 " Started: {}, Ended: {}",
652 start.format("%Y-%m-%d %H:%M:%S UTC"),
653 end.format("%Y-%m-%d %H:%M:%S UTC")
654 );
655 }
656 if let Some(ref cmd) = report.feature.reth_command {
657 println!(" Command: {}", cmd);
658 }
659 println!();
660 }
661}
662
/// Population standard deviation of `values` around the supplied `mean`.
///
/// The squared deviations are divided by the number of values (not by
/// `n - 1`). An empty slice yields `0.0`.
fn calculate_std_dev(values: &[f64], mean: f64) -> f64 {
    let count = values.len();
    if count == 0 {
        return 0.0;
    }

    let squared_deviation_sum: f64 = values
        .iter()
        .map(|&value| {
            let delta = value - mean;
            delta * delta
        })
        .sum();

    (squared_deviation_sum / count as f64).sqrt()
}
685
/// Returns the requested percentile of an ascending-sorted slice, linearly
/// interpolating between the two neighbouring elements when the rank falls
/// between indices.
///
/// `percentile` is a fraction and is clamped to `[0.0, 1.0]`. The caller is
/// responsible for sorting `sorted_values`. An empty slice yields `0.0`.
fn percentile(sorted_values: &[f64], percentile: f64) -> f64 {
    let last_index = match sorted_values.len().checked_sub(1) {
        Some(index) => index,
        None => return 0.0,
    };

    let rank = percentile.clamp(0.0, 1.0) * last_index as f64;
    let (below, above) = (rank.floor() as usize, rank.ceil() as usize);

    if below == above {
        return sorted_values[below];
    }

    // Blend the two neighbours by how far the rank lies past the lower index.
    let fraction = rank - below as f64;
    sorted_values[below].mul_add(1.0 - fraction, sorted_values[above] * fraction)
}