1use crate::cli::Args;
4use chrono::{DateTime, Utc};
5use csv::Reader;
6use eyre::{eyre, Result, WrapErr};
7use serde::{Deserialize, Serialize};
8use std::{
9 cmp::Ordering,
10 collections::HashMap,
11 fs,
12 path::{Path, PathBuf},
13};
14use tracing::{info, warn};
15
/// Collects the benchmark results of a baseline and a feature reference and turns them into
/// comparison reports (JSON, CSV and a console summary).
pub(crate) struct ComparisonGenerator {
    /// Root directory; all output is placed below `<output_dir>/results/<timestamp>/`.
    output_dir: PathBuf,
    /// Creation time of the generator, formatted as `%Y%m%d_%H%M%S` (UTC).
    timestamp: String,
    /// Name of the baseline reference, copied from the CLI arguments.
    baseline_ref_name: String,
    /// Name of the feature reference, copied from the CLI arguments.
    feature_ref_name: String,
    /// Baseline results; `None` until `add_ref_results("baseline", ..)` succeeds.
    baseline_results: Option<BenchmarkResults>,
    /// Feature results; `None` until `add_ref_results("feature", ..)` succeeds.
    feature_results: Option<BenchmarkResults>,
    /// Command recorded for the baseline run via `set_ref_command`, if any.
    baseline_command: Option<String>,
    /// Command recorded for the feature run via `set_ref_command`, if any.
    feature_command: Option<String>,
}
27
/// Raw rows and derived summary of a single benchmark run (one reference).
#[derive(Debug, Clone)]
pub(crate) struct BenchmarkResults {
    /// Name of the reference these results belong to.
    pub ref_name: String,
    /// Rows loaded from the run's `combined_latency.csv`.
    pub combined_latency_data: Vec<CombinedLatencyRow>,
    /// Aggregate statistics computed from the loaded rows.
    pub summary: BenchmarkSummary,
    /// Start of the run; `None` until `set_ref_timestamps` is called.
    pub start_timestamp: Option<DateTime<Utc>>,
    /// End of the run; `None` until `set_ref_timestamps` is called.
    pub end_timestamp: Option<DateTime<Utc>>,
}
37
/// One record of `combined_latency.csv`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub(crate) struct CombinedLatencyRow {
    /// Number of the block this record describes.
    pub block_number: u64,
    /// Transaction count of the block; `None` when the column is absent from the CSV.
    #[serde(default)]
    pub transaction_count: Option<u64>,
    /// Gas used by the block.
    pub gas_used: u64,
    /// `newPayload` latency. The summary divides this by 1000 to obtain milliseconds, so the
    /// raw value is presumably in microseconds (confirm against the CSV producer).
    pub new_payload_latency: u128,
}
47
/// One record of `total_gas.csv`.
#[derive(Debug, Clone, Deserialize, Serialize)]
pub(crate) struct TotalGasRow {
    /// Number of the block this record describes.
    pub block_number: u64,
    /// Transaction count of the block; `None` when the column is absent from the CSV.
    #[serde(default)]
    pub transaction_count: Option<u64>,
    /// Gas used by the block.
    pub gas_used: u64,
    /// Time value of the record. The summary takes the last row's value and divides it by 1000
    /// to obtain the total duration in milliseconds, so this is presumably a cumulative time in
    /// microseconds (confirm against the CSV producer).
    pub time: u128,
}
57
/// Aggregate statistics of one benchmark run, computed by `calculate_summary`.
#[derive(Debug, Clone, Serialize)]
pub(crate) struct BenchmarkSummary {
    /// Number of rows in the combined latency data.
    pub total_blocks: u64,
    /// Sum of `gas_used` over the combined latency data.
    pub total_gas_used: u64,
    /// `time` of the last `total_gas.csv` row divided by 1000 (integer division).
    pub total_duration_ms: u128,
    /// Arithmetic mean of the per-block `newPayload` latency, in milliseconds.
    pub mean_new_payload_latency_ms: f64,
    /// 50th percentile of the per-block `newPayload` latency, in milliseconds.
    pub median_new_payload_latency_ms: f64,
    /// 90th percentile of the per-block `newPayload` latency, in milliseconds.
    pub p90_new_payload_latency_ms: f64,
    /// 99th percentile of the per-block `newPayload` latency, in milliseconds.
    pub p99_new_payload_latency_ms: f64,
    /// `total_gas_used` divided by the total duration in seconds; `0.0` for a zero duration.
    pub gas_per_second: f64,
    /// `total_blocks` divided by the total duration in seconds; `0.0` for a zero duration.
    pub blocks_per_second: f64,
    /// Lower end of the benchmarked block range.
    pub min_block_number: u64,
    /// Upper end of the benchmarked block range.
    pub max_block_number: u64,
}
78
/// Complete comparison between the baseline and the feature run; serialized as the JSON report.
#[derive(Debug, Serialize)]
pub(crate) struct ComparisonReport {
    /// Timestamp of the generator that produced the report (`%Y%m%d_%H%M%S`).
    pub timestamp: String,
    /// Details of the baseline run.
    pub baseline: RefInfo,
    /// Details of the feature run.
    pub feature: RefInfo,
    /// Relative changes between the two runs.
    pub comparison_summary: ComparisonSummary,
    /// One entry per feature block that also exists in the baseline data.
    pub per_block_comparisons: Vec<BlockComparison>,
}
88
/// Description of one benchmarked reference as it appears in the report.
#[derive(Debug, Serialize)]
pub(crate) struct RefInfo {
    /// Name of the reference.
    pub ref_name: String,
    /// Aggregate statistics of the run.
    pub summary: BenchmarkSummary,
    /// Start of the run, if it was recorded.
    pub start_timestamp: Option<DateTime<Utc>>,
    /// End of the run, if it was recorded.
    pub end_timestamp: Option<DateTime<Utc>>,
    /// Command recorded for the run via `set_ref_command`, if any.
    pub reth_command: Option<String>,
}
98
/// Relative changes of the feature run versus the baseline run.
///
/// Every `*_change_percent` value is `(feature - baseline) / baseline * 100`; it is reported as
/// `0.0` when the baseline value is zero.
#[derive(Debug, Serialize)]
pub(crate) struct ComparisonSummary {
    /// Mean of the per-block latency changes (blocks present in both runs).
    pub per_block_latency_change_mean_percent: f64,
    /// Median of the per-block latency changes (blocks present in both runs).
    pub per_block_latency_change_median_percent: f64,
    /// Population standard deviation of the per-block latency changes.
    pub per_block_latency_change_std_dev_percent: f64,
    /// Change of the summed `newPayload` latency (mean latency times block count).
    pub new_payload_total_latency_change_percent: f64,
    /// Change of the median `newPayload` latency.
    pub new_payload_latency_p50_change_percent: f64,
    /// Change of the 90th percentile `newPayload` latency.
    pub new_payload_latency_p90_change_percent: f64,
    /// Change of the 99th percentile `newPayload` latency.
    pub new_payload_latency_p99_change_percent: f64,
    /// Change of the gas throughput.
    pub gas_per_second_change_percent: f64,
    /// Change of the block throughput.
    pub blocks_per_second_change_percent: f64,
}
125
/// Latency comparison for a single block; also one row of `per_block_comparison.csv`.
#[derive(Debug, Serialize)]
pub(crate) struct BlockComparison {
    /// Number of the compared block.
    pub block_number: u64,
    /// Transaction count taken from the feature row; omitted from the output when unknown.
    #[serde(skip_serializing_if = "Option::is_none")]
    pub transaction_count: Option<u64>,
    /// Gas used, taken from the feature row.
    pub gas_used: u64,
    /// Raw `newPayload` latency of the block in the baseline run (same unit as the CSV input).
    pub baseline_new_payload_latency: u128,
    /// Raw `newPayload` latency of the block in the feature run (same unit as the CSV input).
    pub feature_new_payload_latency: u128,
    /// `(feature - baseline) / baseline * 100`, or `0.0` when the baseline latency is zero.
    pub new_payload_latency_change_percent: f64,
}
137
138impl ComparisonGenerator {
139 pub(crate) fn new(args: &Args) -> Self {
141 let now: DateTime<Utc> = Utc::now();
142 let timestamp = now.format("%Y%m%d_%H%M%S").to_string();
143
144 Self {
145 output_dir: args.output_dir_path(),
146 timestamp,
147 baseline_ref_name: args.baseline_ref.clone(),
148 feature_ref_name: args.feature_ref.clone(),
149 baseline_results: None,
150 feature_results: None,
151 baseline_command: None,
152 feature_command: None,
153 }
154 }
155
156 pub(crate) fn get_ref_output_dir(&self, ref_type: &str) -> PathBuf {
158 self.output_dir.join("results").join(&self.timestamp).join(ref_type)
159 }
160
161 pub(crate) fn get_output_dir(&self) -> PathBuf {
163 self.output_dir.join("results").join(&self.timestamp)
164 }
165
166 pub(crate) fn add_ref_results(&mut self, ref_type: &str, output_path: &Path) -> Result<()> {
168 let ref_name = match ref_type {
169 "baseline" => &self.baseline_ref_name,
170 "feature" => &self.feature_ref_name,
171 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
172 };
173
174 let results = self.load_benchmark_results(ref_name, output_path)?;
175
176 match ref_type {
177 "baseline" => self.baseline_results = Some(results),
178 "feature" => self.feature_results = Some(results),
179 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
180 }
181
182 info!("Loaded benchmark results for {} reference", ref_type);
183
184 Ok(())
185 }
186
187 pub(crate) fn set_ref_timestamps(
189 &mut self,
190 ref_type: &str,
191 start: DateTime<Utc>,
192 end: DateTime<Utc>,
193 ) -> Result<()> {
194 match ref_type {
195 "baseline" => {
196 if let Some(ref mut results) = self.baseline_results {
197 results.start_timestamp = Some(start);
198 results.end_timestamp = Some(end);
199 } else {
200 return Err(eyre!("Baseline results not loaded yet"));
201 }
202 }
203 "feature" => {
204 if let Some(ref mut results) = self.feature_results {
205 results.start_timestamp = Some(start);
206 results.end_timestamp = Some(end);
207 } else {
208 return Err(eyre!("Feature results not loaded yet"));
209 }
210 }
211 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
212 }
213
214 Ok(())
215 }
216
217 pub(crate) fn set_ref_command(&mut self, ref_type: &str, command: String) -> Result<()> {
219 match ref_type {
220 "baseline" => {
221 self.baseline_command = Some(command);
222 }
223 "feature" => {
224 self.feature_command = Some(command);
225 }
226 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
227 }
228
229 Ok(())
230 }
231
232 pub(crate) async fn generate_comparison_report(&self) -> Result<()> {
234 info!("Generating comparison report...");
235
236 let baseline =
237 self.baseline_results.as_ref().ok_or_else(|| eyre!("Baseline results not loaded"))?;
238
239 let feature =
240 self.feature_results.as_ref().ok_or_else(|| eyre!("Feature results not loaded"))?;
241
242 let per_block_comparisons = self.calculate_per_block_comparisons(baseline, feature)?;
243 let comparison_summary = self.calculate_comparison_summary(
244 &baseline.summary,
245 &feature.summary,
246 &per_block_comparisons,
247 )?;
248
249 let report = ComparisonReport {
250 timestamp: self.timestamp.clone(),
251 baseline: RefInfo {
252 ref_name: baseline.ref_name.clone(),
253 summary: baseline.summary.clone(),
254 start_timestamp: baseline.start_timestamp,
255 end_timestamp: baseline.end_timestamp,
256 reth_command: self.baseline_command.clone(),
257 },
258 feature: RefInfo {
259 ref_name: feature.ref_name.clone(),
260 summary: feature.summary.clone(),
261 start_timestamp: feature.start_timestamp,
262 end_timestamp: feature.end_timestamp,
263 reth_command: self.feature_command.clone(),
264 },
265 comparison_summary,
266 per_block_comparisons,
267 };
268
269 self.write_comparison_reports(&report).await?;
271
272 self.print_comparison_summary(&report);
274
275 Ok(())
276 }
277
278 fn load_benchmark_results(
280 &self,
281 ref_name: &str,
282 output_path: &Path,
283 ) -> Result<BenchmarkResults> {
284 let combined_latency_path = output_path.join("combined_latency.csv");
285 let total_gas_path = output_path.join("total_gas.csv");
286
287 let combined_latency_data = self.load_combined_latency_csv(&combined_latency_path)?;
288 let total_gas_data = self.load_total_gas_csv(&total_gas_path)?;
289
290 let summary = self.calculate_summary(&combined_latency_data, &total_gas_data)?;
291
292 Ok(BenchmarkResults {
293 ref_name: ref_name.to_string(),
294 combined_latency_data,
295 summary,
296 start_timestamp: None,
297 end_timestamp: None,
298 })
299 }
300
301 fn load_combined_latency_csv(&self, path: &Path) -> Result<Vec<CombinedLatencyRow>> {
303 let mut reader = Reader::from_path(path)
304 .wrap_err_with(|| format!("Failed to open combined latency CSV: {path:?}"))?;
305
306 let mut rows = Vec::new();
307 for result in reader.deserialize() {
308 let row: CombinedLatencyRow = result
309 .wrap_err_with(|| format!("Failed to parse combined latency row in {path:?}"))?;
310 rows.push(row);
311 }
312
313 if rows.is_empty() {
314 return Err(eyre!("No data found in combined latency CSV: {:?}", path));
315 }
316
317 Ok(rows)
318 }
319
320 fn load_total_gas_csv(&self, path: &Path) -> Result<Vec<TotalGasRow>> {
322 let mut reader = Reader::from_path(path)
323 .wrap_err_with(|| format!("Failed to open total gas CSV: {path:?}"))?;
324
325 let mut rows = Vec::new();
326 for result in reader.deserialize() {
327 let row: TotalGasRow =
328 result.wrap_err_with(|| format!("Failed to parse total gas row in {path:?}"))?;
329 rows.push(row);
330 }
331
332 if rows.is_empty() {
333 return Err(eyre!("No data found in total gas CSV: {:?}", path));
334 }
335
336 Ok(rows)
337 }
338
339 fn calculate_summary(
345 &self,
346 combined_data: &[CombinedLatencyRow],
347 total_gas_data: &[TotalGasRow],
348 ) -> Result<BenchmarkSummary> {
349 if combined_data.is_empty() || total_gas_data.is_empty() {
350 return Err(eyre!("Cannot calculate summary for empty data"));
351 }
352
353 let total_blocks = combined_data.len() as u64;
354 let total_gas_used: u64 = combined_data.iter().map(|r| r.gas_used).sum();
355
356 let total_duration_ms = total_gas_data.last().unwrap().time / 1000; let latencies_ms: Vec<f64> =
359 combined_data.iter().map(|r| r.new_payload_latency as f64 / 1000.0).collect();
360 let mean_new_payload_latency_ms: f64 =
361 latencies_ms.iter().sum::<f64>() / total_blocks as f64;
362
363 let mut sorted_latencies_ms = latencies_ms;
364 sorted_latencies_ms.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
365 let median_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.5);
366 let p90_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.9);
367 let p99_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.99);
368
369 let total_duration_seconds = total_duration_ms as f64 / 1000.0;
370 let gas_per_second = if total_duration_seconds > f64::EPSILON {
371 total_gas_used as f64 / total_duration_seconds
372 } else {
373 0.0
374 };
375
376 let blocks_per_second = if total_duration_seconds > f64::EPSILON {
377 total_blocks as f64 / total_duration_seconds
378 } else {
379 0.0
380 };
381
382 let min_block_number = combined_data.first().unwrap().block_number;
383 let max_block_number = combined_data.last().unwrap().block_number;
384
385 Ok(BenchmarkSummary {
386 total_blocks,
387 total_gas_used,
388 total_duration_ms,
389 mean_new_payload_latency_ms,
390 median_new_payload_latency_ms,
391 p90_new_payload_latency_ms,
392 p99_new_payload_latency_ms,
393 gas_per_second,
394 blocks_per_second,
395 min_block_number,
396 max_block_number,
397 })
398 }
399
400 fn calculate_comparison_summary(
402 &self,
403 baseline: &BenchmarkSummary,
404 feature: &BenchmarkSummary,
405 per_block_comparisons: &[BlockComparison],
406 ) -> Result<ComparisonSummary> {
407 let calc_percent_change = |baseline: f64, feature: f64| -> f64 {
408 if baseline.abs() > f64::EPSILON {
409 ((feature - baseline) / baseline) * 100.0
410 } else {
411 0.0
412 }
413 };
414
415 let per_block_percent_changes: Vec<f64> =
420 per_block_comparisons.iter().map(|c| c.new_payload_latency_change_percent).collect();
421 let per_block_latency_change_mean_percent = if per_block_percent_changes.is_empty() {
422 0.0
423 } else {
424 per_block_percent_changes.iter().sum::<f64>() / per_block_percent_changes.len() as f64
425 };
426 let per_block_latency_change_median_percent = if per_block_percent_changes.is_empty() {
427 0.0
428 } else {
429 let mut sorted = per_block_percent_changes.clone();
430 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
431 percentile(&sorted, 0.5)
432 };
433 let per_block_latency_change_std_dev_percent =
434 calculate_std_dev(&per_block_percent_changes, per_block_latency_change_mean_percent);
435
436 let baseline_total_latency_ms =
437 baseline.mean_new_payload_latency_ms * baseline.total_blocks as f64;
438 let feature_total_latency_ms =
439 feature.mean_new_payload_latency_ms * feature.total_blocks as f64;
440 let new_payload_total_latency_change_percent =
441 calc_percent_change(baseline_total_latency_ms, feature_total_latency_ms);
442
443 Ok(ComparisonSummary {
444 per_block_latency_change_mean_percent,
445 per_block_latency_change_median_percent,
446 per_block_latency_change_std_dev_percent,
447 new_payload_total_latency_change_percent,
448 new_payload_latency_p50_change_percent: calc_percent_change(
449 baseline.median_new_payload_latency_ms,
450 feature.median_new_payload_latency_ms,
451 ),
452 new_payload_latency_p90_change_percent: calc_percent_change(
453 baseline.p90_new_payload_latency_ms,
454 feature.p90_new_payload_latency_ms,
455 ),
456 new_payload_latency_p99_change_percent: calc_percent_change(
457 baseline.p99_new_payload_latency_ms,
458 feature.p99_new_payload_latency_ms,
459 ),
460 gas_per_second_change_percent: calc_percent_change(
461 baseline.gas_per_second,
462 feature.gas_per_second,
463 ),
464 blocks_per_second_change_percent: calc_percent_change(
465 baseline.blocks_per_second,
466 feature.blocks_per_second,
467 ),
468 })
469 }
470
471 fn calculate_per_block_comparisons(
473 &self,
474 baseline: &BenchmarkResults,
475 feature: &BenchmarkResults,
476 ) -> Result<Vec<BlockComparison>> {
477 let mut baseline_map: HashMap<u64, &CombinedLatencyRow> = HashMap::new();
478 for row in &baseline.combined_latency_data {
479 baseline_map.insert(row.block_number, row);
480 }
481
482 let mut comparisons = Vec::new();
483 for feature_row in &feature.combined_latency_data {
484 if let Some(baseline_row) = baseline_map.get(&feature_row.block_number) {
485 let calc_percent_change = |baseline: u128, feature: u128| -> f64 {
486 if baseline > 0 {
487 ((feature as f64 - baseline as f64) / baseline as f64) * 100.0
488 } else {
489 0.0
490 }
491 };
492
493 let comparison = BlockComparison {
494 block_number: feature_row.block_number,
495 transaction_count: feature_row.transaction_count,
496 gas_used: feature_row.gas_used,
497 baseline_new_payload_latency: baseline_row.new_payload_latency,
498 feature_new_payload_latency: feature_row.new_payload_latency,
499 new_payload_latency_change_percent: calc_percent_change(
500 baseline_row.new_payload_latency,
501 feature_row.new_payload_latency,
502 ),
503 };
504 comparisons.push(comparison);
505 } else {
506 warn!("Block {} not found in baseline data", feature_row.block_number);
507 }
508 }
509
510 Ok(comparisons)
511 }
512
513 async fn write_comparison_reports(&self, report: &ComparisonReport) -> Result<()> {
515 let report_dir = self.output_dir.join("results").join(&self.timestamp);
516 fs::create_dir_all(&report_dir)
517 .wrap_err_with(|| format!("Failed to create report directory: {report_dir:?}"))?;
518
519 let json_path = report_dir.join("comparison_report.json");
521 let json_content = serde_json::to_string_pretty(report)
522 .wrap_err("Failed to serialize comparison report to JSON")?;
523 fs::write(&json_path, json_content)
524 .wrap_err_with(|| format!("Failed to write JSON report: {json_path:?}"))?;
525
526 let csv_path = report_dir.join("per_block_comparison.csv");
528 let mut writer = csv::Writer::from_path(&csv_path)
529 .wrap_err_with(|| format!("Failed to create CSV writer: {csv_path:?}"))?;
530
531 for comparison in &report.per_block_comparisons {
532 writer.serialize(comparison).wrap_err("Failed to write comparison row to CSV")?;
533 }
534 writer.flush().wrap_err("Failed to flush CSV writer")?;
535
536 info!("Comparison reports written to: {:?}", report_dir);
537 Ok(())
538 }
539
540 fn print_comparison_summary(&self, report: &ComparisonReport) {
542 let formatted_timestamp = if let Ok(dt) = chrono::DateTime::parse_from_str(
544 &format!("{} +0000", report.timestamp.replace('_', " ")),
545 "%Y%m%d %H%M%S %z",
546 ) {
547 dt.format("%Y-%m-%d %H:%M:%S UTC").to_string()
548 } else {
549 report.timestamp.clone()
551 };
552
553 println!("\n=== BENCHMARK COMPARISON SUMMARY ===");
554 println!("Timestamp: {formatted_timestamp}");
555 println!("Baseline: {}", report.baseline.ref_name);
556 println!("Feature: {}", report.feature.ref_name);
557 println!();
558
559 let summary = &report.comparison_summary;
560
561 println!("Performance Changes:");
562 println!(
563 " NewPayload Latency per-block mean change: {:+.2}%",
564 summary.per_block_latency_change_mean_percent
565 );
566 println!(
567 " NewPayload Latency per-block median change: {:+.2}%",
568 summary.per_block_latency_change_median_percent
569 );
570 println!(
571 " NewPayload Latency per-block std dev: {:.2}%",
572 summary.per_block_latency_change_std_dev_percent
573 );
574 println!(
575 " Total newPayload time change: {:+.2}%",
576 summary.new_payload_total_latency_change_percent
577 );
578 println!(
579 " NewPayload Latency p50: {:+.2}%",
580 summary.new_payload_latency_p50_change_percent
581 );
582 println!(
583 " NewPayload Latency p90: {:+.2}%",
584 summary.new_payload_latency_p90_change_percent
585 );
586 println!(
587 " NewPayload Latency p99: {:+.2}%",
588 summary.new_payload_latency_p99_change_percent
589 );
590 println!(
591 " Gas/Second: {:+.2}%",
592 summary.gas_per_second_change_percent
593 );
594 println!(
595 " Blocks/Second: {:+.2}%",
596 summary.blocks_per_second_change_percent
597 );
598 println!();
599
600 println!("Baseline Summary:");
601 let baseline = &report.baseline.summary;
602 println!(
603 " Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
604 baseline.total_blocks,
605 baseline.min_block_number,
606 baseline.max_block_number,
607 baseline.total_gas_used,
608 baseline.total_duration_ms as f64 / 1000.0
609 );
610 println!(" NewPayload latency (ms):");
611 println!(
612 " mean: {:.2}, p50: {:.2}, p90: {:.2}, p99: {:.2}",
613 baseline.mean_new_payload_latency_ms,
614 baseline.median_new_payload_latency_ms,
615 baseline.p90_new_payload_latency_ms,
616 baseline.p99_new_payload_latency_ms
617 );
618 if let (Some(start), Some(end)) =
619 (&report.baseline.start_timestamp, &report.baseline.end_timestamp)
620 {
621 println!(
622 " Started: {}, Ended: {}",
623 start.format("%Y-%m-%d %H:%M:%S UTC"),
624 end.format("%Y-%m-%d %H:%M:%S UTC")
625 );
626 }
627 if let Some(ref cmd) = report.baseline.reth_command {
628 println!(" Command: {}", cmd);
629 }
630 println!();
631
632 println!("Feature Summary:");
633 let feature = &report.feature.summary;
634 println!(
635 " Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
636 feature.total_blocks,
637 feature.min_block_number,
638 feature.max_block_number,
639 feature.total_gas_used,
640 feature.total_duration_ms as f64 / 1000.0
641 );
642 println!(" NewPayload latency (ms):");
643 println!(
644 " mean: {:.2}, p50: {:.2}, p90: {:.2}, p99: {:.2}",
645 feature.mean_new_payload_latency_ms,
646 feature.median_new_payload_latency_ms,
647 feature.p90_new_payload_latency_ms,
648 feature.p99_new_payload_latency_ms
649 );
650 if let (Some(start), Some(end)) =
651 (&report.feature.start_timestamp, &report.feature.end_timestamp)
652 {
653 println!(
654 " Started: {}, Ended: {}",
655 start.format("%Y-%m-%d %H:%M:%S UTC"),
656 end.format("%Y-%m-%d %H:%M:%S UTC")
657 );
658 }
659 if let Some(ref cmd) = report.feature.reth_command {
660 println!(" Command: {}", cmd);
661 }
662 println!();
663 }
664}
665
/// Returns the population standard deviation of `values` around the supplied `mean`.
///
/// The squared deviations are averaged over `values.len()` (not `len - 1`). An empty slice
/// yields `0.0`.
fn calculate_std_dev(values: &[f64], mean: f64) -> f64 {
    let count = values.len();
    if count == 0 {
        return 0.0;
    }

    let squared_deviation_sum: f64 = values.iter().map(|&v| (v - mean) * (v - mean)).sum();

    (squared_deviation_sum / count as f64).sqrt()
}
688
/// Returns the value at the given quantile of an ascending-sorted slice.
///
/// `percentile` is a fraction (`0.5` is the median) and is clamped to `0.0..=1.0`. When the
/// requested rank falls between two elements the result is linearly interpolated between
/// them. The slice is not sorted here; callers must pass it already sorted. An empty slice
/// yields `0.0`.
fn percentile(sorted_values: &[f64], percentile: f64) -> f64 {
    let max_index = match sorted_values.len().checked_sub(1) {
        Some(index) => index,
        None => return 0.0,
    };

    // Fractional position of the requested quantile within the slice.
    let rank = percentile.clamp(0.0, 1.0) * max_index as f64;
    let below = rank.floor() as usize;
    let above = rank.ceil() as usize;

    if below != above {
        let weight = rank - below as f64;
        sorted_values[below].mul_add(1.0 - weight, sorted_values[above] * weight)
    } else {
        sorted_values[below]
    }
}