1use crate::cli::Args;
4use chrono::{DateTime, Utc};
5use csv::Reader;
6use eyre::{eyre, Result, WrapErr};
7use serde::{Deserialize, Serialize};
8use std::{
9 cmp::Ordering,
10 collections::HashMap,
11 fs,
12 path::{Path, PathBuf},
13};
14use tracing::{info, warn};
15
16pub(crate) struct ComparisonGenerator {
18 output_dir: PathBuf,
19 timestamp: String,
20 baseline_ref_name: String,
21 feature_ref_name: String,
22 baseline_results: Option<BenchmarkResults>,
23 feature_results: Option<BenchmarkResults>,
24 baseline_command: Option<String>,
25 feature_command: Option<String>,
26}
27
28#[derive(Debug, Clone)]
30pub(crate) struct BenchmarkResults {
31 pub ref_name: String,
32 pub combined_latency_data: Vec<CombinedLatencyRow>,
33 pub summary: BenchmarkSummary,
34 pub start_timestamp: Option<DateTime<Utc>>,
35 pub end_timestamp: Option<DateTime<Utc>>,
36}
37
38#[derive(Debug, Clone, Deserialize, Serialize)]
40pub(crate) struct CombinedLatencyRow {
41 pub block_number: u64,
42 #[serde(default)]
43 pub transaction_count: Option<u64>,
44 pub gas_used: u64,
45 pub new_payload_latency: u128,
46}
47
48#[derive(Debug, Clone, Deserialize, Serialize)]
50pub(crate) struct TotalGasRow {
51 pub block_number: u64,
52 #[serde(default)]
53 pub transaction_count: Option<u64>,
54 pub gas_used: u64,
55 pub time: u128,
56}
57
58#[derive(Debug, Clone, Serialize)]
65pub(crate) struct BenchmarkSummary {
66 pub total_blocks: u64,
67 pub total_gas_used: u64,
68 pub total_duration_ms: u128,
69 pub mean_new_payload_latency_ms: f64,
70 pub median_new_payload_latency_ms: f64,
71 pub p90_new_payload_latency_ms: f64,
72 pub p99_new_payload_latency_ms: f64,
73 pub gas_per_second: f64,
74 pub blocks_per_second: f64,
75 pub min_block_number: u64,
76 pub max_block_number: u64,
77}
78
79#[derive(Debug, Serialize)]
81pub(crate) struct ComparisonReport {
82 pub timestamp: String,
83 pub baseline: RefInfo,
84 pub feature: RefInfo,
85 pub comparison_summary: ComparisonSummary,
86 pub per_block_comparisons: Vec<BlockComparison>,
87}
88
89#[derive(Debug, Serialize)]
91pub(crate) struct RefInfo {
92 pub ref_name: String,
93 pub summary: BenchmarkSummary,
94 pub start_timestamp: Option<DateTime<Utc>>,
95 pub end_timestamp: Option<DateTime<Utc>>,
96 pub reth_command: Option<String>,
97}
98
99#[derive(Debug, Serialize)]
115pub(crate) struct ComparisonSummary {
116 pub per_block_latency_change_mean_percent: f64,
117 pub per_block_latency_change_median_percent: f64,
118 pub per_block_latency_change_std_dev_percent: f64,
119 pub new_payload_total_latency_change_percent: f64,
120 pub new_payload_latency_mean_change_percent: f64,
121 pub new_payload_latency_p50_change_percent: f64,
122 pub new_payload_latency_p90_change_percent: f64,
123 pub new_payload_latency_p99_change_percent: f64,
124 pub gas_per_second_change_percent: f64,
125 pub blocks_per_second_change_percent: f64,
126}
127
128#[derive(Debug, Serialize)]
130pub(crate) struct BlockComparison {
131 pub block_number: u64,
132 #[serde(skip_serializing_if = "Option::is_none")]
133 pub transaction_count: Option<u64>,
134 pub gas_used: u64,
135 pub baseline_new_payload_latency: u128,
136 pub feature_new_payload_latency: u128,
137 pub new_payload_latency_change_percent: f64,
138}
139
140impl ComparisonGenerator {
141 pub(crate) fn new(args: &Args) -> Self {
143 let now: DateTime<Utc> = Utc::now();
144 let timestamp = now.format("%Y%m%d_%H%M%S").to_string();
145
146 Self {
147 output_dir: args.output_dir_path(),
148 timestamp,
149 baseline_ref_name: args.baseline_ref.clone(),
150 feature_ref_name: args.feature_ref.clone(),
151 baseline_results: None,
152 feature_results: None,
153 baseline_command: None,
154 feature_command: None,
155 }
156 }
157
158 pub(crate) fn get_ref_output_dir(&self, ref_type: &str) -> PathBuf {
160 self.output_dir.join("results").join(&self.timestamp).join(ref_type)
161 }
162
163 pub(crate) fn get_output_dir(&self) -> PathBuf {
165 self.output_dir.join("results").join(&self.timestamp)
166 }
167
168 pub(crate) fn add_ref_results(&mut self, ref_type: &str, output_path: &Path) -> Result<()> {
170 let ref_name = match ref_type {
171 "baseline" => &self.baseline_ref_name,
172 "feature" => &self.feature_ref_name,
173 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
174 };
175
176 let results = self.load_benchmark_results(ref_name, output_path)?;
177
178 match ref_type {
179 "baseline" => self.baseline_results = Some(results),
180 "feature" => self.feature_results = Some(results),
181 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
182 }
183
184 info!("Loaded benchmark results for {} reference", ref_type);
185
186 Ok(())
187 }
188
189 pub(crate) fn set_ref_timestamps(
191 &mut self,
192 ref_type: &str,
193 start: DateTime<Utc>,
194 end: DateTime<Utc>,
195 ) -> Result<()> {
196 match ref_type {
197 "baseline" => {
198 if let Some(ref mut results) = self.baseline_results {
199 results.start_timestamp = Some(start);
200 results.end_timestamp = Some(end);
201 } else {
202 return Err(eyre!("Baseline results not loaded yet"));
203 }
204 }
205 "feature" => {
206 if let Some(ref mut results) = self.feature_results {
207 results.start_timestamp = Some(start);
208 results.end_timestamp = Some(end);
209 } else {
210 return Err(eyre!("Feature results not loaded yet"));
211 }
212 }
213 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
214 }
215
216 Ok(())
217 }
218
219 pub(crate) fn set_ref_command(&mut self, ref_type: &str, command: String) -> Result<()> {
221 match ref_type {
222 "baseline" => {
223 self.baseline_command = Some(command);
224 }
225 "feature" => {
226 self.feature_command = Some(command);
227 }
228 _ => return Err(eyre!("Unknown reference type: {}", ref_type)),
229 }
230
231 Ok(())
232 }
233
234 pub(crate) async fn generate_comparison_report(&self) -> Result<()> {
236 info!("Generating comparison report...");
237
238 let baseline =
239 self.baseline_results.as_ref().ok_or_else(|| eyre!("Baseline results not loaded"))?;
240
241 let feature =
242 self.feature_results.as_ref().ok_or_else(|| eyre!("Feature results not loaded"))?;
243
244 let per_block_comparisons = self.calculate_per_block_comparisons(baseline, feature)?;
245 let comparison_summary = self.calculate_comparison_summary(
246 &baseline.summary,
247 &feature.summary,
248 &per_block_comparisons,
249 )?;
250
251 let report = ComparisonReport {
252 timestamp: self.timestamp.clone(),
253 baseline: RefInfo {
254 ref_name: baseline.ref_name.clone(),
255 summary: baseline.summary.clone(),
256 start_timestamp: baseline.start_timestamp,
257 end_timestamp: baseline.end_timestamp,
258 reth_command: self.baseline_command.clone(),
259 },
260 feature: RefInfo {
261 ref_name: feature.ref_name.clone(),
262 summary: feature.summary.clone(),
263 start_timestamp: feature.start_timestamp,
264 end_timestamp: feature.end_timestamp,
265 reth_command: self.feature_command.clone(),
266 },
267 comparison_summary,
268 per_block_comparisons,
269 };
270
271 self.write_comparison_reports(&report).await?;
273
274 self.print_comparison_summary(&report);
276
277 Ok(())
278 }
279
280 fn load_benchmark_results(
282 &self,
283 ref_name: &str,
284 output_path: &Path,
285 ) -> Result<BenchmarkResults> {
286 let combined_latency_path = output_path.join("combined_latency.csv");
287 let total_gas_path = output_path.join("total_gas.csv");
288
289 let combined_latency_data = self.load_combined_latency_csv(&combined_latency_path)?;
290 let total_gas_data = self.load_total_gas_csv(&total_gas_path)?;
291
292 let summary = self.calculate_summary(&combined_latency_data, &total_gas_data)?;
293
294 Ok(BenchmarkResults {
295 ref_name: ref_name.to_string(),
296 combined_latency_data,
297 summary,
298 start_timestamp: None,
299 end_timestamp: None,
300 })
301 }
302
303 fn load_combined_latency_csv(&self, path: &Path) -> Result<Vec<CombinedLatencyRow>> {
305 let mut reader = Reader::from_path(path)
306 .wrap_err_with(|| format!("Failed to open combined latency CSV: {path:?}"))?;
307
308 let mut rows = Vec::new();
309 for result in reader.deserialize() {
310 let row: CombinedLatencyRow = result
311 .wrap_err_with(|| format!("Failed to parse combined latency row in {path:?}"))?;
312 rows.push(row);
313 }
314
315 if rows.is_empty() {
316 return Err(eyre!("No data found in combined latency CSV: {:?}", path));
317 }
318
319 Ok(rows)
320 }
321
322 fn load_total_gas_csv(&self, path: &Path) -> Result<Vec<TotalGasRow>> {
324 let mut reader = Reader::from_path(path)
325 .wrap_err_with(|| format!("Failed to open total gas CSV: {path:?}"))?;
326
327 let mut rows = Vec::new();
328 for result in reader.deserialize() {
329 let row: TotalGasRow =
330 result.wrap_err_with(|| format!("Failed to parse total gas row in {path:?}"))?;
331 rows.push(row);
332 }
333
334 if rows.is_empty() {
335 return Err(eyre!("No data found in total gas CSV: {:?}", path));
336 }
337
338 Ok(rows)
339 }
340
341 fn calculate_summary(
347 &self,
348 combined_data: &[CombinedLatencyRow],
349 total_gas_data: &[TotalGasRow],
350 ) -> Result<BenchmarkSummary> {
351 if combined_data.is_empty() || total_gas_data.is_empty() {
352 return Err(eyre!("Cannot calculate summary for empty data"));
353 }
354
355 let total_blocks = combined_data.len() as u64;
356 let total_gas_used: u64 = combined_data.iter().map(|r| r.gas_used).sum();
357
358 let total_duration_ms = total_gas_data.last().unwrap().time / 1000; let latencies_ms: Vec<f64> =
361 combined_data.iter().map(|r| r.new_payload_latency as f64 / 1000.0).collect();
362 let mean_new_payload_latency_ms: f64 =
363 latencies_ms.iter().sum::<f64>() / total_blocks as f64;
364
365 let mut sorted_latencies_ms = latencies_ms;
366 sorted_latencies_ms.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
367 let median_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.5);
368 let p90_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.9);
369 let p99_new_payload_latency_ms = percentile(&sorted_latencies_ms, 0.99);
370
371 let total_duration_seconds = total_duration_ms as f64 / 1000.0;
372 let gas_per_second = if total_duration_seconds > f64::EPSILON {
373 total_gas_used as f64 / total_duration_seconds
374 } else {
375 0.0
376 };
377
378 let blocks_per_second = if total_duration_seconds > f64::EPSILON {
379 total_blocks as f64 / total_duration_seconds
380 } else {
381 0.0
382 };
383
384 let min_block_number = combined_data.first().unwrap().block_number;
385 let max_block_number = combined_data.last().unwrap().block_number;
386
387 Ok(BenchmarkSummary {
388 total_blocks,
389 total_gas_used,
390 total_duration_ms,
391 mean_new_payload_latency_ms,
392 median_new_payload_latency_ms,
393 p90_new_payload_latency_ms,
394 p99_new_payload_latency_ms,
395 gas_per_second,
396 blocks_per_second,
397 min_block_number,
398 max_block_number,
399 })
400 }
401
402 fn calculate_comparison_summary(
404 &self,
405 baseline: &BenchmarkSummary,
406 feature: &BenchmarkSummary,
407 per_block_comparisons: &[BlockComparison],
408 ) -> Result<ComparisonSummary> {
409 let calc_percent_change = |baseline: f64, feature: f64| -> f64 {
410 if baseline.abs() > f64::EPSILON {
411 ((feature - baseline) / baseline) * 100.0
412 } else {
413 0.0
414 }
415 };
416
417 let per_block_percent_changes: Vec<f64> =
422 per_block_comparisons.iter().map(|c| c.new_payload_latency_change_percent).collect();
423 let per_block_latency_change_mean_percent = if per_block_percent_changes.is_empty() {
424 0.0
425 } else {
426 per_block_percent_changes.iter().sum::<f64>() / per_block_percent_changes.len() as f64
427 };
428 let per_block_latency_change_median_percent = if per_block_percent_changes.is_empty() {
429 0.0
430 } else {
431 let mut sorted = per_block_percent_changes.clone();
432 sorted.sort_by(|a, b| a.partial_cmp(b).unwrap_or(Ordering::Equal));
433 percentile(&sorted, 0.5)
434 };
435 let per_block_latency_change_std_dev_percent =
436 calculate_std_dev(&per_block_percent_changes, per_block_latency_change_mean_percent);
437
438 let baseline_total_latency_ms =
439 baseline.mean_new_payload_latency_ms * baseline.total_blocks as f64;
440 let feature_total_latency_ms =
441 feature.mean_new_payload_latency_ms * feature.total_blocks as f64;
442 let new_payload_total_latency_change_percent =
443 calc_percent_change(baseline_total_latency_ms, feature_total_latency_ms);
444
445 Ok(ComparisonSummary {
446 per_block_latency_change_mean_percent,
447 per_block_latency_change_median_percent,
448 per_block_latency_change_std_dev_percent,
449 new_payload_total_latency_change_percent,
450 new_payload_latency_mean_change_percent: calc_percent_change(
451 baseline.mean_new_payload_latency_ms,
452 feature.mean_new_payload_latency_ms,
453 ),
454 new_payload_latency_p50_change_percent: calc_percent_change(
455 baseline.median_new_payload_latency_ms,
456 feature.median_new_payload_latency_ms,
457 ),
458 new_payload_latency_p90_change_percent: calc_percent_change(
459 baseline.p90_new_payload_latency_ms,
460 feature.p90_new_payload_latency_ms,
461 ),
462 new_payload_latency_p99_change_percent: calc_percent_change(
463 baseline.p99_new_payload_latency_ms,
464 feature.p99_new_payload_latency_ms,
465 ),
466 gas_per_second_change_percent: calc_percent_change(
467 baseline.gas_per_second,
468 feature.gas_per_second,
469 ),
470 blocks_per_second_change_percent: calc_percent_change(
471 baseline.blocks_per_second,
472 feature.blocks_per_second,
473 ),
474 })
475 }
476
477 fn calculate_per_block_comparisons(
479 &self,
480 baseline: &BenchmarkResults,
481 feature: &BenchmarkResults,
482 ) -> Result<Vec<BlockComparison>> {
483 let mut baseline_map: HashMap<u64, &CombinedLatencyRow> = HashMap::new();
484 for row in &baseline.combined_latency_data {
485 baseline_map.insert(row.block_number, row);
486 }
487
488 let mut comparisons = Vec::new();
489 for feature_row in &feature.combined_latency_data {
490 if let Some(baseline_row) = baseline_map.get(&feature_row.block_number) {
491 let calc_percent_change = |baseline: u128, feature: u128| -> f64 {
492 if baseline > 0 {
493 ((feature as f64 - baseline as f64) / baseline as f64) * 100.0
494 } else {
495 0.0
496 }
497 };
498
499 let comparison = BlockComparison {
500 block_number: feature_row.block_number,
501 transaction_count: feature_row.transaction_count,
502 gas_used: feature_row.gas_used,
503 baseline_new_payload_latency: baseline_row.new_payload_latency,
504 feature_new_payload_latency: feature_row.new_payload_latency,
505 new_payload_latency_change_percent: calc_percent_change(
506 baseline_row.new_payload_latency,
507 feature_row.new_payload_latency,
508 ),
509 };
510 comparisons.push(comparison);
511 } else {
512 warn!("Block {} not found in baseline data", feature_row.block_number);
513 }
514 }
515
516 Ok(comparisons)
517 }
518
519 async fn write_comparison_reports(&self, report: &ComparisonReport) -> Result<()> {
521 let report_dir = self.output_dir.join("results").join(&self.timestamp);
522 fs::create_dir_all(&report_dir)
523 .wrap_err_with(|| format!("Failed to create report directory: {report_dir:?}"))?;
524
525 let json_path = report_dir.join("comparison_report.json");
527 let json_content = serde_json::to_string_pretty(report)
528 .wrap_err("Failed to serialize comparison report to JSON")?;
529 fs::write(&json_path, json_content)
530 .wrap_err_with(|| format!("Failed to write JSON report: {json_path:?}"))?;
531
532 let csv_path = report_dir.join("per_block_comparison.csv");
534 let mut writer = csv::Writer::from_path(&csv_path)
535 .wrap_err_with(|| format!("Failed to create CSV writer: {csv_path:?}"))?;
536
537 for comparison in &report.per_block_comparisons {
538 writer.serialize(comparison).wrap_err("Failed to write comparison row to CSV")?;
539 }
540 writer.flush().wrap_err("Failed to flush CSV writer")?;
541
542 info!("Comparison reports written to: {:?}", report_dir);
543 Ok(())
544 }
545
546 fn print_comparison_summary(&self, report: &ComparisonReport) {
548 let formatted_timestamp = if let Ok(dt) = chrono::DateTime::parse_from_str(
550 &format!("{} +0000", report.timestamp.replace('_', " ")),
551 "%Y%m%d %H%M%S %z",
552 ) {
553 dt.format("%Y-%m-%d %H:%M:%S UTC").to_string()
554 } else {
555 report.timestamp.clone()
557 };
558
559 println!("\n=== BENCHMARK COMPARISON SUMMARY ===");
560 println!("Timestamp: {formatted_timestamp}");
561 println!("Baseline: {}", report.baseline.ref_name);
562 println!("Feature: {}", report.feature.ref_name);
563 println!();
564
565 let summary = &report.comparison_summary;
566
567 println!("Performance Changes:");
568 println!(
569 " NewPayload Latency per-block mean change: {:+.2}%",
570 summary.per_block_latency_change_mean_percent
571 );
572 println!(
573 " NewPayload Latency per-block median change: {:+.2}%",
574 summary.per_block_latency_change_median_percent
575 );
576 println!(
577 " NewPayload Latency per-block std dev: {:.2}%",
578 summary.per_block_latency_change_std_dev_percent
579 );
580 println!(
581 " Total newPayload time change: {:+.2}%",
582 summary.new_payload_total_latency_change_percent
583 );
584 println!(
585 " NewPayload Latency mean: {:+.2}%",
586 summary.new_payload_latency_mean_change_percent
587 );
588 println!(
589 " NewPayload Latency p50: {:+.2}%",
590 summary.new_payload_latency_p50_change_percent
591 );
592 println!(
593 " NewPayload Latency p90: {:+.2}%",
594 summary.new_payload_latency_p90_change_percent
595 );
596 println!(
597 " NewPayload Latency p99: {:+.2}%",
598 summary.new_payload_latency_p99_change_percent
599 );
600 println!(
601 " Gas/Second: {:+.2}%",
602 summary.gas_per_second_change_percent
603 );
604 println!(
605 " Blocks/Second: {:+.2}%",
606 summary.blocks_per_second_change_percent
607 );
608 println!();
609
610 println!("Baseline Summary:");
611 let baseline = &report.baseline.summary;
612 println!(
613 " Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
614 baseline.total_blocks,
615 baseline.min_block_number,
616 baseline.max_block_number,
617 baseline.total_gas_used,
618 baseline.total_duration_ms as f64 / 1000.0
619 );
620 println!(" NewPayload latency (ms):");
621 println!(
622 " mean: {:.2}, p50: {:.2}, p90: {:.2}, p99: {:.2}",
623 baseline.mean_new_payload_latency_ms,
624 baseline.median_new_payload_latency_ms,
625 baseline.p90_new_payload_latency_ms,
626 baseline.p99_new_payload_latency_ms
627 );
628 if let (Some(start), Some(end)) =
629 (&report.baseline.start_timestamp, &report.baseline.end_timestamp)
630 {
631 println!(
632 " Started: {}, Ended: {}",
633 start.format("%Y-%m-%d %H:%M:%S UTC"),
634 end.format("%Y-%m-%d %H:%M:%S UTC")
635 );
636 }
637 if let Some(ref cmd) = report.baseline.reth_command {
638 println!(" Command: {}", cmd);
639 }
640 println!();
641
642 println!("Feature Summary:");
643 let feature = &report.feature.summary;
644 println!(
645 " Blocks: {} (blocks {} to {}), Gas: {}, Duration: {:.2}s",
646 feature.total_blocks,
647 feature.min_block_number,
648 feature.max_block_number,
649 feature.total_gas_used,
650 feature.total_duration_ms as f64 / 1000.0
651 );
652 println!(" NewPayload latency (ms):");
653 println!(
654 " mean: {:.2}, p50: {:.2}, p90: {:.2}, p99: {:.2}",
655 feature.mean_new_payload_latency_ms,
656 feature.median_new_payload_latency_ms,
657 feature.p90_new_payload_latency_ms,
658 feature.p99_new_payload_latency_ms
659 );
660 if let (Some(start), Some(end)) =
661 (&report.feature.start_timestamp, &report.feature.end_timestamp)
662 {
663 println!(
664 " Started: {}, Ended: {}",
665 start.format("%Y-%m-%d %H:%M:%S UTC"),
666 end.format("%Y-%m-%d %H:%M:%S UTC")
667 );
668 }
669 if let Some(ref cmd) = report.feature.reth_command {
670 println!(" Command: {}", cmd);
671 }
672 println!();
673 }
674}
675
/// Population standard deviation of `values` around the supplied `mean`.
///
/// The mean is passed in rather than recomputed. An empty slice yields `0.0`.
fn calculate_std_dev(values: &[f64], mean: f64) -> f64 {
    let count = values.len();
    if count == 0 {
        return 0.0;
    }

    let squared_deviations = values.iter().map(|&value| {
        let deviation = value - mean;
        deviation * deviation
    });

    (squared_deviations.sum::<f64>() / count as f64).sqrt()
}
698
/// Linearly interpolated percentile of an ascending slice.
///
/// `percentile` is a fraction and is clamped to `0.0..=1.0`. The rank is that fraction of the
/// highest index; a rank between two indices blends the two neighbouring values by its
/// fractional part. An empty slice yields `0.0`.
fn percentile(sorted_values: &[f64], percentile: f64) -> f64 {
    let last_index = match sorted_values.len() {
        0 => return 0.0,
        len => len - 1,
    };

    let position = percentile.clamp(0.0, 1.0) * last_index as f64;
    let below = position.floor() as usize;
    let above = position.ceil() as usize;

    if below != above {
        let fraction = position - below as f64;
        sorted_values[below].mul_add(1.0 - fraction, sorted_values[above] * fraction)
    } else {
        sorted_values[below]
    }
}