| """ |
| Benchmark script: Compare all approaches on test audio files. |
| |
| Runs all three approaches on each input file, collects metrics, |
| and produces a CSV comparison report. |
| |
| Usage: |
| uv run python scripts/benchmark.py --data-dir data --output-dir benchmark_results |
| """ |
|
|
import argparse
import csv
import json
import logging
import sys
import time
from datetime import datetime
from pathlib import Path

# The approaches package lives in the project root, one level above this
# script, so the root must be on sys.path before the import below can resolve.
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))

from approaches import get_approach, list_approaches  # noqa: E402

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(message)s",
    datefmt="%H:%M:%S",
)
log = logging.getLogger("benchmark")

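# This script assumes `approaches` exposes a small registry: list_approaches()
# returns the registered names, and get_approach(name) returns a class whose
# instances implement run(input_file, output_dir, whisper_model) and return an
# object carrying the metric attributes read in _run_approach below. A minimal
# registry satisfying that contract (a sketch, not the actual package) could be:
#
#     _REGISTRY: dict[str, type] = {}
#
#     def register(name: str):
#         def decorator(cls: type) -> type:
#             _REGISTRY[name] = cls
#             return cls
#         return decorator
#
#     def list_approaches() -> list[str]:
#         return sorted(_REGISTRY)
#
#     def get_approach(name: str) -> type:
#         return _REGISTRY[name]
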
class BenchmarkRunner:
    """Run the benchmark across all approaches and audio files."""

    def __init__(self, data_dir: str, output_dir: str):
        self.data_dir = Path(data_dir)
        self.output_dir = Path(output_dir)
        self.output_dir.mkdir(parents=True, exist_ok=True)

        # One result dict per (approach, input file) run.
        self.results = []

    def find_test_files(self) -> list[Path]:
        """Find all WAV files in the data directory."""
        wav_files = sorted(self.data_dir.glob("*.wav"))
        if not wav_files:
            log.warning(f"No WAV files found in {self.data_dir}")
        return wav_files

    def run_benchmark(self, whisper_model: str = "base"):
        """Run all approaches on all test files."""
        test_files = self.find_test_files()
        if not test_files:
            log.error(f"No test files found in {self.data_dir}")
            return

        log.info("=" * 70)
        log.info(f"BENCHMARK: {len(list_approaches())} approaches × {len(test_files)} files")
        log.info("=" * 70)

        for input_file in test_files:
            log.info(f"\n{'=' * 70}")
            log.info(f"File: {input_file.name}")
            log.info(f"{'=' * 70}")

            for approach_name in list_approaches():
                log.info(f"\n  Testing approach: {approach_name}")
                log.info("-" * 70)

                try:
                    result = self._run_approach(
                        approach_name,
                        input_file,
                        whisper_model,
                    )
                except Exception as e:
                    # Record the failure and keep benchmarking the rest.
                    log.error(f"  FAILED: {e}")
                    result = {
                        "timestamp": datetime.now().isoformat(),
                        "input_file": input_file.name,
                        "approach": approach_name,
                        "status": "FAILED",
                        "error": str(e),
                    }
                self.results.append(result)

        log.info(f"\n{'=' * 70}")
        log.info("BENCHMARK COMPLETE")
        log.info(f"{'=' * 70}")

        # Persist results and report summary statistics.
        self._save_results()
        self._print_summary()

    def _run_approach(self, approach_name: str, input_file: Path, whisper_model: str):
        """Run a single approach on a single file and collect its metrics."""
        # Each (approach, file) pair gets its own output directory.
        output_subdir = self.output_dir / approach_name / input_file.stem
        output_subdir.mkdir(parents=True, exist_ok=True)

        approach_class = get_approach(approach_name)
        approach = approach_class()

        # Time the full pipeline run; perf_counter is monotonic and better
        # suited to interval timing than time.time().
        start_time = time.perf_counter()
        pipeline_output = approach.run(
            input_file=str(input_file),
            output_dir=str(output_subdir),
            whisper_model=whisper_model,
        )
        execution_time = time.perf_counter() - start_time

        result = {
            "timestamp": datetime.now().isoformat(),
            "input_file": input_file.name,
            "input_size_mb": input_file.stat().st_size / (1024 * 1024),
            "approach": approach_name,
            "status": "SUCCESS",
            "duration_seconds": pipeline_output.duration_seconds,
            "execution_time_seconds": execution_time,
            # Audio seconds processed per wall-clock second: 2.0 means the
            # pipeline ran twice as fast as real time.
            "realtime_factor": (pipeline_output.duration_seconds / execution_time)
            if execution_time > 0 else 0,
            "n_speakers": pipeline_output.n_speakers,
            "talker_of_interest": pipeline_output.talker_of_interest,
            "separation_method": pipeline_output.separation_method,
            "doa_method": pipeline_output.doa_method,
            "gender_method": pipeline_output.gender_method,
            "asr_model": pipeline_output.asr_model,
            "output_dir": str(output_subdir),
        }

        log.info("  Status: SUCCESS")
        log.info(f"  Execution time: {execution_time:.2f}s")
        log.info(f"  Speakers: {pipeline_output.n_speakers}")
        log.info(f"  ToI: Speaker {pipeline_output.talker_of_interest}")
        log.info(f"  Output: {output_subdir}")

        return result

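    # A successful result row, with illustrative values only, looks like:
    #
    #     {
    #         "timestamp": "2024-01-01T12:00:00",
    #         "input_file": "sample.wav",
    #         "approach": "baseline",
    #         "status": "SUCCESS",
    #         "execution_time_seconds": 12.34,
    #         "realtime_factor": 4.9,
    #         ...
    #     }
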
    def _save_results(self):
        """Save results to CSV and JSON."""
        # Rows may have different keys (failed runs carry an "error" field,
        # successful runs carry metrics), so build the CSV fieldnames from the
        # union of all keys in first-seen order rather than the first row only.
        csv_path = self.output_dir / "benchmark_results.csv"
        if self.results:
            fieldnames = {}
            for result in self.results:
                fieldnames.update(dict.fromkeys(result))
            with open(csv_path, "w", newline="") as f:
                writer = csv.DictWriter(f, fieldnames=list(fieldnames), restval="")
                writer.writeheader()
                writer.writerows(self.results)
            log.info(f"\nSaved: {csv_path}")

        json_path = self.output_dir / "benchmark_results.json"
        with open(json_path, "w") as f:
            json.dump(self.results, f, indent=2)
        log.info(f"Saved: {json_path}")

    def _print_summary(self):
        """Print summary statistics, grouped by approach."""
        if not self.results:
            return

        log.info("\n" + "=" * 70)
        log.info("SUMMARY")
        log.info("=" * 70)

        # Group results by approach.
        by_approach = {}
        for result in self.results:
            by_approach.setdefault(result.get("approach"), []).append(result)

        for approach, runs in sorted(by_approach.items()):
            successful = [r for r in runs if r.get("status") == "SUCCESS"]
            failed = [r for r in runs if r.get("status") == "FAILED"]

            log.info(f"\nApproach: {approach}")
            log.info(f"  Successful: {len(successful)}/{len(runs)}")

            if successful:
                avg_exec_time = sum(r["execution_time_seconds"] for r in successful) / len(successful)
                avg_rtf = sum(r.get("realtime_factor", 0) for r in successful) / len(successful)

                log.info(f"  Avg execution time: {avg_exec_time:.2f}s")
                log.info(f"  Avg real-time factor: {avg_rtf:.1f}x")

            if failed:
                log.info(f"  Failed runs: {len(failed)}")

def main() -> int:
    parser = argparse.ArgumentParser(description="Benchmark all approaches")
    parser.add_argument("--data-dir", default="data", help="Directory with test WAV files")
    parser.add_argument("--output-dir", default="benchmark_results", help="Output directory")
    parser.add_argument("-w", "--whisper-model", default="base", help="Whisper model")
    parser.add_argument("-v", "--verbose", action="store_true", help="Enable debug logging")
    args = parser.parse_args()

    if args.verbose:
        logging.getLogger().setLevel(logging.DEBUG)

    data_path = Path(args.data_dir)
    if not data_path.exists():
        log.error(f"Data directory not found: {data_path}")
        return 1

    runner = BenchmarkRunner(args.data_dir, args.output_dir)
    runner.run_benchmark(args.whisper_model)
    return 0


if __name__ == "__main__":
    sys.exit(main())
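
# Example follow-up analysis of the CSV report (a sketch; assumes pandas is
# installed, which this script itself does not require):
#
#     import pandas as pd
#     df = pd.read_csv("benchmark_results/benchmark_results.csv")
#     print(df.groupby("approach")[["execution_time_seconds", "realtime_factor"]].mean())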