#!/usr/bin/env python3
"""
Benchmark script for VAD + Speaker Diarization.
Tests performance under various audio conditions.
"""
import sys
from pathlib import Path

# Make the project root importable so `src.*` resolves when this script
# is run directly from its own directory.
sys.path.insert(0, str(Path(__file__).parent.parent))

import argparse
import json
import os
from typing import Dict, List, Optional

import numpy as np

from src.vad import SileroVAD
from src.pipeline import VADDiarizationPipeline
from src.utils import create_test_audio


class Benchmark:
    """Benchmark suite for VAD + Diarization."""

    def __init__(self, use_auth_token: Optional[str] = None):
        """Initialize the benchmark suite."""
        self.use_auth_token = use_auth_token
        self.results: Dict = {}

    def benchmark_vad_latency(self, durations: List[float] = [1, 5, 10, 30, 60]):
        """Benchmark VAD latency across different audio durations."""
        print("\n" + "=" * 60)
        print("VAD LATENCY BENCHMARK")
        print("=" * 60)

        vad = SileroVAD(threshold=0.5)
        results = []

        for duration in durations:
            print(f"\nTesting {duration}s audio...")
            metrics = vad.benchmark_latency(duration_seconds=duration)
            result = {
                'duration_s': duration,
                'processing_time_ms': metrics['total_processing_time_ms'],
                'latency_per_second_ms': metrics['latency_per_second_ms'],
                'real_time_factor': metrics['real_time_factor'],
            }
            results.append(result)

            print(f"  Processing time: {result['processing_time_ms']:.2f}ms")
            print(f"  Latency/second: {result['latency_per_second_ms']:.2f}ms")
            print(f"  Real-time factor: {result['real_time_factor']:.4f}x")

            # Check against the latency target
            if result['latency_per_second_ms'] < 100:
                print("  ✅ Target achieved (<100ms)")
            else:
                print("  ⚠️ Above target (>=100ms)")

        self.results['vad_latency'] = results

        # Summary
        avg_latency = np.mean([r['latency_per_second_ms'] for r in results])
        print(f"\n📊 Average latency: {avg_latency:.2f}ms per second")
        return results
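
    # 'real_time_factor' comes from SileroVAD.benchmark_latency and is assumed
    # to be processing_time / audio_duration: e.g. an RTF of 0.05 means 10 s
    # of audio is processed in ~0.5 s. Values below 1.0 are faster than real
    # time, consistent with the <100 ms-per-second target checked above.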

    def benchmark_vad_thresholds(self, thresholds: List[float] = [0.3, 0.5, 0.7]):
        """Benchmark VAD with different sensitivity thresholds."""
        print("\n" + "=" * 60)
        print("VAD THRESHOLD BENCHMARK")
        print("=" * 60)

        # Create test audio
        test_audio = create_test_audio("test_threshold.wav", duration=10.0)
        results = []

        for threshold in thresholds:
            print(f"\nTesting threshold {threshold}...")
            vad = SileroVAD(threshold=threshold)
            timestamps, processing_time = vad.process_file(test_audio)
            result = {
                'threshold': threshold,
                'num_segments': len(timestamps),
                'processing_time_ms': processing_time,
                'total_speech_time_s': sum(ts['end'] - ts['start'] for ts in timestamps),
            }
            results.append(result)

            print(f"  Segments detected: {result['num_segments']}")
            print(f"  Total speech time: {result['total_speech_time_s']:.2f}s")
            print(f"  Processing time: {result['processing_time_ms']:.2f}ms")

        self.results['vad_thresholds'] = results

        # Cleanup
        Path(test_audio).unlink(missing_ok=True)
        return results
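
    # Expected tradeoff: a lower threshold makes the VAD more permissive
    # (typically more segments and more total speech time, at the cost of
    # false positives on noise), while a higher threshold may clip quiet
    # speech. 0.5 is the default used by the other benchmarks in this script.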

    def benchmark_full_pipeline(self):
        """Benchmark the full VAD + Diarization pipeline."""
        print("\n" + "=" * 60)
        print("FULL PIPELINE BENCHMARK")
        print("=" * 60)

        if not self.use_auth_token:
            print("⚠️ No HF_TOKEN provided, skipping full pipeline benchmark")
            return None

        try:
            # Initialize pipeline
            print("\nInitializing pipeline...")
            pipeline = VADDiarizationPipeline(
                use_auth_token=self.use_auth_token,
                vad_threshold=0.5,
            )

            # Create test audio
            test_audio = create_test_audio("test_pipeline.wav", duration=30.0)

            # Process
            print(f"\nProcessing {test_audio}...")
            result = pipeline.process_file(test_audio)

            benchmark_result = {
                'audio_duration_s': 30.0,
                'vad_time_ms': result['processing_time']['vad_ms'],
                'diarization_time_ms': result['processing_time']['diarization_ms'],
                'total_time_ms': result['processing_time']['total_ms'],
                'num_speakers': result['metadata']['num_speakers'],
                'num_segments': result['metadata']['num_segments'],
            }

            print("\n📊 Results:")
            print(f"  VAD time: {benchmark_result['vad_time_ms']:.2f}ms")
            print(f"  Diarization time: {benchmark_result['diarization_time_ms']:.2f}ms")
            print(f"  Total time: {benchmark_result['total_time_ms']:.2f}ms")
            print(f"  Speakers: {benchmark_result['num_speakers']}")
            print(f"  Segments: {benchmark_result['num_segments']}")

            self.results['full_pipeline'] = benchmark_result

            # Cleanup
            Path(test_audio).unlink(missing_ok=True)
            return benchmark_result
        except Exception as e:
            print(f"❌ Error: {e}")
            return None
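
    # A useful derived figure: total_time_ms / 30_000 is the pipeline's
    # real-time factor for the 30 s clip (below 1.0 means faster than real
    # time); in practice the diarization stage tends to dominate the total.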

    def benchmark_memory_usage(self):
        """Benchmark memory usage."""
        print("\n" + "=" * 60)
        print("MEMORY USAGE BENCHMARK")
        print("=" * 60)

        import psutil
        import torch

        process = psutil.Process()

        # Initial memory
        initial_mem = process.memory_info().rss / 1024 / 1024  # MB
        print(f"\nInitial memory: {initial_mem:.2f} MB")

        # Load VAD
        print("\nLoading VAD...")
        vad = SileroVAD()
        vad_mem = process.memory_info().rss / 1024 / 1024
        print(f"After VAD: {vad_mem:.2f} MB (+{vad_mem - initial_mem:.2f} MB)")

        result = {
            'initial_memory_mb': initial_mem,
            'vad_memory_mb': vad_mem,
            'vad_increase_mb': vad_mem - initial_mem,
        }

        # GPU memory (if available)
        if torch.cuda.is_available():
            gpu_mem = torch.cuda.memory_allocated() / 1024 / 1024
            print(f"GPU memory: {gpu_mem:.2f} MB")
            result['gpu_memory_mb'] = gpu_mem

        self.results['memory_usage'] = result
        return result
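
    # Note these are point-in-time figures (RSS / currently allocated). For a
    # CUDA high-water mark, torch.cuda.max_memory_allocated() could be read
    # instead, optionally after torch.cuda.reset_peak_memory_stats().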

    def save_results(self, output_path: str = "benchmark_results.json"):
        """Save benchmark results to a JSON file."""
        output_file = Path(__file__).parent / output_path
        with open(output_file, 'w') as f:
            json.dump(self.results, f, indent=2)
        print(f"\n✓ Results saved to: {output_file}")

    def run_all(self, output_path: str = "benchmark_results.json"):
        """Run all benchmarks."""
        print("\n" + "=" * 60)
        print("RUNNING ALL BENCHMARKS")
        print("=" * 60)

        # VAD latency
        self.benchmark_vad_latency()

        # VAD thresholds
        self.benchmark_vad_thresholds()

        # Memory usage
        self.benchmark_memory_usage()

        # Full pipeline (if token available)
        if self.use_auth_token:
            self.benchmark_full_pipeline()

        # Save results
        self.save_results(output_path)

        print("\n" + "=" * 60)
        print("✅ ALL BENCHMARKS COMPLETE")
        print("=" * 60)


def main():
    """Main benchmark runner."""
    parser = argparse.ArgumentParser(description="Run VAD + Diarization benchmarks")
    parser.add_argument(
        '--token',
        type=str,
        default=None,
        help='Hugging Face token for the full pipeline benchmark'
    )
    parser.add_argument(
        '--output',
        type=str,
        default='benchmark_results.json',
        help='Output file for results'
    )
    parser.add_argument(
        '--quick',
        action='store_true',
        help='Run a quick benchmark (VAD latency only)'
    )
    args = parser.parse_args()

    # Get token from args or environment
    token = args.token or os.environ.get('HF_TOKEN')

    # Initialize benchmark
    benchmark = Benchmark(use_auth_token=token)

    if args.quick:
        # Quick benchmark (VAD latency only)
        benchmark.benchmark_vad_latency(durations=[1, 5, 10])
        benchmark.save_results(args.output)
    else:
        # Full benchmark suite; pass the output path through so --output
        # is honored here as well
        benchmark.run_all(args.output)


if __name__ == "__main__":
    main()
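
# Example invocations (assuming the script lives one level below the project
# root, next to the `src/` package, as the sys.path insert above implies):
#   python benchmark.py --quick                  # VAD latency only
#   python benchmark.py --token hf_xxx           # full suite incl. pipeline
#   HF_TOKEN=hf_xxx python benchmark.py --output my_results.json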