#!/usr/bin/env python3
"""
Voice Activity Detection + Speaker Diarization
Simple demo script using the modular pipeline
"""
import os
import sys
from pathlib import Path

import torch
import librosa
import numpy as np

# Import from modular components
from src.vad import SileroVAD
from src.diarization import SpeakerDiarization
from src.pipeline import VADDiarizationPipeline
from src.utils import create_test_audio
def setup_vad():
    """Construct the Silero VAD wrapper used by the demo.

    Prints a progress line before and after construction.

    Returns:
        The ``SileroVAD`` instance, created with ``threshold=0.5``.
    """
    print("Setting up Voice Activity Detection...")
    detector = SileroVAD(threshold=0.5)
    print("✓ Silero VAD loaded (40 MB)")
    return detector
def setup_diarization():
    """Load the speaker-diarization wrapper, or return None if it cannot be loaded.

    The Hugging Face access token is read from the ``HF_TOKEN`` environment
    variable. When the variable is missing or empty, nothing is loaded and
    setup instructions are printed instead.

    Returns:
        A ``SpeakerDiarization`` instance on success, otherwise ``None``.
    """
    print("Setting up Speaker Diarization...")

    token = os.environ.get('HF_TOKEN')
    if not token:
        # Never hand a placeholder string to the loader as if it were a
        # credential; treat a missing token the same way demo_full_pipeline
        # does (skip diarization) and tell the user how to provide one.
        print("❌ Error: HF_TOKEN is not set")
        print("Get your HF token: https://huggingface.co/settings/tokens")
        print("Or set it: export HF_TOKEN='your_token_here'")
        return None

    print("⚠️ First download requires 1GB+ bandwidth (one-time)")
    try:
        diarization = SpeakerDiarization(
            model_name="pyannote/speaker-diarization-3.1",
            use_auth_token=token,
        )
    except Exception as e:
        # Demo boundary: report the failure and let the caller carry on
        # without diarization rather than aborting the whole script.
        print(f"❌ Error: {e}")
        print("Get your HF token: https://huggingface.co/settings/tokens")
        print("Or set it: export HF_TOKEN='your_token_here'")
        return None

    print("✓ Diarization pipeline loaded")
    return diarization
def demo_vad(audio_path, vad_model):
    """Run VAD on one audio file and print the detected speech segments.

    Args:
        audio_path: Path of the audio file; passed unchanged to
            ``vad_model.process_file``.
        vad_model: Object whose ``process_file(path)`` returns a
            ``(timestamps, processing_time)`` pair, where each timestamp is
            a mapping with numeric ``'start'`` and ``'end'`` entries.

    Returns:
        The timestamps exactly as returned by ``vad_model.process_file``.
    """
    print(f"\nVAD Analysis: {audio_path}")
    timestamps, processing_time = vad_model.process_file(audio_path)

    # Timing is printed first so that the "Found ... segments:" header is
    # immediately followed by the list it introduces.
    # NOTE(review): the value is labelled "ms"; confirm the wrapper's unit.
    print(f"Processing time: {processing_time:.2f}ms")
    print(f"Found {len(timestamps)} speech segments:")
    for i, ts in enumerate(timestamps, 1):
        start_s = ts['start']
        end_s = ts['end']
        duration_s = end_s - start_s
        print(f" Segment {i}: {start_s:6.2f}s - {end_s:6.2f}s ({duration_s:6.2f}s)")
    return timestamps
def demo_diarization(audio_path, diar_pipeline):
    """Run diarization on one audio file and print the speaker timeline.

    Args:
        audio_path: Path of the audio file; passed unchanged to
            ``diar_pipeline.process_file``.
        diar_pipeline: Object whose ``process_file(path)`` returns a
            ``(segments, processing_time, metadata)`` triple, where
            ``metadata`` has a ``'num_speakers'`` entry and each segment is
            a mapping with ``'start'``, ``'end'`` and ``'speaker'`` entries.
            May be ``None`` (the value ``setup_diarization`` returns on
            failure), in which case nothing is processed.

    Returns:
        The segments as returned by ``diar_pipeline.process_file``, or an
        empty list when ``diar_pipeline`` is ``None``.
    """
    print(f"\nDiarization Analysis: {audio_path}")
    if diar_pipeline is None:
        # setup_diarization() signals failure with None; skip with a notice
        # instead of failing with an AttributeError on the call below.
        print("⚠️ Diarization pipeline unavailable; skipping.")
        return []

    segments, processing_time, metadata = diar_pipeline.process_file(audio_path)
    print(f"Found {metadata['num_speakers']} speakers")
    # NOTE(review): the value is labelled "ms"; confirm the wrapper's unit.
    print(f"Processing time: {processing_time:.2f}ms")
    print("\nSpeaker timeline:")
    for seg in segments:
        print(f" {seg['start']:6.2f}s - {seg['end']:6.2f}s: {seg['speaker']}")
    # Returned for parity with demo_vad, so callers can reuse the result.
    return segments
def demo_full_pipeline(audio_path):
    """Run the combined VAD + diarization pipeline on one audio file.

    The Hugging Face token is read from the ``HF_TOKEN`` environment
    variable. Without a token, or if the pipeline raises, the function
    falls back to a VAD-only run via ``demo_vad``.

    Args:
        audio_path: Path of the audio file to process.
    """
    rule = '=' * 60
    print(f"\n{rule}")
    print("FULL PIPELINE DEMO")
    print(f"{rule}")

    def _vad_only():
        # Shared fallback: a default-configured VAD run on the same file.
        fallback_vad = SileroVAD()
        demo_vad(audio_path, fallback_vad)

    hf_token = os.environ.get('HF_TOKEN')
    if not hf_token:
        print("\n⚠️ No HF_TOKEN found. Running VAD only...")
        _vad_only()
        return

    try:
        # Build the integrated pipeline, process the file, show text output.
        full_pipeline = VADDiarizationPipeline(
            use_auth_token=hf_token,
            vad_threshold=0.5,
        )
        outcome = full_pipeline.process_file(audio_path)
        rendered = full_pipeline.format_output(outcome, format='text')
        print("\n" + rendered)
    except Exception as err:
        print(f"\n❌ Error: {err}")
        print("Falling back to VAD only...")
        _vad_only()
def main():
    """Run the demo end to end: test audio, VAD-only run, full pipeline run.

    Creates ``test_audio.wav`` through ``create_test_audio`` (called with
    ``duration=10.0``), runs the VAD-only demo on it, then the full
    pipeline demo, and finally prints a list of suggested next steps.
    """
    rule = "=" * 60

    def banner(title):
        # Section header: blank line, rule, title, rule.
        print("\n" + rule)
        print(title)
        print(rule)

    banner("VOICE ACTIVITY DETECTION + SPEAKER DIARIZATION")

    # Generate the audio both demos operate on.
    print("\nCreating test audio...")
    audio_path = create_test_audio("test_audio.wav", duration=10.0)
    print(f"✓ Created {audio_path}")

    # Option 1: VAD alone.
    banner("OPTION 1: VAD ONLY (No HF token needed)")
    demo_vad(audio_path, setup_vad())

    # Option 2: VAD plus diarization (needs an HF token).
    banner("OPTION 2: FULL PIPELINE (VAD + Diarization)")
    demo_full_pipeline(audio_path)

    print("\n" + rule)
    print("✅ Demo complete!")
    print("\nNext steps:")
    next_steps = (
        "1. Set HF_TOKEN: export HF_TOKEN='your_token_here'",
        "2. Run Gradio demo: python app.py",
        "3. Test on real audio files",
        "4. Deploy with Docker: docker build -t vad-diarization .",
        "5. Check notebooks/demo.ipynb for detailed examples",
    )
    for step in next_steps:
        print(step)
    print(rule + "\n")
# Run the demo only when executed as a script, not when imported.
if __name__ == "__main__":
    main()