Spaces:
Build error
Build error
| """ | |
| Stage 2 — Audio Extraction & Preprocessing | |
| Extracts audio from video, creates STT-ready WAV and stereo backup. | |
| """ | |
| import logging | |
| import subprocess | |
| from pathlib import Path | |
| from config import AUDIO_SAMPLE_RATE, AUDIO_CHANNELS | |
| logger = logging.getLogger(__name__) | |
def extract_audio_for_stt(video_path: Path, output_dir: Path) -> Path:
    """
    Extract audio from video as 16kHz mono WAV for speech-to-text.

    The actual sample rate and channel count are taken from the
    ``AUDIO_SAMPLE_RATE`` and ``AUDIO_CHANNELS`` config constants.

    Args:
        video_path: Input media file handed to ffmpeg.
        output_dir: Directory that receives ``audio_stt.wav``. It is
            created (including parents) if it does not exist yet.

    Returns:
        Path of the written WAV file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries ffmpeg's stderr.
        subprocess.TimeoutExpired: If ffmpeg runs longer than 600 seconds.
    """
    # ffmpeg does not create missing parent directories for its output file,
    # so make sure the target directory exists before invoking it.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / "audio_stt.wav"

    cmd = [
        "ffmpeg", "-y",                   # Overwrite an existing output file
        "-i", str(video_path),
        "-vn",                            # No video
        "-acodec", "pcm_s16le",           # 16-bit PCM
        "-ar", str(AUDIO_SAMPLE_RATE),    # Sample rate from config
        "-ac", str(AUDIO_CHANNELS),       # Channel count from config
        str(output_path),
    ]

    logger.info("Extracting audio for STT (16kHz mono WAV)...")
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg audio extraction failed: {result.stderr}")

    size_mb = output_path.stat().st_size / 1e6
    logger.info("STT audio: %s (%.1f MB)", output_path, size_mb)
    return output_path
def extract_audio_stereo(video_path: Path, output_dir: Path) -> Path:
    """
    Extract the audio track as a stereo WAV (for background mixing later).

    The output is 16-bit PCM at 44.1 kHz with exactly two channels. The
    channel count is forced with ``-ac 2`` because ffmpeg otherwise keeps
    the source layout, which would make a mono or surround input produce
    a file that is not stereo despite its name.

    Args:
        video_path: Input media file handed to ffmpeg.
        output_dir: Directory that receives ``audio_original_stereo.wav``.
            It is created (including parents) if it does not exist yet.

    Returns:
        Path of the written WAV file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status; the message
            carries ffmpeg's stderr.
        subprocess.TimeoutExpired: If ffmpeg runs longer than 600 seconds.
    """
    # ffmpeg does not create missing parent directories for its output file.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / "audio_original_stereo.wav"

    cmd = [
        "ffmpeg", "-y",            # Overwrite an existing output file
        "-i", str(video_path),
        "-vn",                     # No video
        "-acodec", "pcm_s16le",    # 16-bit PCM
        "-ar", "44100",            # 44.1 kHz
        "-ac", "2",                # Always two channels, whatever the source has
        str(output_path),
    ]

    logger.info("Extracting stereo audio for background mixing...")
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg stereo extraction failed: {result.stderr}")

    logger.info("Stereo audio: %s", output_path)
    return output_path
def get_audio_duration(audio_path: Path) -> float:
    """Get audio duration in seconds using ffprobe.

    Args:
        audio_path: Media file to inspect.

    Returns:
        The container-level duration reported by ffprobe, in seconds.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status; the message
            carries ffprobe's stderr.
        ValueError: If ffprobe succeeds but prints no parseable duration
            (for example an empty line or ``N/A``).
        subprocess.TimeoutExpired: If ffprobe runs longer than 30 seconds.
    """
    cmd = [
        "ffprobe",
        # "error" rather than "quiet": quiet suppresses all stderr output,
        # which would leave the failure message below without any detail.
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",          # Bare value, no section prefix
        str(audio_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")

    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as exc:
        # Same exception type as a bare float() failure, but the message
        # now says which file and what ffprobe actually printed.
        raise ValueError(
            f"ffprobe returned no usable duration for {audio_path}: "
            f"{raw_duration!r}"
        ) from exc
def get_video_duration(video_path: Path) -> float:
    """Return the length of a video file in seconds."""
    # Delegates to the audio helper: ffprobe works on video too.
    duration_seconds = get_audio_duration(video_path)
    return duration_seconds