video-dubbing-agent / services /audio_extractor.py
dashhdata's picture
Upload folder using huggingface_hub
4ec3855 verified
"""
Stage 2 — Audio Extraction & Preprocessing
Extracts audio from video, creates STT-ready WAV and stereo backup.
"""
import logging
import subprocess
from pathlib import Path
from config import AUDIO_SAMPLE_RATE, AUDIO_CHANNELS
# Module-level logger named after this module; used by the extraction helpers below.
logger = logging.getLogger(__name__)
def extract_audio_for_stt(video_path: Path, output_dir: Path) -> Path:
    """
    Extract audio from video as a WAV file for speech-to-text.

    The audio is written as 16-bit PCM to ``output_dir / "audio_stt.wav"``,
    resampled to ``AUDIO_SAMPLE_RATE`` and converted to ``AUDIO_CHANNELS``
    channels. Both values come from ``config``; the inline comments describe
    them as 16 kHz mono. An existing output file is overwritten.

    Args:
        video_path: Input media file handed to ffmpeg.
        output_dir: Directory for the WAV file; created if it does not exist.

    Returns:
        Path of the written WAV file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
        subprocess.TimeoutExpired: If ffmpeg runs longer than 600 seconds.
    """
    # ffmpeg does not create missing parent directories for its output file.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / "audio_stt.wav"

    cmd = [
        "ffmpeg",
        "-nostdin",                      # Never read the parent's stdin
        "-y",                            # Overwrite existing output
        "-i", str(video_path),
        "-vn",                           # No video
        "-acodec", "pcm_s16le",          # 16-bit PCM
        "-ar", str(AUDIO_SAMPLE_RATE),   # 16kHz
        "-ac", str(AUDIO_CHANNELS),      # Mono
        str(output_path),
    ]

    logger.info("Extracting audio for STT (16kHz mono WAV)...")
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg audio extraction failed: {result.stderr}")

    logger.info(
        "STT audio: %s (%.1f MB)",
        output_path,
        output_path.stat().st_size / 1e6,
    )
    return output_path
def extract_audio_stereo(video_path: Path, output_dir: Path) -> Path:
    """
    Extract the video's audio as a stereo WAV (for background mixing later).

    The audio is written as 16-bit PCM at 44.1 kHz with two channels to
    ``output_dir / "audio_original_stereo.wav"``. An existing output file is
    overwritten.

    Args:
        video_path: Input media file handed to ffmpeg.
        output_dir: Directory for the WAV file; created if it does not exist.

    Returns:
        Path of the written WAV file.

    Raises:
        RuntimeError: If ffmpeg exits with a non-zero status.
        subprocess.TimeoutExpired: If ffmpeg runs longer than 600 seconds.
    """
    # ffmpeg does not create missing parent directories for its output file.
    output_dir.mkdir(parents=True, exist_ok=True)
    output_path = output_dir / "audio_original_stereo.wav"

    cmd = [
        "ffmpeg",
        "-nostdin",                # Never read the parent's stdin
        "-y",                      # Overwrite existing output
        "-i", str(video_path),
        "-vn",                     # No video
        "-acodec", "pcm_s16le",    # 16-bit PCM
        "-ar", "44100",
        # Without -ac ffmpeg keeps the source channel count, so a mono or
        # multichannel source would not produce the stereo file promised here.
        "-ac", "2",
        str(output_path),
    ]

    logger.info("Extracting stereo audio for background mixing...")
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=600)
    if result.returncode != 0:
        raise RuntimeError(f"FFmpeg stereo extraction failed: {result.stderr}")

    logger.info("Stereo audio: %s", output_path)
    return output_path
def get_audio_duration(audio_path: Path) -> float:
    """Get audio duration in seconds using ffprobe.

    Args:
        audio_path: Media file to inspect.

    Returns:
        The container-level duration reported by ffprobe, in seconds.

    Raises:
        RuntimeError: If ffprobe exits with a non-zero status.
        ValueError: If ffprobe succeeds but prints no numeric duration
            (for example an empty value or ``N/A``).
        subprocess.TimeoutExpired: If ffprobe runs longer than 30 seconds.
    """
    cmd = [
        "ffprobe",
        # "error" rather than "quiet": quiet suppresses every diagnostic, which
        # would leave the stderr text in the RuntimeError below empty.
        "-v", "error",
        "-show_entries", "format=duration",
        "-of", "csv=p=0",          # Bare value, no section prefix
        str(audio_path),
    ]
    result = subprocess.run(cmd, capture_output=True, text=True, timeout=30)
    if result.returncode != 0:
        raise RuntimeError(f"ffprobe failed: {result.stderr}")

    raw_duration = result.stdout.strip()
    try:
        return float(raw_duration)
    except ValueError as err:
        # Same exception type float() raised before, now with context.
        raise ValueError(
            f"ffprobe returned no usable duration for {audio_path}: "
            f"{raw_duration!r}"
        ) from err
def get_video_duration(video_path: Path) -> float:
    """Return the duration of a video file in seconds.

    Delegates to ``get_audio_duration``, passing the video path through
    unchanged.
    """
    # The ffprobe-based lookup accepts video files as well as audio files.
    duration_seconds = get_audio_duration(video_path)
    return duration_seconds