"""
Stage 2B — Vocal/Background Separation using Demucs

Separates vocals from background music/noise for cleaner dubbing.
Falls back to using raw audio if demucs is not available.
"""

import importlib.util
import logging
import shutil
import subprocess
import sys
from pathlib import Path

logger = logging.getLogger(__name__)

# The model name is also the sub-directory demucs writes its stems into.
_DEMUCS_MODEL = "htdemucs"
# 1 hour: separation of long videos is slow, especially on CPU.
_DEMUCS_TIMEOUT_SECONDS = 3600


def _demucs_launcher() -> list:
    """Return the argv prefix that starts demucs, or ``[]`` if unavailable.

    Prefers running demucs as a module of the *current* interpreter, so the
    same environment as the pipeline is used. Otherwise falls back to a
    ``demucs`` console script found on PATH.
    """
    try:
        has_module = importlib.util.find_spec("demucs") is not None
    except (ImportError, ValueError):
        # find_spec raises ValueError when a half-initialised module with
        # no __spec__ is sitting in sys.modules; treat that as "not usable".
        has_module = False

    # sys.executable can be empty in embedded interpreters.
    if has_module and sys.executable:
        return [sys.executable, "-m", "demucs"]

    script = shutil.which("demucs")
    if script:
        return [script]
    return []


def separate_vocals(audio_path: Path, output_dir: Path) -> dict:
    """
    Use demucs to separate vocals from background audio.

    Args:
        audio_path: Audio file to split.
        output_dir: Directory under which a ``separated`` folder is created
            (including missing parents) to receive the demucs output.

    Returns:
        dict with keys ``vocals`` (Path), ``background`` (Path or None) and
        ``separated`` (bool). When demucs is unavailable, fails, times out,
        or the output directory cannot be created, the raw audio is returned
        as ``vocals`` with ``background`` None and ``separated`` False.
    """
    vocals_dir = output_dir / "separated"

    try:
        # parents=True: output_dir itself may not exist yet.
        vocals_dir.mkdir(parents=True, exist_ok=True)
    except OSError as e:
        logger.warning(
            "Could not create %s: %s. Falling back to raw audio.", vocals_dir, e
        )
        return _fallback_no_separation(audio_path, output_dir)

    launcher = _demucs_launcher()
    if not launcher:
        logger.warning("Demucs not found. Using raw audio without separation.")
        return _fallback_no_separation(audio_path, output_dir)

    cmd = launcher + [
        "--two-stems", "vocals",   # Only separate vocals vs rest
        "-n", _DEMUCS_MODEL,       # Best free model
        "-o", str(vocals_dir),
        "--mp3",                   # Smaller output
        str(audio_path),
    ]

    try:
        logger.info(
            "Running demucs vocal separation (this takes a while for long audio)..."
        )
        result = subprocess.run(
            cmd,
            capture_output=True,
            text=True,
            timeout=_DEMUCS_TIMEOUT_SECONDS,
        )

        if result.returncode != 0:
            logger.warning(
                "Demucs failed: %s. Falling back to raw audio.", result.stderr
            )
            return _fallback_no_separation(audio_path, output_dir)

        # Demucs outputs to: separated/htdemucs/<track>/vocals.mp3 and no_vocals.mp3
        demucs_out = vocals_dir / _DEMUCS_MODEL / audio_path.stem
        vocals_path = demucs_out / "vocals.mp3"
        background_path = demucs_out / "no_vocals.mp3"

        if not vocals_path.exists():
            # Try wav extension
            vocals_path = demucs_out / "vocals.wav"
            background_path = demucs_out / "no_vocals.wav"

        if not vocals_path.exists():
            logger.warning("Demucs output not found. Falling back.")
            return _fallback_no_separation(audio_path, output_dir)

        logger.info(
            "Vocal separation complete: vocals=%s, bg=%s",
            vocals_path,
            background_path,
        )
        return {
            "vocals": vocals_path,
            "background": background_path,
            "separated": True,
        }

    except subprocess.TimeoutExpired:
        logger.warning("Demucs timed out. Falling back to raw audio.")
        return _fallback_no_separation(audio_path, output_dir)
    except Exception as e:
        # Best-effort stage: any failure to launch or inspect demucs output
        # degrades to raw audio instead of aborting the pipeline.
        logger.warning("Demucs error: %s. Falling back to raw audio.", e)
        return _fallback_no_separation(audio_path, output_dir)


def _fallback_no_separation(audio_path: Path, output_dir: Path) -> dict:
    """Fallback: use raw audio as vocals, no background track.

    ``output_dir`` is accepted for signature symmetry with
    ``separate_vocals`` and is not used.
    """
    logger.info("Using raw audio without vocal separation.")
    return {
        "vocals": audio_path,
        "background": None,
        "separated": False,
    }