Spaces:

dashhdata
/

video-dubbing-agent

Build error

App Files Files Community

video-dubbing-agent / services /vocal_separator.py

dashhdata

Upload folder using huggingface_hub

4ec3855 verified 2 months ago

raw

history blame contribute delete

3.17 kB

	"""
	Stage 2B — Vocal/Background Separation using Demucs
	Separates vocals from background music/noise for cleaner dubbing.
	Falls back to using raw audio if demucs is not available.
	"""

	import importlib.util
	import logging
	import shutil
	import subprocess
	import sys
	from pathlib import Path

	# Module-level logger, named after this module via __name__.
	logger = logging.getLogger(__name__)


	def separate_vocals(
	    audio_path: Path,
	    output_dir: Path,
	    *,
	    model: str = "htdemucs",
	    timeout: float = 3600,
	) -> dict:
	    """Separate vocals from background audio with Demucs.

	    Runs Demucs in two-stem mode as a subprocess. This is best-effort: any
	    failure (Demucs missing, non-zero exit, timeout, missing output files,
	    filesystem errors) is logged and the raw-audio fallback is returned
	    instead of raising.

	    Args:
	        audio_path: Audio file to separate.
	        output_dir: Directory under which a ``separated`` sub-directory is
	            created for the Demucs output.
	        model: Demucs model name, passed to ``-n``; it is also the name of
	            the sub-directory the output is looked up in.
	        timeout: Maximum number of seconds to wait for the Demucs process.

	    Returns:
	        A dict with the keys ``"vocals"``, ``"background"`` and
	        ``"separated"``. On success both paths point at files that exist and
	        ``"separated"`` is True. On fallback ``"vocals"`` is ``audio_path``,
	        ``"background"`` is None and ``"separated"`` is False.
	    """
	    try:
	        vocals_dir = output_dir / "separated"
	        # parents=True so a not-yet-created output_dir does not raise.
	        vocals_dir.mkdir(parents=True, exist_ok=True)

	        # Build the launcher from what is actually available, so the
	        # availability check and the executed command cannot disagree.
	        # Prefer the interpreter running this code (same environment as the
	        # app); otherwise use a `demucs` executable found on PATH.
	        if sys.executable and importlib.util.find_spec("demucs") is not None:
	            launcher = [sys.executable, "-m", "demucs"]
	        else:
	            demucs_cli = shutil.which("demucs")
	            if demucs_cli is None:
	                logger.warning("Demucs not found. Using raw audio without separation.")
	                return _fallback_no_separation(audio_path, output_dir)
	            launcher = [demucs_cli]

	        cmd = [
	            *launcher,
	            "--two-stems", "vocals",  # Only separate vocals vs rest
	            "-n", model,
	            "-o", str(vocals_dir),
	            "--mp3",  # Smaller output
	            str(audio_path),
	        ]

	        logger.info("Running demucs vocal separation (this takes a while for long audio)...")
	        result = subprocess.run(
	            cmd,
	            capture_output=True,
	            text=True,
	            timeout=timeout,
	        )

	        if result.returncode != 0:
	            logger.warning(f"Demucs failed: {result.stderr}. Falling back to raw audio.")
	            return _fallback_no_separation(audio_path, output_dir)

	        # Demucs outputs to: separated/<model>/<filename>/vocals.<ext> and
	        # no_vocals.<ext>. Try mp3 first (requested above), then wav.
	        demucs_out = vocals_dir / model / audio_path.stem
	        for ext in ("mp3", "wav"):
	            vocals_path = demucs_out / f"vocals.{ext}"
	            background_path = demucs_out / f"no_vocals.{ext}"
	            # Require both stems so callers never get a path to a missing file.
	            if vocals_path.exists() and background_path.exists():
	                break
	        else:
	            logger.warning("Demucs output not found. Falling back.")
	            return _fallback_no_separation(audio_path, output_dir)

	        logger.info(f"Vocal separation complete: vocals={vocals_path}, bg={background_path}")
	        return {
	            "vocals": vocals_path,
	            "background": background_path,
	            "separated": True,
	        }

	    except subprocess.TimeoutExpired:
	        logger.warning("Demucs timed out. Falling back to raw audio.")
	        return _fallback_no_separation(audio_path, output_dir)
	    except Exception as e:
	        # Deliberate catch-all: separation is optional, so no failure here
	        # may abort the caller.
	        logger.warning(f"Demucs error: {e}. Falling back to raw audio.")
	        return _fallback_no_separation(audio_path, output_dir)


	def _fallback_no_separation(audio_path: Path, output_dir: Path) -> dict:
	    """Return the no-separation result: the raw audio stands in for vocals.

	    ``output_dir`` is accepted but not used. The returned dict has the keys
	    ``"vocals"`` (the given ``audio_path``), ``"background"`` (None) and
	    ``"separated"`` (False).
	    """
	    logger.info("Using raw audio without vocal separation.")
	    unseparated = dict(vocals=audio_path, background=None, separated=False)
	    return unseparated