""" Audio Separator Wrapper Provides a clean interface to audio-separator library for 2-stage source separation: 1. BS-RoFormer: Remove vocals (SOTA vocal/instrumental separation) 2. Demucs: Separate instrumental into piano/guitar/bass/drums/other Based on: https://github.com/nomadkaraoke/python-audio-separator """ from pathlib import Path from typing import Dict, Optional import subprocess import shutil import sys class AudioSeparator: """ Wrapper for audio-separator with support for multiple separation strategies. Separation strategies: 1. Two-stage (vocal removal + instrument separation) 2. Direct piano isolation (Demucs 6-stem) 3. Legacy Demucs 4-stem (backwards compatibility) """ def __init__(self, model_dir: Optional[Path] = None): """ Initialize audio separator. Args: model_dir: Directory to store downloaded models (default: ~/.audio-separator/) """ self.model_dir = model_dir or Path.home() / ".audio-separator" self.model_dir.mkdir(parents=True, exist_ok=True) def separate_vocals( self, audio_path: Path, output_dir: Path, model: str = "model_bs_roformer_ep_317_sdr_12.9755.ckpt" ) -> Dict[str, Path]: """ Separate vocals from instrumental using BS-RoFormer (SOTA). Args: audio_path: Input audio file output_dir: Directory for output stems model: BS-RoFormer model to use (default: best quality) Returns: Dict with keys: 'vocals', 'instrumental' """ output_dir.mkdir(parents=True, exist_ok=True) # Use audio-separator CLI - find it relative to Python executable python_bin = Path(sys.executable) venv_bin = python_bin.parent audio_separator_bin = venv_bin / "audio-separator" # Fall back to PATH if not in venv if not audio_separator_bin.exists(): audio_separator_bin = shutil.which("audio-separator") or "audio-separator" else: audio_separator_bin = str(audio_separator_bin) # Convert to absolute path for audio-separator audio_path_abs = audio_path.resolve() cmd = [ audio_separator_bin, str(audio_path_abs), "-m", model, "--output_dir", str(output_dir.resolve()), "--output_format", "WAV" ] if self.model_dir: cmd.extend(["--model_file_dir", str(self.model_dir)]) result = subprocess.run(cmd, capture_output=True, text=True) # Debug: print stdout/stderr to see what happened print(f" [DEBUG] audio-separator return code: {result.returncode}") if result.stdout: print(f" [DEBUG] stdout: {result.stdout[-1000:]}") if result.stderr: print(f" [DEBUG] stderr: {result.stderr[-1000:]}") if result.returncode != 0: error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error" raise RuntimeError(f"BS-RoFormer vocal separation failed: {error_msg}") # audio-separator creates files with model name appended # Pattern: filename_(Vocals)_modelname.wav or filename_(Vocals).wav # Check what files were actually created if output_dir.exists(): actual_files = list(output_dir.glob("*.wav")) print(f" [DEBUG] Files created in {output_dir}: {[f.name for f in actual_files]}") # Find vocals and instrumental files by pattern matching vocals_files = [f for f in actual_files if "Vocal" in f.name] instrumental_files = [f for f in actual_files if "Instrumental" in f.name] if vocals_files and instrumental_files: vocals_path = vocals_files[0] instrumental_path = instrumental_files[0] print(f" ✓ Found vocals: {vocals_path.name}") print(f" ✓ Found instrumental: {instrumental_path.name}") else: raise RuntimeError(f"Could not find output files. Found: {[f.name for f in actual_files]}") else: raise RuntimeError(f"Output directory {output_dir} does not exist") return { 'vocals': vocals_path, 'instrumental': instrumental_path } def separate_instruments_demucs( self, audio_path: Path, output_dir: Path, stems: int = 6 ) -> Dict[str, Path]: """ Separate instrumental audio into individual instruments using Demucs. Args: audio_path: Input audio file (should be instrumental, vocals already removed) output_dir: Directory for output stems stems: Number of stems (4 or 6) 4-stem: vocals, drums, bass, other 6-stem: vocals, drums, bass, guitar, piano, other Returns: Dict with stem names as keys and paths as values """ output_dir.mkdir(parents=True, exist_ok=True) # Use Demucs directly for instrument separation model = "htdemucs_6s" if stems == 6 else "htdemucs" # Find demucs binary relative to Python executable python_bin = Path(sys.executable) venv_bin = python_bin.parent demucs_bin = venv_bin / "demucs" # Fall back to PATH if not in venv if not demucs_bin.exists(): demucs_bin = shutil.which("demucs") or "demucs" else: demucs_bin = str(demucs_bin) # Convert to absolute path for demucs audio_path_abs = audio_path.resolve() cmd = [ demucs_bin, "-n", model, "-o", str(output_dir.resolve()), str(audio_path_abs) ] result = subprocess.run(cmd, capture_output=True, text=True) if result.returncode != 0: error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error" raise RuntimeError(f"Demucs instrument separation failed: {error_msg}") # Demucs creates: output_dir/model_name/audio_stem/*.wav demucs_output = output_dir / model / audio_path.stem if stems == 6: stem_files = { 'vocals': demucs_output / "vocals.wav", 'drums': demucs_output / "drums.wav", 'bass': demucs_output / "bass.wav", 'guitar': demucs_output / "guitar.wav", 'piano': demucs_output / "piano.wav", 'other': demucs_output / "other.wav", } else: stem_files = { 'vocals': demucs_output / "vocals.wav", 'drums': demucs_output / "drums.wav", 'bass': demucs_output / "bass.wav", 'other': demucs_output / "other.wav", } # Verify all expected stems exist missing = [name for name, path in stem_files.items() if not path.exists()] if missing: raise RuntimeError(f"Missing expected stems: {missing}") return stem_files def two_stage_separation( self, audio_path: Path, output_dir: Path, instrument_stems: int = 6 ) -> Dict[str, Path]: """ Two-stage separation for optimal quality: 1. Remove vocals with BS-RoFormer (SOTA vocal separation) 2. Separate clean instrumental with Demucs 6-stem (piano, guitar, drums, bass, other) Args: audio_path: Input audio file output_dir: Directory for output stems instrument_stems: Number of instrument stems (4 or 6) Returns: Dict with all stems: vocals, piano, guitar, drums, bass, other """ output_dir.mkdir(parents=True, exist_ok=True) # Stage 1: Remove vocals with BS-RoFormer print(" Stage 1: Separating vocals with BS-RoFormer...") vocal_dir = output_dir / "stage1_vocals" vocal_stems = self.separate_vocals(audio_path, vocal_dir) # Stage 2: Separate instrumental with Demucs print(f" Stage 2: Separating instruments with Demucs {instrument_stems}-stem...") instrument_dir = output_dir / "stage2_instruments" instrument_stems_dict = self.separate_instruments_demucs( vocal_stems['instrumental'], instrument_dir, stems=instrument_stems ) # Combine results (vocals from stage 1, instruments from stage 2) all_stems = { 'vocals': vocal_stems['vocals'], # From BS-RoFormer (clean) } # Add all instrument stems except the duplicate vocals stem from Demucs for name, path in instrument_stems_dict.items(): if name != 'vocals': # Skip Demucs vocals (we have better ones from BS-RoFormer) all_stems[name] = path print(f" ✓ 2-stage separation complete: {list(all_stems.keys())}") return all_stems if __name__ == "__main__": # Test the separator import argparse parser = argparse.ArgumentParser(description="Test Audio Separator") parser.add_argument("audio_file", type=str, help="Path to audio file") parser.add_argument("--output", type=str, default="./output_stems", help="Output directory for stems") parser.add_argument("--mode", type=str, default="two-stage", choices=["vocals", "instruments", "two-stage"], help="Separation mode") args = parser.parse_args() separator = AudioSeparator() audio_path = Path(args.audio_file) output_dir = Path(args.output) if args.mode == "vocals": stems = separator.separate_vocals(audio_path, output_dir) print(f"Vocal separation complete:") for name, path in stems.items(): print(f" {name}: {path}") elif args.mode == "instruments": stems = separator.separate_instruments_demucs(audio_path, output_dir, stems=6) print(f"Instrument separation complete:") for name, path in stems.items(): print(f" {name}: {path}") elif args.mode == "two-stage": stems = separator.two_stage_separation(audio_path, output_dir, instrument_stems=6) print(f"2-stage separation complete:") for name, path in stems.items(): print(f" {name}: {path}")