File size: 10,410 Bytes
"""
Audio Separator Wrapper
Provides a clean interface to the audio-separator library for 2-stage source separation:
1. BS-RoFormer: Remove vocals (SOTA vocal/instrumental separation)
2. Demucs: Separate instrumental into piano/guitar/bass/drums/other
Based on: https://github.com/nomadkaraoke/python-audio-separator
"""
from pathlib import Path
from typing import Dict, Optional
import subprocess
import shutil
import sys
class AudioSeparator:
    """
    Wrapper for audio-separator with support for multiple separation strategies.

    Separation strategies:
    1. Two-stage (vocal removal + instrument separation)
    2. Direct piano isolation (Demucs 6-stem)
    3. Legacy Demucs 4-stem (backwards compatibility)

    Both external tools are driven through their command-line entry points
    (``audio-separator`` and ``demucs``) via ``subprocess``.
    """

    def __init__(self, model_dir: Optional[Path] = None):
        """
        Initialize audio separator.

        Args:
            model_dir: Directory to store downloaded models
                (default: ~/.audio-separator/). A plain string is accepted
                and converted to a ``Path``. The directory is created if it
                does not exist.
        """
        # Path.home() is only evaluated when no directory was supplied.
        self.model_dir = (
            Path(model_dir) if model_dir else Path.home() / ".audio-separator"
        )
        self.model_dir.mkdir(parents=True, exist_ok=True)

    @staticmethod
    def _resolve_cli(name: str) -> str:
        """Return the command to invoke for the CLI tool *name*.

        Looks next to the running Python executable first (the usual location
        inside a virtualenv), then on PATH, and finally falls back to the bare
        name so that ``subprocess`` reports the missing executable itself.
        """
        beside_python = Path(sys.executable).parent / name
        if beside_python.exists():
            return str(beside_python)
        return shutil.which(name) or name

    @staticmethod
    def _pick_stem_file(wav_files, input_stem: str, label: str) -> Optional[Path]:
        """Pick the output file for *label* ("Vocal" / "Instrumental").

        Matching goes from strict to loose so that a label appearing in the
        model name, in the input file name, or in stale outputs of another
        track does not cause the wrong file to be selected:

        1. name starts with ``<input_stem>_(<label>``;
        2. the part of the name after the input stem contains ``(<label>``;
        3. the part of the name after the input stem contains ``<label>``.

        Candidates are examined in sorted order so the result is deterministic.
        Returns ``None`` when nothing matches.
        """
        ordered = sorted(wav_files)

        exact_prefix = f"{input_stem}_({label}"
        for candidate in ordered:
            if candidate.name.startswith(exact_prefix):
                return candidate

        # Prefer files that belong to this input; otherwise consider them all.
        own = [f for f in ordered if f.name.startswith(input_stem)]
        pool = own or ordered

        def tail(path: Path) -> str:
            # Ignore the input file's own name when searching for the label.
            if path.name.startswith(input_stem):
                return path.name[len(input_stem):]
            return path.name

        for needle in (f"({label}", label):
            for candidate in pool:
                if needle in tail(candidate):
                    return candidate
        return None

    def separate_vocals(
        self,
        audio_path: Path,
        output_dir: Path,
        model: str = "model_bs_roformer_ep_317_sdr_12.9755.ckpt"
    ) -> Dict[str, Path]:
        """
        Separate vocals from instrumental using BS-RoFormer (SOTA).

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems (created if missing)
            model: BS-RoFormer model to use (default: best quality)

        Returns:
            Dict with keys: 'vocals', 'instrumental'

        Raises:
            RuntimeError: If the audio-separator CLI exits with a non-zero
                status, or if the expected output files cannot be found.
        """
        audio_path = Path(audio_path)
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Absolute paths are passed to the CLI for both input and output.
        cmd = [
            self._resolve_cli("audio-separator"),
            str(audio_path.resolve()),
            "-m", model,
            "--output_dir", str(output_dir.resolve()),
            "--output_format", "WAV",
        ]
        if self.model_dir:
            cmd.extend(["--model_file_dir", str(self.model_dir)])

        result = subprocess.run(cmd, capture_output=True, text=True)

        # Debug: print the tail of stdout/stderr to see what happened
        print(f" [DEBUG] audio-separator return code: {result.returncode}")
        if result.stdout:
            print(f" [DEBUG] stdout: {result.stdout[-1000:]}")
        if result.stderr:
            print(f" [DEBUG] stderr: {result.stderr[-1000:]}")

        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"BS-RoFormer vocal separation failed: {error_msg}")

        if not output_dir.exists():
            raise RuntimeError(f"Output directory {output_dir} does not exist")

        # audio-separator creates files with the model name appended.
        # Pattern: filename_(Vocals)_modelname.wav or filename_(Vocals).wav
        actual_files = sorted(output_dir.glob("*.wav"))
        print(f" [DEBUG] Files created in {output_dir}: {[f.name for f in actual_files]}")

        vocals_path = self._pick_stem_file(actual_files, audio_path.stem, "Vocal")
        instrumental_path = self._pick_stem_file(
            actual_files, audio_path.stem, "Instrumental"
        )
        if vocals_path is None or instrumental_path is None:
            raise RuntimeError(
                f"Could not find output files. Found: {[f.name for f in actual_files]}"
            )

        print(f" ✓ Found vocals: {vocals_path.name}")
        print(f" ✓ Found instrumental: {instrumental_path.name}")
        return {
            'vocals': vocals_path,
            'instrumental': instrumental_path
        }

    def separate_instruments_demucs(
        self,
        audio_path: Path,
        output_dir: Path,
        stems: int = 6
    ) -> Dict[str, Path]:
        """
        Separate instrumental audio into individual instruments using Demucs.

        Args:
            audio_path: Input audio file (should be instrumental, vocals already removed)
            output_dir: Directory for output stems (created if missing)
            stems: Number of stems (4 or 6)
                4-stem: vocals, drums, bass, other
                6-stem: vocals, drums, bass, guitar, piano, other
                Any value other than 6 selects the 4-stem model.

        Returns:
            Dict with stem names as keys and paths as values

        Raises:
            RuntimeError: If the demucs CLI exits with a non-zero status, or
                if any expected stem file is missing afterwards.
        """
        audio_path = Path(audio_path)
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Use Demucs directly for instrument separation
        if stems == 6:
            model = "htdemucs_6s"
            stem_names = ("vocals", "drums", "bass", "guitar", "piano", "other")
        else:
            model = "htdemucs"
            stem_names = ("vocals", "drums", "bass", "other")

        cmd = [
            self._resolve_cli("demucs"),
            "-n", model,
            "-o", str(output_dir.resolve()),
            str(audio_path.resolve()),
        ]
        result = subprocess.run(cmd, capture_output=True, text=True)
        if result.returncode != 0:
            error_msg = result.stderr.strip() or result.stdout.strip() or "Unknown error"
            raise RuntimeError(f"Demucs instrument separation failed: {error_msg}")

        # Demucs creates: output_dir/model_name/audio_stem/*.wav
        demucs_output = output_dir / model / audio_path.stem
        stem_files = {name: demucs_output / f"{name}.wav" for name in stem_names}

        # Verify all expected stems exist
        missing = [name for name, path in stem_files.items() if not path.exists()]
        if missing:
            raise RuntimeError(f"Missing expected stems: {missing}")
        return stem_files

    def two_stage_separation(
        self,
        audio_path: Path,
        output_dir: Path,
        instrument_stems: int = 6
    ) -> Dict[str, Path]:
        """
        Two-stage separation for optimal quality:
        1. Remove vocals with BS-RoFormer (SOTA vocal separation)
        2. Separate clean instrumental with Demucs 6-stem (piano, guitar, drums, bass, other)

        Stage 1 writes to ``output_dir/stage1_vocals`` and stage 2 to
        ``output_dir/stage2_instruments``.

        Args:
            audio_path: Input audio file
            output_dir: Directory for output stems
            instrument_stems: Number of instrument stems (4 or 6)

        Returns:
            Dict with all stems: vocals, piano, guitar, drums, bass, other

        Raises:
            RuntimeError: Propagated from either stage on failure.
        """
        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        # Stage 1: Remove vocals with BS-RoFormer
        print(" Stage 1: Separating vocals with BS-RoFormer...")
        vocal_dir = output_dir / "stage1_vocals"
        vocal_stems = self.separate_vocals(audio_path, vocal_dir)

        # Stage 2: Separate instrumental with Demucs
        print(f" Stage 2: Separating instruments with Demucs {instrument_stems}-stem...")
        instrument_dir = output_dir / "stage2_instruments"
        instrument_stems_dict = self.separate_instruments_demucs(
            vocal_stems['instrumental'],
            instrument_dir,
            stems=instrument_stems
        )

        # Combine results: vocals come from stage 1; the Demucs 'vocals' stem
        # is dropped because stage 2 ran on audio with vocals already removed.
        all_stems = {'vocals': vocal_stems['vocals']}
        all_stems.update(
            (name, path)
            for name, path in instrument_stems_dict.items()
            if name != 'vocals'
        )

        print(f" ✓ 2-stage separation complete: {list(all_stems.keys())}")
        return all_stems
if __name__ == "__main__":
    # Manual smoke test: run one separation mode on a file given on the
    # command line and list the resulting stem paths.
    import argparse

    cli = argparse.ArgumentParser(description="Test Audio Separator")
    cli.add_argument("audio_file", type=str, help="Path to audio file")
    cli.add_argument(
        "--output",
        type=str,
        default="./output_stems",
        help="Output directory for stems",
    )
    cli.add_argument(
        "--mode",
        type=str,
        default="two-stage",
        choices=["vocals", "instruments", "two-stage"],
        help="Separation mode",
    )
    options = cli.parse_args()

    engine = AudioSeparator()
    source_file = Path(options.audio_file)
    target_dir = Path(options.output)

    # Each mode maps to (headline printed afterwards, callable doing the work).
    # argparse's ``choices`` guarantees the lookup below always succeeds.
    actions = {
        "vocals": (
            "Vocal separation complete:",
            lambda: engine.separate_vocals(source_file, target_dir),
        ),
        "instruments": (
            "Instrument separation complete:",
            lambda: engine.separate_instruments_demucs(
                source_file, target_dir, stems=6
            ),
        ),
        "two-stage": (
            "2-stage separation complete:",
            lambda: engine.two_stage_separation(
                source_file, target_dir, instrument_stems=6
            ),
        ),
    }
    headline, run = actions[options.mode]

    produced = run()
    print(headline)
    for label, location in produced.items():
        print(f" {label}: {location}")
|