Spaces:
Running
Running
Jiejing Zhang Claude Opus 4.6 committed on
Commit ·
c252a57
1
Parent(s): b1c70fa
Output both backing track and isolated instrument as MP3
Browse filesCo-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
app.py
CHANGED
|
@@ -23,13 +23,14 @@ INSTRUMENT_MAP = {
|
|
| 23 |
"Vocals": "vocals",
|
| 24 |
"Piano": "piano",
|
| 25 |
}
|
| 26 |
-
MODES = ["Remove", "Extract"]
|
| 27 |
|
| 28 |
# Load model once at startup
|
| 29 |
print("Loading Demucs model (htdemucs_6s)...")
|
| 30 |
MODEL = get_model("htdemucs_6s")
|
| 31 |
MODEL.eval()
|
| 32 |
|
|
|
|
|
|
|
| 33 |
|
| 34 |
def load_audio(path, samplerate, channels):
|
| 35 |
"""Load audio file via ffmpeg, return torch tensor (channels, samples)."""
|
|
@@ -51,13 +52,26 @@ def load_audio(path, samplerate, channels):
|
|
| 51 |
os.unlink(tmp_path)
|
| 52 |
|
| 53 |
|
| 54 |
-
def
|
| 55 |
-
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
if audio_path is None:
|
| 57 |
raise gr.Error("Please upload an audio file.")
|
| 58 |
|
| 59 |
target = INSTRUMENT_MAP[instrument]
|
| 60 |
-
extract = mode == "Extract"
|
| 61 |
|
| 62 |
wav = load_audio(audio_path, MODEL.samplerate, MODEL.audio_channels)
|
| 63 |
wav = wav.unsqueeze(0) # (1, channels, samples)
|
|
@@ -66,34 +80,31 @@ def process(audio_path, instrument, mode):
|
|
| 66 |
sources = sources.squeeze(0) # (num_sources, channels, samples)
|
| 67 |
|
| 68 |
source_names = MODEL.sources
|
|
|
|
| 69 |
|
| 70 |
-
if
|
| 71 |
-
|
| 72 |
-
else:
|
| 73 |
-
result = sum(sources[i] for i in range(len(source_names)) if source_names[i] != target)
|
| 74 |
|
| 75 |
-
|
| 76 |
-
|
| 77 |
-
out_path = tempfile.mktemp(suffix=".wav")
|
| 78 |
-
sf.write(out_path, result_np, MODEL.samplerate)
|
| 79 |
-
return out_path
|
| 80 |
|
| 81 |
|
| 82 |
demo = gr.Interface(
|
| 83 |
fn=process,
|
| 84 |
inputs=[
|
| 85 |
gr.Audio(type="filepath", label="Upload audio (MP3/WAV/FLAC/...)"),
|
| 86 |
-
gr.Dropdown(choices=INSTRUMENTS, value="Drums", label="Instrument"),
|
| 87 |
-
|
|
|
|
|
|
|
|
|
|
| 88 |
],
|
| 89 |
-
outputs=gr.Audio(type="filepath", label="Processed audio"),
|
| 90 |
title="BackTrack",
|
| 91 |
description=(
|
| 92 |
-
"Upload a song and remove
|
| 93 |
-
"**
|
| 94 |
-
"**
|
| 95 |
"Supports: Drums, Guitar, Bass, Vocals, Piano.\n\n"
|
| 96 |
-
"Running on free CPU provided by Hugging Face. Processing takes about 5 minutes per song.
|
| 97 |
),
|
| 98 |
flagging_mode="never",
|
| 99 |
)
|
|
|
|
| 23 |
"Vocals": "vocals",
|
| 24 |
"Piano": "piano",
|
| 25 |
}
|
|
|
|
| 26 |
|
| 27 |
# Load model once at startup
|
| 28 |
print("Loading Demucs model (htdemucs_6s)...")
|
| 29 |
MODEL = get_model("htdemucs_6s")
|
| 30 |
MODEL.eval()
|
| 31 |
|
| 32 |
# MP3 bitrate passed to ffmpeg's -b:a flag; kept low so output files stay small.
BITRATE = "48k"
| 33 |
+
|
| 34 |
|
| 35 |
def load_audio(path, samplerate, channels):
|
| 36 |
"""Load audio file via ffmpeg, return torch tensor (channels, samples)."""
|
|
|
|
| 52 |
os.unlink(tmp_path)
|
| 53 |
|
| 54 |
|
| 55 |
+
def to_mp3(audio_np, samplerate):
    """Encode a numpy audio array to an MP3 temp file and return its path.

    Args:
        audio_np: audio samples in the layout soundfile.write accepts —
            presumably (samples, channels) float PCM; confirm against caller.
        samplerate: sample rate in Hz for the encoded file.

    Returns:
        Path to a newly created .mp3 temp file (caller owns its cleanup).

    Raises:
        subprocess.CalledProcessError: if ffmpeg fails to encode.
    """
    # mkstemp instead of the deprecated, race-prone tempfile.mktemp: the
    # file is created atomically; we only need the name, so close the fd.
    wav_fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)
    mp3_fd, out_mp3 = tempfile.mkstemp(suffix=".mp3")
    os.close(mp3_fd)
    try:
        sf.write(tmp_wav, audio_np, samplerate)
        # -y overwrites the empty mkstemp placeholder; BITRATE feeds -b:a.
        subprocess.run(
            ["ffmpeg", "-y", "-i", tmp_wav, "-b:a", BITRATE, out_mp3],
            capture_output=True,
            check=True,
        )
    finally:
        # Remove the intermediate WAV even when ffmpeg fails (original
        # leaked it on a CalledProcessError).
        os.unlink(tmp_wav)
    return out_mp3
|
| 67 |
+
|
| 68 |
+
|
| 69 |
+
def process(audio_path, instrument):
|
| 70 |
+
"""Separate sources on CPU, return backing track and isolated instrument as MP3."""
|
| 71 |
if audio_path is None:
|
| 72 |
raise gr.Error("Please upload an audio file.")
|
| 73 |
|
| 74 |
target = INSTRUMENT_MAP[instrument]
|
|
|
|
| 75 |
|
| 76 |
wav = load_audio(audio_path, MODEL.samplerate, MODEL.audio_channels)
|
| 77 |
wav = wav.unsqueeze(0) # (1, channels, samples)
|
|
|
|
| 80 |
sources = sources.squeeze(0) # (num_sources, channels, samples)
|
| 81 |
|
| 82 |
source_names = MODEL.sources
|
| 83 |
+
target_idx = source_names.index(target)
|
| 84 |
|
| 85 |
+
backing = sum(sources[i] for i in range(len(source_names)) if i != target_idx).cpu().numpy().T
|
| 86 |
+
isolated = sources[target_idx].cpu().numpy().T
|
|
|
|
|
|
|
| 87 |
|
| 88 |
+
return to_mp3(backing, MODEL.samplerate), to_mp3(isolated, MODEL.samplerate)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
|
| 90 |
|
| 91 |
# Gradio UI wiring: one audio upload + instrument picker in, two MP3s out.
_DESCRIPTION = (
    "Upload a song and remove the selected instrument. You get two MP3 files back:\n\n"
    "1. **Backing track** — the song without the selected instrument.\n"
    "2. **Isolated instrument** — just the selected instrument by itself.\n\n"
    "Supports: Drums, Guitar, Bass, Vocals, Piano.\n\n"
    "Running on free CPU provided by Hugging Face. Processing takes about 5 minutes per song."
)

_INPUTS = [
    gr.Audio(type="filepath", label="Upload audio (MP3/WAV/FLAC/...)"),
    gr.Dropdown(choices=INSTRUMENTS, value="Drums", label="Instrument to remove"),
]

_OUTPUTS = [
    gr.Audio(type="filepath", label="Backing track (without selected instrument)"),
    gr.Audio(type="filepath", label="Isolated instrument"),
]

demo = gr.Interface(
    fn=process,
    inputs=_INPUTS,
    outputs=_OUTPUTS,
    title="BackTrack",
    description=_DESCRIPTION,
    flagging_mode="never",
)
|