Jiejing Zhang Claude Opus 4.6 committed on
Commit
c252a57
·
1 Parent(s): b1c70fa

Output both backing track and isolated instrument as MP3

Browse files

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>

Files changed (1) hide show
  1. app.py +31 -20
app.py CHANGED
@@ -23,13 +23,14 @@ INSTRUMENT_MAP = {
23
  "Vocals": "vocals",
24
  "Piano": "piano",
25
  }
26
- MODES = ["Remove", "Extract"]
27
 
28
  # Load model once at startup
29
  print("Loading Demucs model (htdemucs_6s)...")
30
  MODEL = get_model("htdemucs_6s")
31
  MODEL.eval()
32
 
 
 
33
 
34
  def load_audio(path, samplerate, channels):
35
  """Load audio file via ffmpeg, return torch tensor (channels, samples)."""
@@ -51,13 +52,26 @@ def load_audio(path, samplerate, channels):
51
  os.unlink(tmp_path)
52
 
53
 
54
- def process(audio_path, instrument, mode):
55
- """Separate sources on CPU, return processed audio."""
 
 
 
 
 
 
 
 
 
 
 
 
 
 
56
  if audio_path is None:
57
  raise gr.Error("Please upload an audio file.")
58
 
59
  target = INSTRUMENT_MAP[instrument]
60
- extract = mode == "Extract"
61
 
62
  wav = load_audio(audio_path, MODEL.samplerate, MODEL.audio_channels)
63
  wav = wav.unsqueeze(0) # (1, channels, samples)
@@ -66,34 +80,31 @@ def process(audio_path, instrument, mode):
66
  sources = sources.squeeze(0) # (num_sources, channels, samples)
67
 
68
  source_names = MODEL.sources
 
69
 
70
- if extract:
71
- result = sources[source_names.index(target)]
72
- else:
73
- result = sum(sources[i] for i in range(len(source_names)) if source_names[i] != target)
74
 
75
- result_np = result.cpu().numpy().T # (samples, channels)
76
-
77
- out_path = tempfile.mktemp(suffix=".wav")
78
- sf.write(out_path, result_np, MODEL.samplerate)
79
- return out_path
80
 
81
 
82
  demo = gr.Interface(
83
  fn=process,
84
  inputs=[
85
  gr.Audio(type="filepath", label="Upload audio (MP3/WAV/FLAC/...)"),
86
- gr.Dropdown(choices=INSTRUMENTS, value="Drums", label="Instrument"),
87
- gr.Radio(choices=MODES, value="Remove", label="Mode"),
 
 
 
88
  ],
89
- outputs=gr.Audio(type="filepath", label="Processed audio"),
90
  title="BackTrack",
91
  description=(
92
- "Upload a song and remove or extract the selected instrument. Powered by Demucs (Meta AI).\n\n"
93
- "**Remove** — get the song without the selected instrument (e.g. drumless backing track).\n\n"
94
- "**Extract** — isolate just the selected instrument (e.g. vocals only).\n\n"
95
  "Supports: Drums, Guitar, Bass, Vocals, Piano.\n\n"
96
- "Running on free CPU provided by Hugging Face. Processing takes about 5 minutes per song. Output format: WAV"
97
  ),
98
  flagging_mode="never",
99
  )
 
23
  "Vocals": "vocals",
24
  "Piano": "piano",
25
  }
 
26
 
27
  # Load model once at startup
28
  print("Loading Demucs model (htdemucs_6s)...")
29
  MODEL = get_model("htdemucs_6s")
30
  MODEL.eval()
31
 
32
+ BITRATE = "48k"
33
+
34
 
35
  def load_audio(path, samplerate, channels):
36
  """Load audio file via ffmpeg, return torch tensor (channels, samples)."""
 
52
  os.unlink(tmp_path)
53
 
54
 
55
def to_mp3(audio_np, samplerate):
    """Encode numpy audio to an MP3 temp file and return its path.

    Args:
        audio_np: numpy array of samples — written as-is via soundfile,
            so it is expected in (samples, channels) layout (callers
            transpose before passing in). TODO confirm against callers.
        samplerate: sample rate in Hz for the intermediate WAV.

    Returns:
        Path to the encoded MP3 file. The caller owns the file and is
        responsible for deleting it.

    Raises:
        subprocess.CalledProcessError: if ffmpeg fails (check=True).
    """
    # mkstemp instead of the deprecated, race-prone tempfile.mktemp:
    # mktemp only returns a name, leaving a window where another process
    # can create the file first. We close the fds immediately since both
    # files are (re)written by path below (ffmpeg -y overwrites out_mp3).
    wav_fd, tmp_wav = tempfile.mkstemp(suffix=".wav")
    os.close(wav_fd)
    mp3_fd, out_mp3 = tempfile.mkstemp(suffix=".mp3")
    os.close(mp3_fd)
    try:
        sf.write(tmp_wav, audio_np, samplerate)
        subprocess.run(
            ["ffmpeg", "-y", "-i", tmp_wav, "-b:a", BITRATE, out_mp3],
            capture_output=True,
            check=True,
        )
    finally:
        # Original leaked tmp_wav when encoding raised; always clean up
        # the intermediate WAV, success or failure.
        os.unlink(tmp_wav)
    return out_mp3
67
+
68
+
69
+ def process(audio_path, instrument):
70
+ """Separate sources on CPU, return backing track and isolated instrument as MP3."""
71
  if audio_path is None:
72
  raise gr.Error("Please upload an audio file.")
73
 
74
  target = INSTRUMENT_MAP[instrument]
 
75
 
76
  wav = load_audio(audio_path, MODEL.samplerate, MODEL.audio_channels)
77
  wav = wav.unsqueeze(0) # (1, channels, samples)
 
80
  sources = sources.squeeze(0) # (num_sources, channels, samples)
81
 
82
  source_names = MODEL.sources
83
+ target_idx = source_names.index(target)
84
 
85
+ backing = sum(sources[i] for i in range(len(source_names)) if i != target_idx).cpu().numpy().T
86
+ isolated = sources[target_idx].cpu().numpy().T
 
 
87
 
88
+ return to_mp3(backing, MODEL.samplerate), to_mp3(isolated, MODEL.samplerate)
 
 
 
 
89
 
90
 
91
# Gradio UI wiring: one audio upload plus an instrument dropdown in,
# two MP3 players out (backing track and the isolated stem), all routed
# through process() defined above.
demo = gr.Interface(
    fn=process,
    inputs=[
        # type="filepath" hands process() a path on disk rather than raw samples.
        gr.Audio(type="filepath", label="Upload audio (MP3/WAV/FLAC/...)"),
        # INSTRUMENTS is the module-level list of display names mapped to
        # Demucs stem names via INSTRUMENT_MAP inside process().
        gr.Dropdown(choices=INSTRUMENTS, value="Drums", label="Instrument to remove"),
    ],
    outputs=[
        # Order must match process()'s return: (backing_mp3, isolated_mp3).
        gr.Audio(type="filepath", label="Backing track (without selected instrument)"),
        gr.Audio(type="filepath", label="Isolated instrument"),
    ],
    title="BackTrack",
    description=(
        "Upload a song and remove the selected instrument. You get two MP3 files back:\n\n"
        "1. **Backing track** — the song without the selected instrument.\n"
        "2. **Isolated instrument** — just the selected instrument by itself.\n\n"
        "Supports: Drums, Guitar, Bass, Vocals, Piano.\n\n"
        "Running on free CPU provided by Hugging Face. Processing takes about 5 minutes per song."
    ),
    # Disable the flagging button — outputs are throwaway temp files.
    flagging_mode="never",
)