lllindsey0615 committed on
Commit
f35a623
·
1 Parent(s): 4a9cbe4
Files changed (1) hide show
  1. app.py +76 -119
app.py CHANGED
@@ -1,148 +1,105 @@
1
- # app.py
2
-
3
- import os
4
- import uuid
5
  import torch
6
  import torchaudio
7
- import soundfile as sf
8
  import gradio as gr
9
-
10
  from demucs import pretrained
11
  from demucs.apply import apply_model
 
 
 
 
12
 
13
- from pyharp.core import ModelCard, build_endpoint, extend_gradio
14
- from pyharp.labels import LabelList, AudioLabel
15
-
16
- # ───────────────────────────────────────────────────────────────
17
- # Spaces-compatible cache dirs (also works locally)
18
- # ───────────────────────────────────────────────────────────────
19
- os.environ["XDG_CACHE_HOME"] = "/tmp/.cache"
20
- os.environ["TORCH_HOME"] = "/tmp/torch"
21
- os.environ["HF_HOME"] = "/tmp/hf"
22
- os.environ["MPLCONFIGDIR"] = "/tmp/matplotlib"
23
- os.environ["TRANSFORMERS_CACHE"] = "/tmp/hf"
24
-
25
- for d in (os.environ["XDG_CACHE_HOME"], os.environ["TORCH_HOME"], os.environ["HF_HOME"], os.environ["MPLCONFIGDIR"]):
26
- os.makedirs(d, exist_ok=True)
27
-
28
- # Ensure Gradio components support .harp_required()
29
- extend_gradio()
30
 
31
- # ───────────────────────────────────────────────────────────────
32
- # Constants
33
- # ───────────────────────────────────────────────────────────────
34
- DEMUCS_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]
35
  STEM_CHOICES = {
36
  "Vocals": 3,
37
  "Drums": 0,
38
  "Bass": 1,
39
  "Other": 2,
40
- "Instrumental (No Vocals)": "instrumental",
41
  }
42
- DEMUCS_SR = 44100
43
-
44
- # ───────────────────────────────────────────────────────────────
45
- # Utilities
46
- # ───────────────────────────────────────────────────────────────
47
- def ensure_stereo(wave: torch.Tensor) -> torch.Tensor:
48
- if wave.shape[0] == 1:
49
- return wave.repeat(2, 1)
50
- return wave[:2]
51
-
52
- def resample_if_needed(wave: torch.Tensor, sr: int, target_sr: int):
53
- if sr == target_sr:
54
- return wave
55
- return torchaudio.functional.resample(wave, sr, target_sr)
56
-
57
- def save_wav(audio: torch.Tensor, sr: int, stem_name: str) -> str:
58
- out_path = f"/tmp/{stem_name}_{uuid.uuid4().hex}.wav"
59
- sf.write(out_path, audio.cpu().numpy().T, sr)
60
- return out_path
61
-
62
- # ───────────────────────────────────────────────────────────────
63
- # Main processing function
64
- # ───────────────────────────────────────────────────────────────
65
- def process_fn(audio_path: str, model_name: str, stem_choice: str):
66
- # Load and prepare audio
67
- wave, sr = torchaudio.load(audio_path)
68
- wave = ensure_stereo(wave.float())
69
- wave = resample_if_needed(wave, sr, DEMUCS_SR)
70
-
71
- # Load model
72
  model = pretrained.get_model(model_name)
73
- model.to("cpu").eval()
 
 
 
 
 
 
 
74
 
75
- # Apply separation
76
  with torch.no_grad():
77
- batch = wave.unsqueeze(0)
78
- stems = apply_model(model, batch, overlap=0.2, shifts=1, split=True)[0]
 
 
 
 
 
 
 
79
 
80
- # Extract desired stem
81
  if stem_choice == "Instrumental (No Vocals)":
82
- stem_audio = stems[0] + stems[1] + stems[2]
83
  else:
84
- stem_audio = stems[STEM_CHOICES[stem_choice]]
85
-
86
- # Resample back if needed
87
- if DEMUCS_SR != sr:
88
- stem_audio = torchaudio.functional.resample(stem_audio, DEMUCS_SR, sr)
89
-
90
- # Save to temp file
91
- base_name = stem_choice.lower().replace(" ", "_").replace("(", "").replace(")", "")
92
- output_path = save_wav(stem_audio, sr, base_name)
93
-
94
- # Create simple label for full duration
95
- label = AudioLabel(
96
- t=0.0,
97
- duration=stem_audio.shape[-1] / sr,
98
- label=stem_choice,
99
- amplitude=0.0,
100
- color=AudioLabel.hex_color_to_int("#4CAF50"),
101
- )
102
- label_list = LabelList()
103
- label_list.append(label)
104
 
105
- return output_path, label_list
 
 
 
 
 
 
 
 
106
 
107
- # ───────────────────────────────────────────────────────────────
108
- # ModelCard for pyharp
109
- # ───────────────────────────────────────────────────────────────
110
  model_card = ModelCard(
111
- name="Demucs Stem Separator (CPU)",
112
- description="Separate a mix into a chosen stem using Demucs (CPU-only).",
113
- author="Your Name or Team",
114
- tags=["demucs", "source-separation", "audio", "stem", "harp"],
115
  )
116
 
117
- # ───────────────────────────────────────────────────────────────
118
- # Gradio UI and pyharp integration
119
- # ───────────────────────────────────────────────────────────────
120
- with gr.Blocks(title=model_card.name) as demo:
121
- # Define UI inputs
122
- input_audio = gr.Audio(type="filepath", label="Input Audio").harp_required(True)
123
- dropdown_model = gr.Dropdown(label="Demucs Model", choices=DEMUCS_MODELS, value="mdx_extra_q")
124
- dropdown_stem = gr.Dropdown(label="Stem", choices=list(STEM_CHOICES.keys()), value="Vocals")
125
-
126
- # Define UI outputs
127
- output_audio = gr.Audio(type="filepath", label="Output Stem")
128
- output_json = gr.JSON(label="Labels")
 
 
 
 
 
 
 
 
129
 
130
- # Build HARP-compatible endpoint inside the Blocks context
131
  app = build_endpoint(
132
  model_card=model_card,
133
- input_components=[input_audio, dropdown_model, dropdown_stem],
134
- output_components=[output_audio, output_json],
135
- process_fn=process_fn,
136
  )
137
 
138
- # Add the control buttons for HARP
139
- app["controls_button"]
140
- app["controls_data"]
141
- app["process_button"]
142
- app["cancel_button"]
143
-
144
- # Queue and launch
145
- demo.queue()
146
- if __name__ == "__main__":
147
- demo.launch(show_error=True, share=True)
148
-
 
 
 
 
 
1
  import torch
2
  import torchaudio
 
3
  import gradio as gr
 
4
  from demucs import pretrained
5
  from demucs.apply import apply_model
6
+ from audiotools import AudioSignal
7
+ from pyharp.core import ModelCard, build_endpoint
8
+ from pyharp.labels import LabelList
9
+ from pyharp.media.audio import save_audio
10
 
11
+ # Supported models
12
+ DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
 
14
+ # Mapping stem names to indexes
 
 
 
15
  STEM_CHOICES = {
16
  "Vocals": 3,
17
  "Drums": 0,
18
  "Bass": 1,
19
  "Other": 2,
20
+ "Instrumental (No Vocals)": "instrumental"
21
  }
22
+
23
+
24
+ def separate_stem(audio_file_path: str, model_name: str, stem_choice: str) -> AudioSignal:
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
25
  model = pretrained.get_model(model_name)
26
+ device = 'cuda' if torch.cuda.is_available() else 'cpu'
27
+ model.to(device)
28
+ model.eval()
29
+
30
+ waveform, sr = torchaudio.load(audio_file_path)
31
+ is_mono = waveform.shape[0] == 1
32
+ if is_mono:
33
+ waveform = waveform.repeat(2, 1)
34
 
 
35
  with torch.no_grad():
36
+ stems_batch = apply_model(
37
+ model,
38
+ waveform.unsqueeze(0).to(device),
39
+ overlap=0.2,
40
+ shifts=1,
41
+ split=True
42
+ )
43
+
44
+ stems = stems_batch[0]
45
 
 
46
  if stem_choice == "Instrumental (No Vocals)":
47
+ stem = stems[0] + stems[1] + stems[2]
48
  else:
49
+ stem_index = STEM_CHOICES[stem_choice]
50
+ stem = stems[stem_index]
51
+
52
+ if is_mono:
53
+ stem = stem.mean(dim=0, keepdim=True)
54
+
55
+ return AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
56
+
 
 
 
 
 
 
 
 
 
 
 
 
57
 
58
+ def process_fn_stem(audio_file_path: str, demucs_model: str, stem_choice: str):
59
+ """
60
+ PyHARP v3 process function:
61
+ - Separates the chosen stem using Demucs.
62
+ - Saves the stem as a .wav file.
63
+ """
64
+ stem_signal = separate_stem(audio_file_path, model_name=demucs_model, stem_choice=stem_choice)
65
+ stem_path = save_audio(stem_signal, f"{stem_choice.lower().replace(' ', '_')}.wav")
66
+ return stem_path
67
 
68
+
69
+ # Model Card
 
70
  model_card = ModelCard(
71
+ name="Demucs Stem Separator",
72
+ description="Uses Demucs to separate a music track into a selected stem.",
73
+ author="Alexandre Défossez, Nicolas Usunier, Léon Bottou, Francis Bach",
74
+ tags=["demucs", "source-separation", "pyharp", "stems"]
75
  )
76
 
77
+ # Gradio UI
78
+ with gr.Blocks() as demo:
79
+
80
+ input_components = [
81
+ gr.Audio(type="filepath", label="Input Audio").harp_required(True),
82
+ gr.Dropdown(
83
+ label="Select Demucs Model",
84
+ choices=DEMUX_MODELS,
85
+ value="mdx_extra_q"
86
+ ),
87
+ gr.Dropdown(
88
+ label="Select Stem to Separate",
89
+ choices=list(STEM_CHOICES.keys()),
90
+ value="Vocals"
91
+ )
92
+ ]
93
+
94
+ output_components = [
95
+ gr.Audio(type="filepath", label="Separated Output"),
96
+ ]
97
 
 
98
  app = build_endpoint(
99
  model_card=model_card,
100
+ input_components=input_components,
101
+ output_components=output_components,
102
+ process_fn=process_fn_stem
103
  )
104
 
105
+ demo.queue().launch(share=True,show_error=True)