lllindsey0615 committed on
Commit
a52caef
·
1 Parent(s): c065186
Files changed (2) hide show
  1. README.md +1 -0
  2. app.py +55 -75
README.md CHANGED
@@ -13,4 +13,5 @@ hf_oauth: true
13
  hf_oauth_expiration_minutes: 480
14
  hf_oauth_scopes:
15
  - inference-api
 
16
  ---
 
13
  hf_oauth_expiration_minutes: 480
14
  hf_oauth_scopes:
15
  - inference-api
16
+ - read
17
  ---
app.py CHANGED
@@ -5,39 +5,33 @@ from demucs import pretrained
5
  from demucs.apply import apply_model
6
  from pyharp import *
7
  from audiotools import AudioSignal
8
- from typing import Tuple, Dict
 
 
9
 
10
 
11
# Pretrained Demucs checkpoints offered to the user.
DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]

# UI label -> index used to pick a stem out of the separated output.
# The instrumental entry is a sentinel: separate_stem special-cases that
# label and sums stems 0, 1 and 2 (drums + bass + other) instead of indexing.
STEM_CHOICES = {
    "Vocals": 3,
    "Drums": 0,
    "Bass": 1,
    "Other": 2,
    "Instrumental (No Vocals)": "instrumental",
}
21
 
22
- def separate_stem(audio_file_path: str, model_name: str, stem_choice: str):
23
- """
24
- Separates an audio file into the chosen stem using a Demucs model.
25
- Ensures correct stem ordering and supports mono input.
26
- """
27
- # Load Demucs model
28
  model = pretrained.get_model(model_name)
29
  model.to('cuda' if torch.cuda.is_available() else 'cpu')
30
  model.eval()
31
 
32
- # Load the audio file
33
  waveform, sr = torchaudio.load(audio_file_path)
34
-
35
- # Check if input is mono
36
  is_mono = waveform.shape[0] == 1
37
  if is_mono:
38
- waveform = waveform.repeat(2, 1) # Convert mono to stereo for Demucs
39
 
40
- # Apply Demucs model
41
  with torch.no_grad():
42
  stems_batch = apply_model(
43
  model,
@@ -47,92 +41,78 @@ def separate_stem(audio_file_path: str, model_name: str, stem_choice: str):
47
  split=True
48
  )
49
 
50
- # stems shape: (batch, stems, channels, samples)
51
- stems = stems_batch[0]
52
-
53
- print(f"Model '{model_name}' extracted stems shape: {stems.shape}")
54
 
55
  if stem_choice == "Instrumental (No Vocals)":
56
- stem = stems[0] + stems[1] + stems[2] # Drums + Bass + Other
57
  else:
58
  stem_index = STEM_CHOICES[stem_choice]
59
  stem = stems[stem_index]
60
 
61
- # Convert back to mono if the input was originally mono
62
  if is_mono:
63
- stem = stem.mean(dim=0, keepdim=True) # Stereo → Mono
64
-
65
- # Convert to AudioSignal with float32 dtype
66
- stem_signal = AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
67
- return stem_signal
 
 
 
 
 
 
 
 
68
 
69
def label_list_to_dict(label_list: LabelList) -> dict:
    """Flatten a LabelList into a plain dictionary.

    Args:
        label_list: Object exposing ``meta`` and an iterable ``labels``.

    Returns:
        A dict with two keys: ``"meta"`` (the list's ``meta`` object, passed
        through as-is) and ``"labels"`` (one dict per label built from the
        label's instance attributes, with ``None``-valued attributes omitted).
    """
    meta = label_list.meta
    serialized = []
    for label in label_list.labels:
        # Keep only the attributes that were actually set on the label.
        present = {key: val for key, val in vars(label).items() if val is not None}
        serialized.append(present)
    return {"meta": meta, "labels": serialized}
77
 
 
78
def process_fn_stem(
    audio_file_path: str,
    demucs_model: str,
    stem_choice: str,
    profile: gr.OAuthProfile | None = None,
    token: gr.OAuthToken | None = None
) -> Tuple[str, Dict]:
    """Separate one stem from an audio file and return it with dummy labels.

    Args:
        audio_file_path: Path of the audio file to separate.
        demucs_model: Name of the Demucs model, forwarded to separate_stem.
        stem_choice: Stem label, forwarded to separate_stem and also used to
            derive the output file name.
        profile: Optional OAuth profile; its ``username`` is recorded in the
            metadata, or "anonymous" when no profile is given.
        token: Optional OAuth token; not used by this function.

    Returns:
        A pair of (value returned by save_audio, label dictionary produced by
        label_list_to_dict).
    """
    if profile:
        username = profile.username
    else:
        username = "anonymous"
    print(f"User: {username}")

    separated = separate_stem(
        audio_file_path,
        model_name=demucs_model,
        stem_choice=stem_choice,
    )

    # Output name is the stem label lower-cased with spaces turned into underscores.
    output_name = f"{stem_choice.lower().replace(' ', '_')}.wav"
    stem_path = save_audio(separated, output_name)

    # Placeholder label; the metadata carries the requesting user's name.
    placeholder = AudioLabel(t=0.0, label="Dummy", amplitude=0.5)
    label_list = LabelList(labels=[placeholder])
    label_list.meta["user"] = username

    return stem_path, label_list_to_dict(label_list)
105
 
 
106
 
107
# Model card describing this endpoint.
model_card = ModelCard(
    name="Demucs Stem Separator",
    description="Uses Demucs to separate a music track into a selected stem.",
    author="Alexandre Défossez, Nicolas Usunier, Léon Bottou, Francis Bach",
    tags=["demucs", "source-separation", "pyharp", "stems"],
)

# Gradio UI: a login button plus two dropdowns (model, stem) wired into the
# endpoint built by build_endpoint.
# NOTE(review): nesting was reconstructed from a flattened diff view —
# confirm whether queue()/launch() originally sat inside the Blocks context.
with gr.Blocks() as demo:
    # Built-in login button so users can sign in with their HF account.
    gr.LoginButton()

    dropdown_model = gr.Dropdown(
        label="Select Demucs Model",
        choices=DEMUX_MODELS,
        value="mdx_extra_q",
    )
    dropdown_stem = gr.Dropdown(
        label="Select Stem to Separate",
        choices=list(STEM_CHOICES.keys()),
        value="Vocals",
    )

    app = build_endpoint(
        model_card=model_card,
        components=[dropdown_model, dropdown_stem],
        process_fn=process_fn_stem,
    )

demo.queue()
demo.launch(show_error=True, share=True)
 
5
  from demucs.apply import apply_model
6
  from pyharp import *
7
  from audiotools import AudioSignal
8
+ from typing import Dict
9
+ from pyharp.label import AudioLabel, LabelList
10
+
11
 
12
 
 
13
# Pretrained Demucs checkpoints offered in the model dropdown.
DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]

# UI label -> index used by separate_stem to pick a stem from the model output.
# "Instrumental (No Vocals)" maps to a sentinel string: separate_stem checks
# for that label explicitly and sums stems 0, 1 and 2 rather than indexing.
STEM_CHOICES = {
    "Vocals": 3,
    "Drums": 0,
    "Bass": 1,
    "Other": 2,
    "Instrumental (No Vocals)": "instrumental",
}
22
 
23
+
24
+ # Stem Separation
25
+ def separate_stem(audio_file_path: str, model_name: str, stem_choice: str) -> AudioSignal:
 
 
 
26
  model = pretrained.get_model(model_name)
27
  model.to('cuda' if torch.cuda.is_available() else 'cpu')
28
  model.eval()
29
 
 
30
  waveform, sr = torchaudio.load(audio_file_path)
 
 
31
  is_mono = waveform.shape[0] == 1
32
  if is_mono:
33
+ waveform = waveform.repeat(2, 1)
34
 
 
35
  with torch.no_grad():
36
  stems_batch = apply_model(
37
  model,
 
41
  split=True
42
  )
43
 
44
+ stems = stems_batch[0]
 
 
 
45
 
46
  if stem_choice == "Instrumental (No Vocals)":
47
+ stem = stems[0] + stems[1] + stems[2]
48
  else:
49
  stem_index = STEM_CHOICES[stem_choice]
50
  stem = stems[stem_index]
51
 
 
52
  if is_mono:
53
+ stem = stem.mean(dim=0, keepdim=True)
54
+
55
+ return AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
56
+
57
+ # Label & Metadata Handling
58
def generate_dummy_metadata(stem_choice: str, username: str) -> Dict:
    """Build placeholder label metadata for a separated stem.

    Creates a single AudioLabel at t=0.0 named after the stem, wraps it in a
    LabelList, and records the user name under the list's ``meta["user"]``.

    Args:
        stem_choice: Stem label; used as the label text and in its description.
        username: Name stored in the metadata.

    Returns:
        A dict with ``"meta"`` (the LabelList's meta object) and ``"labels"``
        (the instance-attribute dict of each label, as returned by ``vars``).
    """
    marker_color = AudioLabel.hex_color_to_int("#FF5733")
    placeholder = AudioLabel(
        t=0.0,
        label=stem_choice,
        amplitude=0.7,
        description=f"Start of {stem_choice} stem",
        color=marker_color,
    )

    labels = LabelList(labels=[placeholder])
    labels.meta["user"] = username

    serialized = [vars(entry) for entry in labels.labels]
    return {"meta": labels.meta, "labels": serialized}
74
 
75
+
76
def process_fn_stem(
    audio_file_path: str,
    demucs_model: str,
    stem_choice: str,
    profile: gr.OAuthProfile | None = None
) -> tuple:
    """Separate one stem from an uploaded file and return it with metadata.

    Args:
        audio_file_path: Path of the uploaded audio file. The audio input is
            declared with ``type="filepath"``, so this is empty/None when the
            user has not uploaded anything.
        demucs_model: Name of the Demucs model, forwarded to separate_stem.
        stem_choice: Stem label, forwarded to separate_stem and also used to
            derive the output file name.
        profile: Optional OAuth profile of the signed-in user; "anonymous" is
            used when it is absent.

    Returns:
        A pair of (value returned by save_audio, metadata dict from
        generate_dummy_metadata).

    Raises:
        gr.Error: If no audio file was provided.
    """
    # Fail early with a readable message instead of letting a missing upload
    # surface as an opaque loader exception deep inside separate_stem.
    if not audio_file_path:
        raise gr.Error("Please upload an audio file before separating.")

    username = profile.username if profile else "anonymous"
    print(f"Processing for user: {username}")

    stem_signal = separate_stem(audio_file_path, model_name=demucs_model, stem_choice=stem_choice)
    # Output name is the stem label lower-cased with spaces turned into underscores.
    stem_filename = f"{stem_choice.lower().replace(' ', '_')}.wav"
    stem_path = save_audio(stem_signal, stem_filename)

    metadata = generate_dummy_metadata(stem_choice, username)
    return stem_path, metadata
 
 
 
91
 
 
92
 
93
+ # Gradio Interface
94
 
 
 
 
 
 
 
 
95
 
 
96
# NOTE(review): nesting was reconstructed from a flattened diff view —
# confirm which components originally sat inside the Row.
with gr.Blocks() as demo:
    # Page header and sign-in prompt.
    gr.Markdown("# 🎧 Demucs Stem Separator")
    gr.Markdown("Sign in with your Hugging Face account to use this tool.")

    gr.LoginButton()

    # Model and stem selectors share one row.
    with gr.Row():
        model_dropdown = gr.Dropdown(
            label="Select Demucs Model",
            choices=DEMUX_MODELS,
            value="mdx_extra_q",
        )
        stem_dropdown = gr.Dropdown(
            label="Select Stem",
            choices=list(STEM_CHOICES.keys()),
            value="Vocals",
        )

    # Input file, plus the two outputs produced by process_fn_stem.
    audio_input = gr.Audio(label="Upload Audio", type="filepath")
    stem_output = gr.File(label="Separated Stem (.wav)")
    metadata_output = gr.JSON(label="Separation Metadata")

    run_button = gr.Button("Separate Stem")

    # Clicking the button runs the separation on the current selections.
    run_button.click(
        fn=process_fn_stem,
        inputs=[audio_input, model_dropdown, stem_dropdown],
        outputs=[stem_output, metadata_output],
    )

demo.queue()