demucs-cpu

Sleeping

App Files Community

lllindsey0615 committed on Apr 2, 2025

Commit

a52caef

1 Parent(s): c065186

add oauth

Browse files

Files changed (2) hide show

README.md +1 -0
app.py +55 -75

README.md CHANGED Viewed

@@ -13,4 +13,5 @@ hf_oauth: true
 hf_oauth_expiration_minutes: 480
 hf_oauth_scopes:
   - inference-api
 ---

 hf_oauth_expiration_minutes: 480
 hf_oauth_scopes:
   - inference-api
+  - read
 ---

app.py CHANGED Viewed

@@ -5,39 +5,33 @@ from demucs import pretrained
 from demucs.apply import apply_model
 from pyharp import *
 from audiotools import AudioSignal
-from typing import Tuple, Dict
-# Available Demucs models
 DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]
 STEM_CHOICES = {
-    "Vocals": 3,
-    "Drums": 0,
-    "Bass": 1,
-    "Other": 2,
     "Instrumental (No Vocals)": "instrumental"
 }
-def separate_stem(audio_file_path: str, model_name: str, stem_choice: str):
-    """
-    Separates an audio file into the chosen stem using a Demucs model.
-    Ensures correct stem ordering and supports mono input.
-    """
-    # Load Demucs model
     model = pretrained.get_model(model_name)
     model.to('cuda' if torch.cuda.is_available() else 'cpu')
     model.eval()
-    # Load the audio file
     waveform, sr = torchaudio.load(audio_file_path)
-    # Check if input is mono
     is_mono = waveform.shape[0] == 1
     if is_mono:
-        waveform = waveform.repeat(2, 1)  # Convert mono to stereo for Demucs
-    # Apply Demucs model
     with torch.no_grad():
         stems_batch = apply_model(
             model,
@@ -47,92 +41,78 @@ def separate_stem(audio_file_path: str, model_name: str, stem_choice: str):
             split=True
         )
-    # stems shape: (batch, stems, channels, samples)
-    stems = stems_batch[0]
-    print(f"Model '{model_name}' extracted stems shape: {stems.shape}")
     if stem_choice == "Instrumental (No Vocals)":
-        stem = stems[0] + stems[1] + stems[2]  # Drums + Bass + Other
     else:
         stem_index = STEM_CHOICES[stem_choice]
         stem = stems[stem_index]
-    # Convert back to mono if the input was originally mono
     if is_mono:
-        stem = stem.mean(dim=0, keepdim=True)  # Stereo → Mono
-    # Convert to AudioSignal with float32 dtype
-    stem_signal = AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
-    return stem_signal
-def label_list_to_dict(label_list: LabelList) -> dict:
-    def clean_dict(obj):
-        return {k: v for k, v in vars(obj).items() if v is not None}
     return {
         "meta": label_list.meta,
-        "labels": [clean_dict(label) for label in label_list.labels]
     }
 def process_fn_stem(
     audio_file_path: str,
     demucs_model: str,
     stem_choice: str,
-    profile: gr.OAuthProfile | None = None,
-    token: gr.OAuthToken | None = None
-) -> Tuple[str, Dict]:
     username = profile.username if profile else "anonymous"
-    print(f"User: {username}")
-    # Separate stem
-    stem_signal = separate_stem(
-        audio_file_path,
-        model_name=demucs_model,
-        stem_choice=stem_choice
-    )
-    # Save output
-    stem_path = save_audio(stem_signal, f"{stem_choice.lower().replace(' ', '_')}.wav")
-    # Dummy label + metadata with user info
-    label_list = LabelList(labels=[
-        AudioLabel(t=0.0, label="Dummy", amplitude=0.5)
-    ])
-    label_list.meta["user"] = username
-    return stem_path, label_list_to_dict(label_list)
-# Define the model card
-model_card = ModelCard(
-    name="Demucs Stem Separator",
-    description="Uses Demucs to separate a music track into a selected stem.",
-    author="Alexandre Défossez, Nicolas Usunier, Léon Bottou, Francis Bach",
-    tags=["demucs", "source-separation", "pyharp", "stems"]
-)
-# Build Gradio interface with dropdowns for model and stem selection
 with gr.Blocks() as demo:
-    # Add the built-in LoginButton to let users sign in with their HF account.
     gr.LoginButton()
-    dropdown_model = gr.Dropdown(
-        label="Select Demucs Model",
-        choices=DEMUX_MODELS,
-        value="mdx_extra_q"
-    )
-    dropdown_stem = gr.Dropdown(
-        label="Select Stem to Separate",
-        choices=list(STEM_CHOICES.keys()),
-        value="Vocals"
-    )
-    app = build_endpoint(
-        model_card=model_card,
-        components=[dropdown_model, dropdown_stem],
-        process_fn=process_fn_stem
     )
 demo.queue()
-demo.launch(show_error=True,share=True)

 from demucs.apply import apply_model
 from pyharp import *
 from audiotools import AudioSignal
+from typing import Dict
+from pyharp.label import AudioLabel, LabelList
 DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]
 STEM_CHOICES = {
+    "Vocals": 3,
+    "Drums": 0,
+    "Bass": 1,
+    "Other": 2,
     "Instrumental (No Vocals)": "instrumental"
 }
+# Stem Separation
+def separate_stem(audio_file_path: str, model_name: str, stem_choice: str) -> AudioSignal:
     model = pretrained.get_model(model_name)
     model.to('cuda' if torch.cuda.is_available() else 'cpu')
     model.eval()
     waveform, sr = torchaudio.load(audio_file_path)
     is_mono = waveform.shape[0] == 1
     if is_mono:
+        waveform = waveform.repeat(2, 1)
     with torch.no_grad():
         stems_batch = apply_model(
             model,
             split=True
         )
+    stems = stems_batch[0]
     if stem_choice == "Instrumental (No Vocals)":
+        stem = stems[0] + stems[1] + stems[2]
     else:
         stem_index = STEM_CHOICES[stem_choice]
         stem = stems[stem_index]
     if is_mono:
+        stem = stem.mean(dim=0, keepdim=True)
+    return AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
+# Label & Metadata Handling
+def generate_dummy_metadata(stem_choice: str, username: str) -> Dict:
+    dummy_label = AudioLabel(
+        t=0.0,
+        label=stem_choice,
+        amplitude=0.7,
+        description=f"Start of {stem_choice} stem",
+        color=AudioLabel.hex_color_to_int("#FF5733")
+    )
+    label_list = LabelList(labels=[dummy_label])
+    label_list.meta["user"] = username
     return {
         "meta": label_list.meta,
+        "labels": [vars(label) for label in label_list.labels]
     }
 def process_fn_stem(
     audio_file_path: str,
     demucs_model: str,
     stem_choice: str,
+    profile: gr.OAuthProfile | None = None
+) -> tuple:
     username = profile.username if profile else "anonymous"
+    print(f"Processing for user: {username}")
+    stem_signal = separate_stem(audio_file_path, model_name=demucs_model, stem_choice=stem_choice)
+    stem_filename = f"{stem_choice.lower().replace(' ', '_')}.wav"
+    stem_path = save_audio(stem_signal, stem_filename)
+    metadata = generate_dummy_metadata(stem_choice, username)
+    return stem_path, metadata
+# Gradio Interface
 with gr.Blocks() as demo:
+    gr.Markdown("# 🎧 Demucs Stem Separator")
+    gr.Markdown("Sign in with your Hugging Face account to use this tool.")
     gr.LoginButton()
+    with gr.Row():
+        model_dropdown = gr.Dropdown(label="Select Demucs Model", choices=DEMUX_MODELS, value="mdx_extra_q")
+        stem_dropdown = gr.Dropdown(label="Select Stem", choices=list(STEM_CHOICES.keys()), value="Vocals")
+    audio_input = gr.Audio(label="Upload Audio", type="filepath")
+    stem_output = gr.File(label="Separated Stem (.wav)")
+    metadata_output = gr.JSON(label="Separation Metadata")
+    run_button = gr.Button("Separate Stem")
+    run_button.click(
+        fn=process_fn_stem,
+        inputs=[audio_input, model_dropdown, stem_dropdown],
+        outputs=[stem_output, metadata_output]
     )
 demo.queue()