demucs-cpu

Sleeping

App Files Files Community

lllindsey0615 committed on Feb 18, 2025

Commit

31bdbd1

1 Parent(s): 1035bfa

added model and stem selection

Browse files

Files changed (1) hide show

app.py +63 -39

app.py CHANGED Viewed

@@ -3,85 +3,109 @@ import torchaudio
 import gradio as gr
 from demucs import pretrained
 from demucs.apply import apply_model
-from pyharp import ModelCard, LabelList, build_endpoint, save_audio
 from audiotools import AudioSignal
 DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]
-def separate_instrumental(audio_file_path: str, model_name: str):
     # Load Demucs model
     model = pretrained.get_model(model_name)
     model.to('cuda' if torch.cuda.is_available() else 'cpu')
     model.eval()
-    # Load audio file (waveform shape: (channels, samples))
     waveform, sr = torchaudio.load(audio_file_path)
-    # Check if the input is mono
     is_mono = waveform.shape[0] == 1
-    # If mono, duplicate to stereo
     if is_mono:
-        waveform = waveform.repeat(2, 1)
-    # Run Demucs — returns a list (batch, stems, channels, samples)
     with torch.no_grad():
         stems_batch = apply_model(
             model,
-            waveform.unsqueeze(0),  # shape: (batch=1, channels, samples)
             overlap=0.2,
             shifts=1,
             split=True
         )
-    stems = stems_batch[0]  # Extract stems from batch
-    # Extract the instrumental stem (stems[0] is vocals in most models)
-    instrumental = stems[0]
     if is_mono:
-        instrumental = instrumental.mean(dim=0, keepdim=True)  # Stereo → Mono
-    # Convert to an AudioSignal object
-    instrumental_signal = AudioSignal(instrumental.cpu().numpy(), sample_rate=sr)
-    return instrumental_signal
-def process_fn_instrumental(audio_file_path: str):
     """
-    pyharp instrumentals_fn:
-      - Receives an audio file path
-      - Separates the audio into a instrumental stem with Demucs
-      - Saves the instrumental stem as a .wav file
-      - Returns the file path of the instrumental stem and an empty LabelList
     """
-    # Separate instrumental using the hardcoded model
-    instrumental_signal = separate_instrumental(audio_file_path, model_name='mdx_extra_q')
-    # Save the instrumental stem to a .wav file
-    instrumental_path= save_audio(instrumental_signal, "instrumental.wav")
-    # Return the instrumental file path and an empty LabelList
-    return instrumental_path, LabelList(labels=[])
 # Define the model card
 model_card = ModelCard(
-    name="Demucs Vocal Separator",
-    description="Uses Demucs to separate a music track into a vocal stem.",
     author="Alexandre Défossez, Nicolas Usunier, Léon Bottou, Francis Bach",
-    tags=["demucs", "source-separation", "pyharp", "vocals"]
 )
-# Build Gradio interface
 with gr.Blocks() as demo:
-    # Build the Gradio endpoint (only audio input, no dropdown)
     app = build_endpoint(
         model_card=model_card,
-        components=[],
-        process_fn=process_fn_instrumental
     )
 demo.queue()
-demo.launch(share=True, show_error=True)

 import gradio as gr
 from demucs import pretrained
 from demucs.apply import apply_model
+from pyharp import *
 from audiotools import AudioSignal
+# Available Demucs models
 DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]
+# Maps each dropdown label to the position of that stem along the stems axis
+# of the separation output (drums=0, bass=1, other=2, vocals=3).
+# NOTE(review): presumably this mirrors the Demucs source order — confirm
+# against the loaded model's `sources` list.
+# Key order matters: the keys are listed as-is in the stem dropdown.
+STEM_CHOICES = {
+    "Vocals": 3,
+    "Drums": 0,
+    "Bass": 1,
+    "Other": 2,
+    # Sentinel rather than an index: separate_stem handles this label by name
+    # and never uses the value to index the stems.
+    "Instrumental (No Vocals)": "instrumental"
+}
+def separate_stem(audio_file_path: str, model_name: str, stem_choice: str) -> AudioSignal:
+    """
+    Separate one stem from an audio file with a pretrained Demucs model.
+
+    Args:
+        audio_file_path: Path of the audio file, loaded with torchaudio.
+        model_name: Name passed to `pretrained.get_model`.
+        stem_choice: A key of STEM_CHOICES, e.g. "Vocals" or
+            "Instrumental (No Vocals)".
+
+    Returns:
+        An AudioSignal holding the selected stem as float32 at the model's
+        sample rate; mono if the input file was mono.
+
+    Raises:
+        KeyError: If `stem_choice` is neither a source of the model nor a
+            key of STEM_CHOICES.
+    """
     # Load Demucs model
     model = pretrained.get_model(model_name)
     model.to('cuda' if torch.cuda.is_available() else 'cpu')
     model.eval()
+    # apply_model runs on the input tensor's device unless told otherwise, so
+    # remember where the model lives and pass it explicitly below.
+    device = next(model.parameters()).device
+    # Load the audio file
     waveform, sr = torchaudio.load(audio_file_path)
+    # Demucs expects audio at the model's own sample rate; resample otherwise
+    # and report the new rate on the returned signal.
+    if sr != model.samplerate:
+        waveform = torchaudio.functional.resample(waveform, sr, model.samplerate)
+        sr = model.samplerate
+    # Check if input is mono
     is_mono = waveform.shape[0] == 1
     if is_mono:
+        waveform = waveform.repeat(2, 1)  # Convert mono to stereo for Demucs
+    # Apply Demucs model
     with torch.no_grad():
         stems_batch = apply_model(
             model,
+            waveform.unsqueeze(0),
+            device=device,
             overlap=0.2,
             shifts=1,
             split=True
         )
+    # stems shape: (batch, stems, channels, samples)
+    stems = stems_batch[0]
+    print(f"Model '{model_name}' extracted stems shape: {stems.shape}")
+    # Resolve stems by name through the model's own source list rather than
+    # by fixed position, so the result does not depend on a hard-coded order.
+    source_names = list(model.sources)
+    if stem_choice == "Instrumental (No Vocals)":
+        # Everything except the vocals, summed into a single mix.
+        keep = [i for i, name in enumerate(source_names) if name != "vocals"]
+        stem = stems[keep].sum(dim=0)
+    elif stem_choice.lower() in source_names:
+        stem = stems[source_names.index(stem_choice.lower())]
+    else:
+        # Fall back to the static table; raises KeyError for unknown choices.
+        stem = stems[STEM_CHOICES[stem_choice]]
+    # Convert back to mono if the input was originally mono
     if is_mono:
+        stem = stem.mean(dim=0, keepdim=True)  # Stereo → Mono
+    # Convert to AudioSignal with float32 dtype
+    stem_signal = AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
+    return stem_signal
+def process_fn_stem(audio_file_path: str, demucs_model: str, stem_choice: str):
     """
+    Entry point handed to PyHARP's build_endpoint.
+
+    Runs separate_stem on the input file with the selected model and stem,
+    writes the result with save_audio under a file name derived from the
+    stem choice (lower-cased, spaces replaced by underscores, ".wav"), and
+    returns save_audio's result together with an empty LabelList.
     """
+    separated = separate_stem(
+        audio_file_path,
+        model_name=demucs_model,
+        stem_choice=stem_choice,
+    )
+    output_name = stem_choice.lower().replace(' ', '_') + ".wav"
+    saved_path = save_audio(separated, output_name)
+    return saved_path, LabelList(labels=[])
 # Define the model card
 model_card = ModelCard(
+    name="Demucs Stem Separator",
+    description="Uses Demucs to separate a music track into a selected stem.",
     author="Alexandre Défossez, Nicolas Usunier, Léon Bottou, Francis Bach",
+    tags=["demucs", "source-separation", "pyharp", "stems"]
 )
+# Build Gradio interface with dropdowns for model and stem selection
 with gr.Blocks() as demo:
+    # One dropdown per extra argument of process_fn_stem: the model name
+    # first, then the stem label.
+    # NOTE(review): presumably build_endpoint passes these values to
+    # process_fn after the audio path, in list order — confirm against pyharp.
+    components = [
+        gr.Dropdown(
+            label="Select Demucs Model",
+            choices=DEMUX_MODELS,
+            value="mdx_extra_q"
+        ),
+        gr.Dropdown(
+            label="Select Stem to Separate",
+            choices=list(STEM_CHOICES.keys()),
+            value="Vocals"
+        )
+    ]
+    # Wire the model card, the controls and the process function together.
     app = build_endpoint(
         model_card=model_card,
+        components=components,
+        process_fn=process_fn_stem
     )
+# Enable request queuing before starting the server.
 demo.queue()
+# share=True asks Gradio for a public link; show_error=True surfaces
+# exceptions raised by the process function in the UI.
+demo.launch(share=True, show_error=True)