lllindsey0615 committed on
Commit
f7ddc5b
·
1 Parent(s): ab29845

uploaded correct files

Browse files
Files changed (2) hide show
  1. app.py +88 -119
  2. requirements.txt +16 -11
app.py CHANGED
@@ -1,143 +1,112 @@
 
 
1
  import gradio as gr
2
  import spaces # Enables ZeroGPU on Hugging Face
3
- from transformers import AutoModelForCausalLM
4
- from anticipation.sample import generate
5
- from anticipation.convert import events_to_midi, midi_to_events
6
- from anticipation import ops
7
- from anticipation.tokenize import extract_instruments
8
- import torch
9
  from pyharp import *
10
- from safetensors.torch import load_file
11
- import os
12
-
13
-
14
- #Model Choices
15
- SMALL_MODEL = "stanford-crfm/music-small-800k"
16
- MEDIUM_MODEL = "stanford-crfm/music-medium-800k"
17
- LARGE_MODEL = "stanford-crfm/music-large-800k"
18
-
19
- # === Model Card ===
20
- model_card = ModelCard(
21
- name="Anticipatory Music Transformer",
22
- description="Using Anticipatory Music Transformer (AMT) to generate accompaniment for a given MIDI file with selected melody.",
23
- author="John Thickstun, David Hall, Chris Donahue, Percy Liang",
24
- tags=["midi", "generation", "accompaniment"],
25
- midi_in=True,
26
- midi_out=True
27
- )
28
-
29
- model_cache = {}
30
-
31
- '''
32
- def load_amt_model(model_choice):
33
- """Loads and caches the AMT model inside the worker process."""
34
- if model_choice in model_cache:
35
- return model_cache[model_choice]
36
-
37
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
- model = AutoModelForCausalLM.from_pretrained(model_choice).to(device)
39
-
40
- model_cache[model_choice] = model
41
- return model
42
- '''
43
-
44
- def load_amt_model(model_choice):
45
- """Loads and caches the AMT model inside the worker process."""
46
- if model_choice in model_cache:
47
- return model_cache[model_choice]
48
-
49
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
-
51
- if model_choice == LARGE_MODEL:
52
- # Large model uses safetensors
53
- model_dir = "./tmp_music_large"
54
- os.makedirs(model_dir, exist_ok=True)
55
-
56
- print(f"Loading {LARGE_MODEL} from safetensors format...")
57
- model = AutoModelForCausalLM.from_pretrained(
58
- LARGE_MODEL,
59
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
60
- low_cpu_mem_usage=True
61
- ).to(device)
62
- else:
63
- # Small and medium use standard PyTorch .bin format
64
- print(f"Loading {model_choice} from standard format...")
65
- model = AutoModelForCausalLM.from_pretrained(model_choice).to(device)
66
-
67
- model_cache[model_choice] = model
68
- return model
69
 
70
 
 
 
71
 
 
 
 
 
 
 
 
72
 
73
  @spaces.GPU
74
- def generate_accompaniment(midi_file, model_choice, selected_midi_program, history_length):
75
- """Generates accompaniment for the entire MIDI input, conditioned on the user-selected history length."""
76
-
77
- model = load_amt_model(model_choice)
78
- events = midi_to_events(midi_file.name)
79
- total_time = round(ops.max_time(events, seconds=True))
80
-
81
- # Extract melody line using the selected MIDI program number
82
- events, melody = extract_instruments(events, [selected_midi_program])
83
-
84
- if not melody:
85
- return None, "⚠️ Please select a valid MIDI program that contains events."
86
-
87
- history = ops.clip(events, 0, history_length, clip_duration=False)
88
-
89
- # Generate accompaniment for the remaining duration
90
- accompaniment = generate(
91
- model,
92
- history_length, # Start generating after user-defined history length
93
- total_time, # Generate for the full remaining duration
94
- inputs=history,
95
- controls=melody,
96
- top_p=0.95,
97
- debug=False
98
- )
99
-
100
- # Combine the accompaniment with the melody
101
- output_events = ops.clip(ops.combine(accompaniment, melody), 0, total_time, clip_duration=True)
102
-
103
- # Convert back to MIDI
104
- output_midi = "generated_accompaniment_huggingface.mid"
105
- mid = events_to_midi(output_events)
106
- mid.save(output_midi)
107
-
108
- return output_midi, None
 
 
 
109
 
 
 
 
110
 
111
- def process_fn(input_midi, model_choice, selected_midi_program, history_length):
112
- """Processes the input and runs AMT to generate accompaniment for the full MIDI file."""
113
- output_midi, error_message = generate_accompaniment(input_midi, model_choice, selected_midi_program, history_length)
114
 
115
- if error_message:
116
- return None, {"message": error_message}
 
 
 
 
 
 
 
117
 
118
- output_labels = LabelList()
119
- return output_midi, output_labels
120
 
 
 
 
 
 
 
 
121
 
122
- # === Build HARP gradioEndpoint ===
123
  with gr.Blocks() as demo:
124
  components = [
125
  gr.Dropdown(
126
- choices=[SMALL_MODEL, MEDIUM_MODEL, LARGE_MODEL],
127
- value=MEDIUM_MODEL,
128
- label="Select AMT Model (Faster vs. Higher Quality)"
129
  ),
130
- gr.Slider(0, 127, step=1, value=1, label="Select Melody Instrument (MIDI Program Number)"),
131
- gr.Slider(1, 10, step=1, value=5, label="Select History Length (seconds)")
 
 
 
 
132
  ]
133
 
134
- # Wrap in PyHARP
135
  app = build_endpoint(
136
  model_card=model_card,
137
  components=components,
138
- process_fn=process_fn)
139
-
140
- # Launch PyHARP App
141
- demo.launch(share=True, show_error=True, debug=True)
142
-
143
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
  import gradio as gr
4
  import spaces # Enables ZeroGPU on Hugging Face
5
+ from demucs import pretrained
6
+ from demucs.apply import apply_model
 
 
 
 
7
  from pyharp import *
8
+ from audiotools import AudioSignal
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
# Available Demucs models
# Pretrained Demucs model names accepted by demucs.pretrained.get_model.
DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]

# Maps each UI stem label to its index in the Demucs output tensor.
# Per the usage below (stems[0]+stems[1]+stems[2] is "Drums + Bass + Other"),
# the source order is drums=0, bass=1, other=2, vocals=3.
# "Instrumental (No Vocals)" maps to a string sentinel: it is matched by name
# in separate_stem and never used as a tensor index.
STEM_CHOICES = {
    "Vocals": 3,
    "Drums": 0,
    "Bass": 1,
    "Other": 2,
    "Instrumental (No Vocals)": "instrumental"
}
21
 
22
@spaces.GPU
def separate_stem(audio_file_path: str, model_name: str, stem_choice: str):
    """Separate one stem from an audio file with a pretrained Demucs model.

    Args:
        audio_file_path: Path to the input audio file (anything torchaudio can load).
        model_name: One of DEMUX_MODELS, passed to demucs.pretrained.get_model.
        stem_choice: Key of STEM_CHOICES selecting which stem to return.

    Returns:
        AudioSignal with the requested stem (mono input yields mono output).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load Demucs model
    model = pretrained.get_model(model_name)
    model.to(device)
    model.eval()

    # Load the audio file
    waveform, sr = torchaudio.load(audio_file_path)

    # Demucs models are trained at a fixed sample rate (model.samplerate,
    # 44.1 kHz for the bundled models). Feeding audio at another rate
    # silently degrades separation, so resample first.
    if sr != model.samplerate:
        waveform = torchaudio.functional.resample(waveform, sr, model.samplerate)
        sr = model.samplerate

    # Demucs expects stereo input: duplicate the channel for mono files and
    # remember to fold back down afterwards.
    is_mono = waveform.shape[0] == 1
    if is_mono:
        waveform = waveform.repeat(2, 1)

    # Apply the Demucs model. Pass `device` explicitly: apply_model otherwise
    # infers the device from the mix tensor, which here is CPU-resident even
    # when the model was moved to CUDA above (device mismatch bug).
    with torch.no_grad():
        stems_batch = apply_model(
            model,
            waveform.unsqueeze(0),  # add batch dim -> (1, channels, samples)
            overlap=0.2,
            shifts=1,
            split=True,
            device=device,
        )

    # stems_batch shape: (batch, stems, channels, samples) -> drop batch dim
    stems = stems_batch[0]

    print(f"Model '{model_name}' extracted stems shape: {stems.shape}")

    if stem_choice == "Instrumental (No Vocals)":
        # Everything except vocals: Drums (0) + Bass (1) + Other (2)
        stem = stems[0] + stems[1] + stems[2]
    else:
        stem_index = STEM_CHOICES[stem_choice]
        stem = stems[stem_index]

    # Convert back to mono if the input was originally mono
    if is_mono:
        stem = stem.mean(dim=0, keepdim=True)  # Stereo -> Mono

    # Wrap as a float32 AudioSignal for PyHARP's save_audio helper.
    stem_signal = AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
    return stem_signal
69
 
70
def process_fn_stem(audio_file_path: str, demucs_model: str, stem_choice: str):
    """PyHARP process function: separate the chosen stem and write it to disk.

    Runs Demucs via separate_stem, saves the result as a .wav named after the
    stem, and returns the path together with an empty LabelList (HARP expects
    both outputs).
    """
    signal = separate_stem(
        audio_file_path,
        model_name=demucs_model,
        stem_choice=stem_choice,
    )
    out_name = "{}.wav".format(stem_choice.lower().replace(" ", "_"))
    out_path = save_audio(signal, out_name)
    return out_path, LabelList(labels=[])
79
 
 
 
80
 
81
# Define the model card
# PyHARP ModelCard: metadata describing this endpoint to HARP clients.
model_card = ModelCard(
    name="Demucs Stem Separator",
    description="Uses Demucs to separate a music track into a selected stem.",
    author="Alexandre Défossez, Nicolas Usunier, Léon Bottou, Francis Bach",
    tags=["demucs", "source-separation", "pyharp", "stems"]
)
88
 
89
# Build Gradio interface with dropdowns for model and stem selection
with gr.Blocks() as demo:
    # Extra UI inputs forwarded to process_fn_stem.
    # NOTE(review): presumably build_endpoint supplies the audio-file input
    # itself and passes these after it — confirm against pyharp docs.
    components = [
        gr.Dropdown(
            label="Select Demucs Model",
            choices=DEMUX_MODELS,
            value="mdx_extra_q"
        ),
        gr.Dropdown(
            label="Select Stem to Separate",
            choices=list(STEM_CHOICES.keys()),
            value="Vocals"
        )
    ]

    # Wire the model card, inputs, and process function into a
    # HARP-compatible Gradio endpoint.
    app = build_endpoint(
        model_card=model_card,
        components=components,
        process_fn=process_fn_stem
    )

# Enable request queuing before launching (serializes GPU jobs on Spaces).
demo.queue()
demo.launch(share=True, show_error=True)
requirements.txt CHANGED
@@ -1,12 +1,17 @@
1
  -e git+https://github.com/TEAMuP-dev/pyharp.git#egg=pyharp
2
- midi2audio == 0.1.1
3
- mido == 1.2.10
4
- numpy >= 1.22.4
5
- torch >= 2.0.1
6
- transformers == 4.29.2
7
- safetensors
8
- accelerate
9
- tqdm == 4.65.0
10
-
11
- # Additional dependencies for AMT
12
- git+https://github.com/jthickstun/anticipation.git
 
 
 
 
 
 
1
  -e git+https://github.com/TEAMuP-dev/pyharp.git#egg=pyharp
2
+ demucs
3
+ dora-search
4
+ einops
5
+ julius>=0.2.3
6
+ lameenc>=1.2
7
+ openunmix
8
+ pyyaml
9
+ tqdm
10
+ torch>=1.8.1, <2.1
11
+ torchaudio>=0.8, <2.1
12
+ diffq>=0.2.1
13
+ ffmpeg
14
+ numpy<2
15
+ scipy
16
+ soundfile
17
+ hydra-core>=1.1