lllindsey0615 committed on
Commit
f7ddc5b
·
1 Parent(s): ab29845

uploaded correct files

Browse files
Files changed (2) hide show
  1. app.py +88 -119
  2. requirements.txt +16 -11
app.py CHANGED
@@ -1,143 +1,112 @@
 
 
1
  import gradio as gr
2
  import spaces # Enables ZeroGPU on Hugging Face
3
- from transformers import AutoModelForCausalLM
4
- from anticipation.sample import generate
5
- from anticipation.convert import events_to_midi, midi_to_events
6
- from anticipation import ops
7
- from anticipation.tokenize import extract_instruments
8
- import torch
9
  from pyharp import *
10
- from safetensors.torch import load_file
11
- import os
12
-
13
-
14
- #Model Choices
15
- SMALL_MODEL = "stanford-crfm/music-small-800k"
16
- MEDIUM_MODEL = "stanford-crfm/music-medium-800k"
17
- LARGE_MODEL = "stanford-crfm/music-large-800k"
18
-
19
- # === Model Card ===
20
- model_card = ModelCard(
21
- name="Anticipatory Music Transformer",
22
- description="Using Anticipatory Music Transformer (AMT) to generate accompaniment for a given MIDI file with selected melody.",
23
- author="John Thickstun, David Hall, Chris Donahue, Percy Liang",
24
- tags=["midi", "generation", "accompaniment"],
25
- midi_in=True,
26
- midi_out=True
27
- )
28
-
29
- model_cache = {}
30
-
31
- '''
32
- def load_amt_model(model_choice):
33
- """Loads and caches the AMT model inside the worker process."""
34
- if model_choice in model_cache:
35
- return model_cache[model_choice]
36
-
37
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
38
- model = AutoModelForCausalLM.from_pretrained(model_choice).to(device)
39
-
40
- model_cache[model_choice] = model
41
- return model
42
- '''
43
-
44
- def load_amt_model(model_choice):
45
- """Loads and caches the AMT model inside the worker process."""
46
- if model_choice in model_cache:
47
- return model_cache[model_choice]
48
-
49
- device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
50
-
51
- if model_choice == LARGE_MODEL:
52
- # Large model uses safetensors
53
- model_dir = "./tmp_music_large"
54
- os.makedirs(model_dir, exist_ok=True)
55
-
56
- print(f"Loading {LARGE_MODEL} from safetensors format...")
57
- model = AutoModelForCausalLM.from_pretrained(
58
- LARGE_MODEL,
59
- torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
60
- low_cpu_mem_usage=True
61
- ).to(device)
62
- else:
63
- # Small and medium use standard PyTorch .bin format
64
- print(f"Loading {model_choice} from standard format...")
65
- model = AutoModelForCausalLM.from_pretrained(model_choice).to(device)
66
-
67
- model_cache[model_choice] = model
68
- return model
69
 
70
 
 
 
71
 
 
 
 
 
 
 
 
72
 
73
  @spaces.GPU
74
- def generate_accompaniment(midi_file, model_choice, selected_midi_program, history_length):
75
- """Generates accompaniment for the entire MIDI input, conditioned on the user-selected history length."""
76
-
77
- model = load_amt_model(model_choice)
78
- events = midi_to_events(midi_file.name)
79
- total_time = round(ops.max_time(events, seconds=True))
80
-
81
- # Extract melody line using the selected MIDI program number
82
- events, melody = extract_instruments(events, [selected_midi_program])
83
-
84
- if not melody:
85
- return None, "⚠️ Please select a valid MIDI program that contains events."
86
-
87
- history = ops.clip(events, 0, history_length, clip_duration=False)
88
-
89
- # Generate accompaniment for the remaining duration
90
- accompaniment = generate(
91
- model,
92
- history_length, # Start generating after user-defined history length
93
- total_time, # Generate for the full remaining duration
94
- inputs=history,
95
- controls=melody,
96
- top_p=0.95,
97
- debug=False
98
- )
99
-
100
- # Combine the accompaniment with the melody
101
- output_events = ops.clip(ops.combine(accompaniment, melody), 0, total_time, clip_duration=True)
102
-
103
- # Convert back to MIDI
104
- output_midi = "generated_accompaniment_huggingface.mid"
105
- mid = events_to_midi(output_events)
106
- mid.save(output_midi)
107
-
108
- return output_midi, None
 
 
 
109
 
 
 
 
110
 
111
- def process_fn(input_midi, model_choice, selected_midi_program, history_length):
112
- """Processes the input and runs AMT to generate accompaniment for the full MIDI file."""
113
- output_midi, error_message = generate_accompaniment(input_midi, model_choice, selected_midi_program, history_length)
114
 
115
- if error_message:
116
- return None, {"message": error_message}
 
 
 
 
 
 
 
117
 
118
- output_labels = LabelList()
119
- return output_midi, output_labels
120
 
 
 
 
 
 
 
 
121
 
122
- # === Build HARP gradioEndpoint ===
123
  with gr.Blocks() as demo:
124
  components = [
125
  gr.Dropdown(
126
- choices=[SMALL_MODEL, MEDIUM_MODEL, LARGE_MODEL],
127
- value=MEDIUM_MODEL,
128
- label="Select AMT Model (Faster vs. Higher Quality)"
129
  ),
130
- gr.Slider(0, 127, step=1, value=1, label="Select Melody Instrument (MIDI Program Number)"),
131
- gr.Slider(1, 10, step=1, value=5, label="Select History Length (seconds)")
 
 
 
 
132
  ]
133
 
134
- # Wrap in PyHARP
135
  app = build_endpoint(
136
  model_card=model_card,
137
  components=components,
138
- process_fn=process_fn)
139
-
140
- # Launch PyHARP App
141
- demo.launch(share=True, show_error=True, debug=True)
142
-
143
 
 
 
 
1
+ import torch
2
+ import torchaudio
3
  import gradio as gr
4
  import spaces # Enables ZeroGPU on Hugging Face
5
+ from demucs import pretrained
6
+ from demucs.apply import apply_model
 
 
 
 
7
  from pyharp import *
8
+ from audiotools import AudioSignal
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
 
11
# Available Demucs models
# Pretrained Demucs model names accepted by demucs.pretrained.get_model.
DEMUX_MODELS = ["mdx_extra_q", "mdx_extra", "htdemucs", "mdx_q"]

# Maps each UI stem label to its index in the Demucs output tensor.
# Per the usage below (stems[0]+stems[1]+stems[2] is "Drums + Bass + Other"),
# the source order is drums=0, bass=1, other=2, vocals=3.
# "Instrumental (No Vocals)" maps to a string sentinel: it is matched by name
# in separate_stem and never used as a tensor index.
STEM_CHOICES = {
    "Vocals": 3,
    "Drums": 0,
    "Bass": 1,
    "Other": 2,
    "Instrumental (No Vocals)": "instrumental"
}
21
 
22
@spaces.GPU
def separate_stem(audio_file_path: str, model_name: str, stem_choice: str):
    """Separate one stem from an audio file with a pretrained Demucs model.

    Args:
        audio_file_path: Path to the input audio file (anything torchaudio can load).
        model_name: One of DEMUX_MODELS, passed to demucs.pretrained.get_model.
        stem_choice: Key of STEM_CHOICES selecting which stem to return.

    Returns:
        AudioSignal with the requested stem (mono input yields mono output).
    """
    device = 'cuda' if torch.cuda.is_available() else 'cpu'

    # Load Demucs model
    model = pretrained.get_model(model_name)
    model.to(device)
    model.eval()

    # Load the audio file
    waveform, sr = torchaudio.load(audio_file_path)

    # Demucs models are trained at a fixed sample rate (model.samplerate,
    # 44.1 kHz for the bundled models). Feeding audio at another rate
    # silently degrades separation, so resample first.
    if sr != model.samplerate:
        waveform = torchaudio.functional.resample(waveform, sr, model.samplerate)
        sr = model.samplerate

    # Demucs expects stereo input: duplicate the channel for mono files and
    # remember to fold back down afterwards.
    is_mono = waveform.shape[0] == 1
    if is_mono:
        waveform = waveform.repeat(2, 1)

    # Apply the Demucs model. Pass `device` explicitly: apply_model otherwise
    # infers the device from the mix tensor, which here is CPU-resident even
    # when the model was moved to CUDA above (device mismatch bug).
    with torch.no_grad():
        stems_batch = apply_model(
            model,
            waveform.unsqueeze(0),  # add batch dim -> (1, channels, samples)
            overlap=0.2,
            shifts=1,
            split=True,
            device=device,
        )

    # stems_batch shape: (batch, stems, channels, samples) -> drop batch dim
    stems = stems_batch[0]

    print(f"Model '{model_name}' extracted stems shape: {stems.shape}")

    if stem_choice == "Instrumental (No Vocals)":
        # Everything except vocals: Drums (0) + Bass (1) + Other (2)
        stem = stems[0] + stems[1] + stems[2]
    else:
        stem_index = STEM_CHOICES[stem_choice]
        stem = stems[stem_index]

    # Convert back to mono if the input was originally mono
    if is_mono:
        stem = stem.mean(dim=0, keepdim=True)  # Stereo -> Mono

    # Wrap as a float32 AudioSignal for PyHARP's save_audio helper.
    stem_signal = AudioSignal(stem.cpu().numpy().astype('float32'), sample_rate=sr)
    return stem_signal
69
 
70
def process_fn_stem(audio_file_path: str, demucs_model: str, stem_choice: str):
    """PyHARP process function: separate the chosen stem and write it to disk.

    Runs Demucs via separate_stem, saves the result as a .wav named after the
    stem, and returns the path together with an empty LabelList (HARP expects
    both outputs).
    """
    signal = separate_stem(
        audio_file_path,
        model_name=demucs_model,
        stem_choice=stem_choice,
    )
    out_name = "{}.wav".format(stem_choice.lower().replace(" ", "_"))
    out_path = save_audio(signal, out_name)
    return out_path, LabelList(labels=[])
79
 
 
 
80
 
81
# Define the model card
# PyHARP ModelCard: metadata describing this endpoint to HARP clients.
model_card = ModelCard(
    name="Demucs Stem Separator",
    description="Uses Demucs to separate a music track into a selected stem.",
    author="Alexandre Défossez, Nicolas Usunier, Léon Bottou, Francis Bach",
    tags=["demucs", "source-separation", "pyharp", "stems"]
)
88
 
89
# Build Gradio interface with dropdowns for model and stem selection
with gr.Blocks() as demo:
    # Extra UI inputs forwarded to process_fn_stem.
    # NOTE(review): presumably build_endpoint supplies the audio-file input
    # itself and passes these after it — confirm against pyharp docs.
    components = [
        gr.Dropdown(
            label="Select Demucs Model",
            choices=DEMUX_MODELS,
            value="mdx_extra_q"
        ),
        gr.Dropdown(
            label="Select Stem to Separate",
            choices=list(STEM_CHOICES.keys()),
            value="Vocals"
        )
    ]

    # Wire the model card, inputs, and process function into a
    # HARP-compatible Gradio endpoint.
    app = build_endpoint(
        model_card=model_card,
        components=components,
        process_fn=process_fn_stem
    )

# Enable request queuing before launching (serializes GPU jobs on Spaces).
demo.queue()
demo.launch(share=True, show_error=True)
requirements.txt CHANGED
@@ -1,12 +1,17 @@
1
  -e git+https://github.com/TEAMuP-dev/pyharp.git#egg=pyharp
2
- midi2audio == 0.1.1
3
- mido == 1.2.10
4
- numpy >= 1.22.4
5
- torch >= 2.0.1
6
- transformers == 4.29.2
7
- safetensors
8
- accelerate
9
- tqdm == 4.65.0
10
-
11
- # Additional dependencies for AMT
12
- git+https://github.com/jthickstun/anticipation.git
 
 
 
 
 
 
1
  -e git+https://github.com/TEAMuP-dev/pyharp.git#egg=pyharp
2
+ demucs
3
+ dora-search
4
+ einops
5
+ julius>=0.2.3
6
+ lameenc>=1.2
7
+ openunmix
8
+ pyyaml
9
+ tqdm
10
+ torch>=1.8.1, <2.1
11
+ torchaudio>=0.8, <2.1
12
+ diffq>=0.2.1
13
+ ffmpeg
14
+ numpy<2
15
+ scipy
16
+ soundfile
17
+ hydra-core>=1.1