thecollabagepatch committed on
Commit
ed6f2d5
·
1 Parent(s): 421b140

ok herewego

Browse files
Files changed (1) hide show
  1. app.py +111 -120
app.py CHANGED
@@ -18,8 +18,9 @@ def preprocess_audio(waveform):
18
  waveform_np = waveform.cpu().squeeze().numpy()
19
  return torch.from_numpy(waveform_np).unsqueeze(0).to(device)
20
 
 
21
  @spaces.GPU
22
- def generate_drum_sample() -> str:
23
  model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
24
  model.set_generation_params(duration=10)
25
  wav = model.generate_unconditional(1).squeeze(0)
@@ -31,155 +32,142 @@ def generate_drum_sample() -> str:
31
 
32
  return filename_with_extension
33
 
34
- # @spaces.GPU(duration=10)
35
- # def generate_drum_sample():
36
- # model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
37
- # model.set_generation_params(duration=10)
38
- # wav = model.generate_unconditional(1).squeeze(0)
39
-
40
- # filename_without_extension = f'jungle'
41
- # filename_with_extension = f'{filename_without_extension}.wav'
42
-
43
- # audio_write(filename_without_extension, wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True)
44
-
45
- # return filename_with_extension
46
-
47
- # @spaces.GPU(duration=10)
48
- # def continue_drum_sample(existing_audio_path):
49
- # if existing_audio_path is None:
50
- # return None
51
 
52
- # existing_audio, sr = torchaudio.load(existing_audio_path)
53
- # existing_audio = existing_audio.to(device)
54
 
55
- # prompt_duration = 2
56
- # output_duration = 10
57
 
58
- # num_samples = int(prompt_duration * sr)
59
- # if existing_audio.shape[1] < num_samples:
60
- # raise ValueError("The existing audio is too short for the specified prompt duration.")
61
 
62
- # start_sample = existing_audio.shape[1] - num_samples
63
- # prompt_waveform = existing_audio[..., start_sample:]
64
 
65
- # model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
66
- # model.set_generation_params(duration=output_duration)
67
 
68
- # output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
69
- # output = output.to(device)
70
 
71
- # if output.dim() == 3:
72
- # output = output.squeeze(0)
73
 
74
- # if output.dim() == 1:
75
- # output = output.unsqueeze(0)
76
 
77
- # combined_audio = torch.cat((existing_audio, output), dim=1)
78
- # combined_audio = combined_audio.cpu()
79
 
80
- # combined_file_path = f'./continued_jungle_{random.randint(1000, 9999)}.wav'
81
- # torchaudio.save(combined_file_path, combined_audio, sr)
82
 
83
- # return combined_file_path
84
 
85
- # @spaces.GPU(duration=120)
86
- # def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
87
- # if wav_filename is None:
88
- # return None
89
 
90
- # song, sr = torchaudio.load(wav_filename)
91
- # song = song.to(device)
92
-
93
- # model_name = musicgen_model.split(" ")[0]
94
- # model_continue = MusicGen.get_pretrained(model_name)
95
-
96
- # model_continue.set_generation_params(
97
- # use_sampling=True,
98
- # top_k=250,
99
- # top_p=0.0,
100
- # temperature=1.0,
101
- # duration=output_duration,
102
- # cfg_coef=3
103
- # )
104
-
105
- # prompt_waveform = song[..., :int(prompt_duration * sr)]
106
- # prompt_waveform = preprocess_audio(prompt_waveform)
107
 
108
- # output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
109
- # output = output.cpu()
110
 
111
- # if len(output.size()) > 2:
112
- # output = output.squeeze()
113
 
114
- # filename_without_extension = f'continued_music'
115
- # filename_with_extension = f'{filename_without_extension}.wav'
116
- # audio_write(filename_without_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
117
 
118
- # return filename_with_extension
119
 
120
- # @spaces.GPU(duration=120)
121
- # def continue_music(input_audio_path, prompt_duration, musicgen_model, output_duration):
122
- # if input_audio_path is None:
123
- # return None
124
 
125
- # song, sr = torchaudio.load(input_audio_path)
126
- # song = song.to(device)
127
 
128
- # model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
129
- # model_continue.set_generation_params(
130
- # use_sampling=True,
131
- # top_k=250,
132
- # top_p=0.0,
133
- # temperature=1.0,
134
- # duration=output_duration,
135
- # cfg_coef=3
136
- # )
137
 
138
- # original_audio = AudioSegment.from_mp3(input_audio_path)
139
- # current_audio = original_audio
140
 
141
- # file_paths_for_cleanup = []
142
 
143
- # for i in range(1):
144
- # num_samples = int(prompt_duration * sr)
145
- # if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
146
- # raise ValueError("The prompt_duration is longer than the current audio length.")
147
 
148
- # start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
149
- # prompt_audio = current_audio[start_time:]
150
 
151
- # prompt_bytes = prompt_audio.export(format="wav").read()
152
- # prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
153
- # prompt_waveform = prompt_waveform.to(device)
154
 
155
- # prompt_waveform = preprocess_audio(prompt_waveform)
156
 
157
- # output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
158
- # output = output.cpu()
159
 
160
- # if len(output.size()) > 2:
161
- # output = output.squeeze()
162
 
163
- # filename_without_extension = f'continue_{i}'
164
- # filename_with_extension = f'{filename_without_extension}.wav'
165
- # correct_filename_extension = f'{filename_without_extension}.wav.wav'
166
 
167
- # audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
168
- # generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
169
 
170
- # current_audio = current_audio[:start_time] + generated_audio_segment
171
 
172
- # file_paths_for_cleanup.append(correct_filename_extension)
173
 
174
- # combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
175
- # current_audio.export(combined_audio_filename, format="mp3")
176
 
177
- # for file_path in file_paths_for_cleanup:
178
- # os.remove(file_path)
179
 
180
- # return combined_audio_filename
181
 
182
- # Define the expandable sections (keeping your existing content)
183
  musicgen_micro_blurb = """
184
  ## musicgen_micro
185
  musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane. this one does goated jungle drums. we're very excited about these.
@@ -222,7 +210,7 @@ thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the
222
  foureyednymph/musicgen-sza-sos-small was just trained by foureyednymph. We're all about to find out if it does continuations well.
223
  """
224
 
225
- # Create the Gradio interface with explicit types
226
  with gr.Blocks() as iface:
227
  gr.Markdown("# the-micro-slot-machine")
228
  gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
@@ -277,11 +265,14 @@ with gr.Blocks() as iface:
277
  continue_button = gr.Button("Continue Generating Music")
278
  continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")
279
 
280
- # Connecting the components
281
- generate_button.click(generate_drum_sample, outputs=[drum_audio])
282
- # continue_drum_sample_button.click(continue_drum_sample, inputs=[drum_audio], outputs=[drum_audio])
283
- # generate_music_button.click(generate_music, inputs=[drum_audio, prompt_duration, musicgen_model, output_duration], outputs=[output_audio])
284
- # continue_button.click(continue_music, inputs=[output_audio, prompt_duration, musicgen_model, output_duration], outputs=continue_output_audio)
 
 
 
285
 
286
  if __name__ == "__main__":
287
  iface.launch()
 
18
  waveform_np = waveform.cpu().squeeze().numpy()
19
  return torch.from_numpy(waveform_np).unsqueeze(0).to(device)
20
 
21
+ # Fix: Add dummy parameter to avoid schema generation bug
22
  @spaces.GPU
23
+ def generate_drum_sample(dummy_trigger="generate"):
24
  model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
25
  model.set_generation_params(duration=10)
26
  wav = model.generate_unconditional(1).squeeze(0)
 
32
 
33
  return filename_with_extension
34
 
35
@spaces.GPU
def continue_drum_sample(existing_audio_path):
    """Extend an existing drum loop with a generated continuation.

    Prompts micro-musicgen-jungle with the last ``prompt_duration`` seconds
    of the input file, appends the generated audio to the original, and
    writes the combined clip to a new .wav file.

    Args:
        existing_audio_path: Path to the .wav file to continue, or None
            (Gradio passes None when no audio is loaded).

    Returns:
        Path of the combined output file, or None when no input was given.

    Raises:
        ValueError: If the input is shorter than the prompt window.
    """
    if existing_audio_path is None:
        return None

    existing_audio, sr = torchaudio.load(existing_audio_path)
    existing_audio = existing_audio.to(device)

    prompt_duration = 2   # seconds of audio used as the generation prompt
    output_duration = 10  # seconds of audio the model will generate

    num_samples = int(prompt_duration * sr)
    if existing_audio.shape[1] < num_samples:
        raise ValueError("The existing audio is too short for the specified prompt duration.")

    # Prompt with the tail of the existing clip so the continuation is seamless.
    start_sample = existing_audio.shape[1] - num_samples
    prompt_waveform = existing_audio[..., start_sample:]

    model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
    model.set_generation_params(duration=output_duration)

    output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
    output = output.to(device)

    # Normalize to (channels, samples) regardless of batch/extra dims.
    if output.dim() == 3:
        output = output.squeeze(0)
    if output.dim() == 1:
        output = output.unsqueeze(0)

    # Fix: the model generates at model.sample_rate, which may differ from
    # the input file's rate; resample before concatenating so the combined
    # file plays back at a single consistent rate (no-op when they match).
    if sr != model.sample_rate:
        output = torchaudio.functional.resample(output, model.sample_rate, sr)

    combined_audio = torch.cat((existing_audio, output), dim=1)
    combined_audio = combined_audio.cpu()

    combined_file_path = f'./continued_jungle_{random.randint(1000, 9999)}.wav'
    torchaudio.save(combined_file_path, combined_audio, sr)

    return combined_file_path
72
 
73
@spaces.GPU
def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
    """Continue an input clip with a user-selected MusicGen checkpoint.

    Uses the first ``prompt_duration`` seconds of ``wav_filename`` as the
    prompt, generates ``output_duration`` seconds of audio with the model
    chosen in the dropdown, and writes the result to 'continued_music.wav'.

    Args:
        wav_filename: Path to the prompt audio file, or None.
        prompt_duration: Seconds from the start of the clip used as prompt.
        musicgen_model: Dropdown label; the first whitespace-separated
            token is the loadable model id.
        output_duration: Seconds of audio to generate.

    Returns:
        The output filename, or None when no input file was supplied.
    """
    if wav_filename is None:
        return None

    song, sr = torchaudio.load(wav_filename)
    song = song.to(device)

    # The dropdown label is "<model-id> <description>"; only the id loads.
    model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
    model_continue.set_generation_params(
        use_sampling=True,
        top_k=250,
        top_p=0.0,
        temperature=1.0,
        duration=output_duration,
        cfg_coef=3
    )

    # Prompt with the opening of the clip, normalized for the model.
    prompt_waveform = preprocess_audio(song[..., :int(prompt_duration * sr)])

    output = model_continue.generate_continuation(
        prompt_waveform, prompt_sample_rate=sr, progress=True
    ).cpu()
    if output.dim() > 2:
        output = output.squeeze()

    # audio_write appends the .wav suffix itself, hence the bare stem.
    stem = 'continued_music'
    audio_write(stem, output, model_continue.sample_rate,
                strategy="loudness", loudness_compressor=True)
    return f'{stem}.wav'
107
 
108
@spaces.GPU
def continue_music(input_audio_path, prompt_duration, musicgen_model, output_duration):
    """Append a generated continuation onto an existing track.

    Prompts the selected MusicGen model with the last ``prompt_duration``
    seconds of the input, splices the generated audio onto the original
    with pydub, and exports the combined track as an mp3.

    Args:
        input_audio_path: Path to the source audio (read as mp3), or None.
        prompt_duration: Seconds from the end of the track used as prompt.
        musicgen_model: Dropdown label; the first whitespace-separated
            token is the loadable model id.
        output_duration: Seconds of audio to generate.

    Returns:
        Filename of the exported mp3, or None when no input was given.

    Raises:
        ValueError: If the track is shorter than the prompt window.
    """
    if input_audio_path is None:
        return None

    # Fix: only the sample rate is needed from torchaudio here (prompts are
    # cut from the pydub segment below); the previous version also moved the
    # whole unused waveform to the GPU.
    _, sr = torchaudio.load(input_audio_path)

    model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
    model_continue.set_generation_params(
        use_sampling=True,
        top_k=250,
        top_p=0.0,
        temperature=1.0,
        duration=output_duration,
        cfg_coef=3
    )

    current_audio = AudioSegment.from_mp3(input_audio_path)
    file_paths_for_cleanup = []

    # Single pass for now; the loop keeps it easy to chain several
    # continuations later.
    for i in range(1):
        if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
            raise ValueError("The prompt_duration is longer than the current audio length.")

        # Prompt with the tail of the current mix (pydub slices are in ms).
        start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
        prompt_audio = current_audio[start_time:]

        # Round-trip through an in-memory wav so torchaudio can load it.
        prompt_bytes = prompt_audio.export(format="wav").read()
        prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
        prompt_waveform = preprocess_audio(prompt_waveform.to(device))

        output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
        output = output.cpu()
        if output.dim() > 2:
            output = output.squeeze()

        filename_without_extension = f'continue_{i}'
        filename_with_extension = f'{filename_without_extension}.wav'
        # audio_write appends ".wav" to the name it is given, so passing the
        # .wav name produces a ".wav.wav" file on disk — read that one back.
        correct_filename_extension = f'{filename_without_extension}.wav.wav'

        audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
        generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)

        # Replace the prompt tail with the continuation (the model re-renders
        # the prompt) so those seconds are not doubled in the mix.
        current_audio = current_audio[:start_time] + generated_audio_segment

        file_paths_for_cleanup.append(correct_filename_extension)

    combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
    current_audio.export(combined_audio_filename, format="mp3")

    # Remove the intermediate per-iteration generation files.
    for file_path in file_paths_for_cleanup:
        os.remove(file_path)

    return combined_audio_filename
169
 
170
+ # Define the expandable sections
171
  musicgen_micro_blurb = """
172
  ## musicgen_micro
173
  musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane. this one does goated jungle drums. we're very excited about these.
 
210
  foureyednymph/musicgen-sza-sos-small was just trained by foureyednymph. We're all about to find out if it does continuations well.
211
  """
212
 
213
+ # Create the Gradio interface
214
  with gr.Blocks() as iface:
215
  gr.Markdown("# the-micro-slot-machine")
216
  gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
 
265
  continue_button = gr.Button("Continue Generating Music")
266
  continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")
267
 
268
+ # Hidden component to provide dummy input
269
+ hidden_trigger = gr.Textbox(value="generate", visible=False)
270
+
271
+ # Fixed click handlers - use hidden input for generate_drum_sample
272
+ generate_button.click(generate_drum_sample, inputs=[hidden_trigger], outputs=[drum_audio])
273
+ continue_drum_sample_button.click(continue_drum_sample, inputs=[drum_audio], outputs=[drum_audio])
274
+ generate_music_button.click(generate_music, inputs=[drum_audio, prompt_duration, musicgen_model, output_duration], outputs=[output_audio])
275
+ continue_button.click(continue_music, inputs=[output_audio, prompt_duration, musicgen_model, output_duration], outputs=continue_output_audio)
276
 
277
  if __name__ == "__main__":
278
  iface.launch()