thecollabagepatch committed on
Commit
85b6f84
·
1 Parent(s): 98a8828

no more duration params

Browse files
Files changed (1) hide show
  1. app.py +114 -118
app.py CHANGED
@@ -18,159 +18,155 @@ def preprocess_audio(waveform):
18
  waveform_np = waveform.cpu().squeeze().numpy()
19
  return torch.from_numpy(waveform_np).unsqueeze(0).to(device)
20
 
21
- @spaces.GPU
22
- def simple_test(text):
23
- return f"Hello {text}!"
24
-
25
- # @spaces.GPU(duration=10)
26
- # def generate_drum_sample():
27
- # model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
28
- # model.set_generation_params(duration=10)
29
- # wav = model.generate_unconditional(1).squeeze(0)
30
 
31
- # filename_without_extension = f'jungle'
32
- # filename_with_extension = f'{filename_without_extension}.wav'
33
 
34
- # audio_write(filename_without_extension, wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True)
35
 
36
- # return filename_with_extension
37
 
38
- # @spaces.GPU(duration=10)
39
- # def continue_drum_sample(existing_audio_path):
40
- # if existing_audio_path is None:
41
- # return None
42
 
43
- # existing_audio, sr = torchaudio.load(existing_audio_path)
44
- # existing_audio = existing_audio.to(device)
45
 
46
- # prompt_duration = 2
47
- # output_duration = 10
48
 
49
- # num_samples = int(prompt_duration * sr)
50
- # if existing_audio.shape[1] < num_samples:
51
- # raise ValueError("The existing audio is too short for the specified prompt duration.")
52
 
53
- # start_sample = existing_audio.shape[1] - num_samples
54
- # prompt_waveform = existing_audio[..., start_sample:]
55
 
56
- # model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
57
- # model.set_generation_params(duration=output_duration)
58
 
59
- # output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
60
- # output = output.to(device)
61
 
62
- # if output.dim() == 3:
63
- # output = output.squeeze(0)
64
 
65
- # if output.dim() == 1:
66
- # output = output.unsqueeze(0)
67
 
68
- # combined_audio = torch.cat((existing_audio, output), dim=1)
69
- # combined_audio = combined_audio.cpu()
70
 
71
- # combined_file_path = f'./continued_jungle_{random.randint(1000, 9999)}.wav'
72
- # torchaudio.save(combined_file_path, combined_audio, sr)
73
 
74
- # return combined_file_path
75
 
76
- # @spaces.GPU(duration=120)
77
- # def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
78
- # if wav_filename is None:
79
- # return None
80
 
81
- # song, sr = torchaudio.load(wav_filename)
82
- # song = song.to(device)
83
-
84
- # model_name = musicgen_model.split(" ")[0]
85
- # model_continue = MusicGen.get_pretrained(model_name)
86
-
87
- # model_continue.set_generation_params(
88
- # use_sampling=True,
89
- # top_k=250,
90
- # top_p=0.0,
91
- # temperature=1.0,
92
- # duration=output_duration,
93
- # cfg_coef=3
94
- # )
95
-
96
- # prompt_waveform = song[..., :int(prompt_duration * sr)]
97
- # prompt_waveform = preprocess_audio(prompt_waveform)
98
 
99
- # output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
100
- # output = output.cpu()
101
 
102
- # if len(output.size()) > 2:
103
- # output = output.squeeze()
104
 
105
- # filename_without_extension = f'continued_music'
106
- # filename_with_extension = f'{filename_without_extension}.wav'
107
- # audio_write(filename_without_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
108
 
109
- # return filename_with_extension
110
 
111
- # @spaces.GPU(duration=120)
112
- # def continue_music(input_audio_path, prompt_duration, musicgen_model, output_duration):
113
- # if input_audio_path is None:
114
- # return None
115
 
116
- # song, sr = torchaudio.load(input_audio_path)
117
- # song = song.to(device)
118
 
119
- # model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
120
- # model_continue.set_generation_params(
121
- # use_sampling=True,
122
- # top_k=250,
123
- # top_p=0.0,
124
- # temperature=1.0,
125
- # duration=output_duration,
126
- # cfg_coef=3
127
- # )
128
 
129
- # original_audio = AudioSegment.from_mp3(input_audio_path)
130
- # current_audio = original_audio
131
 
132
- # file_paths_for_cleanup = []
133
 
134
- # for i in range(1):
135
- # num_samples = int(prompt_duration * sr)
136
- # if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
137
- # raise ValueError("The prompt_duration is longer than the current audio length.")
138
 
139
- # start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
140
- # prompt_audio = current_audio[start_time:]
141
 
142
- # prompt_bytes = prompt_audio.export(format="wav").read()
143
- # prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
144
- # prompt_waveform = prompt_waveform.to(device)
145
 
146
- # prompt_waveform = preprocess_audio(prompt_waveform)
147
 
148
- # output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
149
- # output = output.cpu()
150
 
151
- # if len(output.size()) > 2:
152
- # output = output.squeeze()
153
 
154
- # filename_without_extension = f'continue_{i}'
155
- # filename_with_extension = f'{filename_without_extension}.wav'
156
- # correct_filename_extension = f'{filename_without_extension}.wav.wav'
157
 
158
- # audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
159
- # generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
160
 
161
- # current_audio = current_audio[:start_time] + generated_audio_segment
162
 
163
- # file_paths_for_cleanup.append(correct_filename_extension)
164
 
165
- # combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
166
- # current_audio.export(combined_audio_filename, format="mp3")
167
 
168
- # for file_path in file_paths_for_cleanup:
169
- # os.remove(file_path)
170
 
171
- # return combined_audio_filename
172
 
173
- # Define the expandable sections (keeping your existing content)
174
  musicgen_micro_blurb = """
175
  ## musicgen_micro
176
  musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane. this one does goated jungle drums. we're very excited about these.
@@ -213,7 +209,7 @@ thepatch/PhonkV2 was trained by MJ BERSABEph. there are multiple versions in the
213
  foureyednymph/musicgen-sza-sos-small was just trained by foureyednymph. We're all about to find out if it does continuations well.
214
  """
215
 
216
- # Create the Gradio interface with explicit types
217
  with gr.Blocks() as iface:
218
  gr.Markdown("# the-micro-slot-machine")
219
  gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
@@ -269,10 +265,10 @@ with gr.Blocks() as iface:
269
  continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")
270
 
271
  # Connecting the components
272
- # generate_button.click(generate_drum_sample, outputs=[drum_audio])
273
- # continue_drum_sample_button.click(continue_drum_sample, inputs=[drum_audio], outputs=[drum_audio])
274
- # generate_music_button.click(generate_music, inputs=[drum_audio, prompt_duration, musicgen_model, output_duration], outputs=[output_audio])
275
- # continue_button.click(continue_music, inputs=[output_audio, prompt_duration, musicgen_model, output_duration], outputs=continue_output_audio)
276
 
277
  if __name__ == "__main__":
278
  iface.launch()
 
18
  waveform_np = waveform.cpu().squeeze().numpy()
19
  return torch.from_numpy(waveform_np).unsqueeze(0).to(device)
20
 
21
+ @spaces.GPU # Remove duration parameter
22
+ def generate_drum_sample():
23
+ model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
24
+ model.set_generation_params(duration=10)
25
+ wav = model.generate_unconditional(1).squeeze(0)
 
 
 
 
26
 
27
+ filename_without_extension = f'jungle'
28
+ filename_with_extension = f'{filename_without_extension}.wav'
29
 
30
+ audio_write(filename_without_extension, wav.cpu(), model.sample_rate, strategy="loudness", loudness_compressor=True)
31
 
32
+ return filename_with_extension
33
 
34
+ @spaces.GPU # Remove duration parameter
35
+ def continue_drum_sample(existing_audio_path):
36
+ if existing_audio_path is None:
37
+ return None
38
 
39
+ existing_audio, sr = torchaudio.load(existing_audio_path)
40
+ existing_audio = existing_audio.to(device)
41
 
42
+ prompt_duration = 2
43
+ output_duration = 10
44
 
45
+ num_samples = int(prompt_duration * sr)
46
+ if existing_audio.shape[1] < num_samples:
47
+ raise ValueError("The existing audio is too short for the specified prompt duration.")
48
 
49
+ start_sample = existing_audio.shape[1] - num_samples
50
+ prompt_waveform = existing_audio[..., start_sample:]
51
 
52
+ model = MusicGen.get_pretrained('pharoAIsanders420/micro-musicgen-jungle')
53
+ model.set_generation_params(duration=output_duration)
54
 
55
+ output = model.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
56
+ output = output.to(device)
57
 
58
+ if output.dim() == 3:
59
+ output = output.squeeze(0)
60
 
61
+ if output.dim() == 1:
62
+ output = output.unsqueeze(0)
63
 
64
+ combined_audio = torch.cat((existing_audio, output), dim=1)
65
+ combined_audio = combined_audio.cpu()
66
 
67
+ combined_file_path = f'./continued_jungle_{random.randint(1000, 9999)}.wav'
68
+ torchaudio.save(combined_file_path, combined_audio, sr)
69
 
70
+ return combined_file_path
71
 
72
+ @spaces.GPU # Remove duration parameter
73
+ def generate_music(wav_filename, prompt_duration, musicgen_model, output_duration):
74
+ if wav_filename is None:
75
+ return None
76
 
77
+ song, sr = torchaudio.load(wav_filename)
78
+ song = song.to(device)
79
+
80
+ model_name = musicgen_model.split(" ")[0]
81
+ model_continue = MusicGen.get_pretrained(model_name)
82
+
83
+ model_continue.set_generation_params(
84
+ use_sampling=True,
85
+ top_k=250,
86
+ top_p=0.0,
87
+ temperature=1.0,
88
+ duration=output_duration,
89
+ cfg_coef=3
90
+ )
91
+
92
+ prompt_waveform = song[..., :int(prompt_duration * sr)]
93
+ prompt_waveform = preprocess_audio(prompt_waveform)
94
 
95
+ output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
96
+ output = output.cpu()
97
 
98
+ if len(output.size()) > 2:
99
+ output = output.squeeze()
100
 
101
+ filename_without_extension = f'continued_music'
102
+ filename_with_extension = f'{filename_without_extension}.wav'
103
+ audio_write(filename_without_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
104
 
105
+ return filename_with_extension
106
 
107
+ @spaces.GPU # Remove duration parameter
108
+ def continue_music(input_audio_path, prompt_duration, musicgen_model, output_duration):
109
+ if input_audio_path is None:
110
+ return None
111
 
112
+ song, sr = torchaudio.load(input_audio_path)
113
+ song = song.to(device)
114
 
115
+ model_continue = MusicGen.get_pretrained(musicgen_model.split(" ")[0])
116
+ model_continue.set_generation_params(
117
+ use_sampling=True,
118
+ top_k=250,
119
+ top_p=0.0,
120
+ temperature=1.0,
121
+ duration=output_duration,
122
+ cfg_coef=3
123
+ )
124
 
125
+ original_audio = AudioSegment.from_mp3(input_audio_path)
126
+ current_audio = original_audio
127
 
128
+ file_paths_for_cleanup = []
129
 
130
+ for i in range(1):
131
+ num_samples = int(prompt_duration * sr)
132
+ if current_audio.duration_seconds * 1000 < prompt_duration * 1000:
133
+ raise ValueError("The prompt_duration is longer than the current audio length.")
134
 
135
+ start_time = current_audio.duration_seconds * 1000 - prompt_duration * 1000
136
+ prompt_audio = current_audio[start_time:]
137
 
138
+ prompt_bytes = prompt_audio.export(format="wav").read()
139
+ prompt_waveform, _ = torchaudio.load(io.BytesIO(prompt_bytes))
140
+ prompt_waveform = prompt_waveform.to(device)
141
 
142
+ prompt_waveform = preprocess_audio(prompt_waveform)
143
 
144
+ output = model_continue.generate_continuation(prompt_waveform, prompt_sample_rate=sr, progress=True)
145
+ output = output.cpu()
146
 
147
+ if len(output.size()) > 2:
148
+ output = output.squeeze()
149
 
150
+ filename_without_extension = f'continue_{i}'
151
+ filename_with_extension = f'{filename_without_extension}.wav'
152
+ correct_filename_extension = f'{filename_without_extension}.wav.wav'
153
 
154
+ audio_write(filename_with_extension, output, model_continue.sample_rate, strategy="loudness", loudness_compressor=True)
155
+ generated_audio_segment = AudioSegment.from_wav(correct_filename_extension)
156
 
157
+ current_audio = current_audio[:start_time] + generated_audio_segment
158
 
159
+ file_paths_for_cleanup.append(correct_filename_extension)
160
 
161
+ combined_audio_filename = f"combined_audio_{random.randint(1, 10000)}.mp3"
162
+ current_audio.export(combined_audio_filename, format="mp3")
163
 
164
+ for file_path in file_paths_for_cleanup:
165
+ os.remove(file_path)
166
 
167
+ return combined_audio_filename
168
 
169
+ # Define the expandable sections
170
  musicgen_micro_blurb = """
171
  ## musicgen_micro
172
  musicgen micro is an experimental series of models by aaron abebe. they are incredibly fast, and extra insane. this one does goated jungle drums. we're very excited about these.
 
209
  foureyednymph/musicgen-sza-sos-small was just trained by foureyednymph. We're all about to find out if it does continuations well.
210
  """
211
 
212
+ # Create the Gradio interface
213
  with gr.Blocks() as iface:
214
  gr.Markdown("# the-micro-slot-machine")
215
  gr.Markdown("two ai's jamming. warning: outputs will be very strange, likely stupid, and possibly rad.")
 
265
  continue_output_audio = gr.Audio(label="Continued Music Output", type="filepath")
266
 
267
  # Connecting the components
268
+ generate_button.click(generate_drum_sample, outputs=[drum_audio])
269
+ continue_drum_sample_button.click(continue_drum_sample, inputs=[drum_audio], outputs=[drum_audio])
270
+ generate_music_button.click(generate_music, inputs=[drum_audio, prompt_duration, musicgen_model, output_duration], outputs=[output_audio])
271
+ continue_button.click(continue_music, inputs=[output_audio, prompt_duration, musicgen_model, output_duration], outputs=continue_output_audio)
272
 
273
  if __name__ == "__main__":
274
  iface.launch()