Spaces:
Running on Zero
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -127,7 +127,10 @@ def infer_img2img(prompt, audio_path, desired_strength, progress=gr.Progress(tra
|
|
| 127 |
|
| 128 |
return "output.wav", input_spec_image_path, output_spec_image_path
|
| 129 |
|
| 130 |
-
def infer_inp(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
|
|
|
|
|
|
|
|
|
|
| 131 |
|
| 132 |
pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
|
| 133 |
dtype = torch.float16
|
|
@@ -213,6 +216,18 @@ def infer_inp(prompt, audio_path, progress=gr.Progress(track_tqdm=True)):
|
|
| 213 |
|
| 214 |
return "output.wav"
|
| 215 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
css="""
|
| 217 |
div#col-container{
|
| 218 |
margin: 0 auto;
|
|
@@ -282,6 +297,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 282 |
with gr.Tab("Audio InPainting"):
|
| 283 |
prompt_inp = gr.Textbox(label="Prompt")
|
| 284 |
audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
|
|
|
|
| 285 |
submit_btn_inp = gr.Button("Submit")
|
| 286 |
audio_out_inp = gr.Audio(label="Audio Ressult")
|
| 287 |
|
|
@@ -290,11 +306,15 @@ with gr.Blocks(css=css) as demo:
|
|
| 290 |
input_spectrogram_inp = gr.Image(label="Input Spectrogram")
|
| 291 |
output_spectrogram_inp = gr.Image(label="Output Spectrogram")
|
| 292 |
|
| 293 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 294 |
|
| 295 |
submit_btn_inp.click(
|
| 296 |
fn = infer_inp,
|
| 297 |
-
inputs = [prompt_inp, audio_in_inp],
|
| 298 |
outputs = [audio_out_inp]
|
| 299 |
)
|
| 300 |
|
|
|
|
| 127 |
|
| 128 |
return "output.wav", input_spec_image_path, output_spec_image_path
|
| 129 |
|
| 130 |
+
def infer_inp(prompt, audio_path, spec_with_mask, progress=gr.Progress(track_tqdm=True)):
|
| 131 |
+
|
| 132 |
+
if spec_with_mask:
|
| 133 |
+
print(spec_with_mask)
|
| 134 |
|
| 135 |
pretrained_model_name_or_path = "auffusion/auffusion-full-no-adapter"
|
| 136 |
dtype = torch.float16
|
|
|
|
| 216 |
|
| 217 |
return "output.wav"
|
| 218 |
|
| 219 |
+
def load_spec_for_manual_masking(audio_path):
    """Build the spectrogram image on which the user draws an inpainting mask.

    Args:
        audio_path: Path of the reference audio file. ``None`` or an empty
            string means no audio is currently loaded.

    Returns:
        The image produced by ``image_add_color(torch_to_pil(...))`` for the
        padded, normalized mel spectrogram of the audio, or ``None`` when
        ``audio_path`` is missing so the bound image component is cleared
        instead of the callback raising.
    """
    # Nothing to render without a file; returning None avoids failing in
    # load_wav when the audio input is empty.
    if not audio_path:
        return None

    # Loading (the sampling rate returned by load_wav is not needed here).
    audio, _ = load_wav(audio_path)
    audio, spec = get_mel_spectrogram_from_audio(audio)
    norm_spec = normalize_spectrogram(spec)
    # NOTE(review): 1024 is presumably the target spectrogram length - confirm
    # against pad_spec.
    norm_spec = pad_spec(norm_spec, 1024)
    # Normalize to [-1, 1], because the pipeline does not normalize
    # torch.Tensor input.
    norm_spec = normalize(norm_spec)

    raw_image = image_add_color(torch_to_pil(norm_spec))
    return raw_image
|
| 229 |
+
|
| 230 |
+
|
| 231 |
css="""
|
| 232 |
div#col-container{
|
| 233 |
margin: 0 auto;
|
|
|
|
| 297 |
with gr.Tab("Audio InPainting"):
|
| 298 |
prompt_inp = gr.Textbox(label="Prompt")
|
| 299 |
audio_in_inp = gr.Audio(label="Audio Reference", type="filepath")
|
| 300 |
+
spec_for_mask = gr.ImageMask(label="Draw Mask", type="pil", interactive=False)
|
| 301 |
submit_btn_inp = gr.Button("Submit")
|
| 302 |
audio_out_inp = gr.Audio(label="Audio Ressult")
|
| 303 |
|
|
|
|
| 306 |
input_spectrogram_inp = gr.Image(label="Input Spectrogram")
|
| 307 |
output_spectrogram_inp = gr.Image(label="Output Spectrogram")
|
| 308 |
|
| 309 |
+
audio_in_inp.upload(
|
| 310 |
+
fn = load_spec_for_manual_masking,
|
| 311 |
+
inputs = [audio_in_inp],
|
| 312 |
+
outputs = [spec_for_mask]
|
| 313 |
+
)
|
| 314 |
|
| 315 |
submit_btn_inp.click(
|
| 316 |
fn = infer_inp,
|
| 317 |
+
inputs = [prompt_inp, audio_in_inp, spec_for_mask],
|
| 318 |
outputs = [audio_out_inp]
|
| 319 |
)
|
| 320 |
|