Spaces:
Running on Zero
Update app.py
Browse files
app.py
CHANGED
|
@@ -206,6 +206,10 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
|
|
| 206 |
post_masked_spec = denormalize(masked_spec).to(device, dtype)
|
| 207 |
denorm_masked_spec = denormalize_spectrogram(post_masked_spec)
|
| 208 |
denorm_masked_spec_audio = vocoder.inference(denorm_masked_spec)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 209 |
|
| 210 |
denorm_spec = denormalize_spectrogram(output_spec)
|
| 211 |
denorm_spec_audio = vocoder.inference(denorm_spec)
|
|
@@ -225,7 +229,9 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
|
|
| 225 |
print("Output spectrogram min/max:", output_spec.min().item(), output_spec.max().item())
|
| 226 |
|
| 227 |
# Save as WAV
|
| 228 |
-
sf.write("
|
|
|
|
|
|
|
| 229 |
|
| 230 |
# Save input spectrogram image
|
| 231 |
input_spec_image_path = "input_spectrogram.png"
|
|
@@ -235,7 +241,7 @@ def infer_inp(prompt, audio_path, mask_start_point, mask_end_point, progress=gr.
|
|
| 235 |
output_spec_image_path = "output_spectrogram.png"
|
| 236 |
color_output_spec_image.save(output_spec_image_path)
|
| 237 |
|
| 238 |
-
return "
|
| 239 |
|
| 240 |
def load_input_spectrogram(audio_path):
|
| 241 |
# Loading
|
|
@@ -380,6 +386,11 @@ with gr.Blocks(css=css) as demo:
|
|
| 380 |
with gr.Column():
|
| 381 |
input_spectrogram_inp = gr.Image(label="Input Spectrogram")
|
| 382 |
output_spectrogram_inp = gr.Image(label="Output Spectrogram")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 383 |
|
| 384 |
gr.Examples(
|
| 385 |
examples = [
|
|
@@ -415,7 +426,7 @@ with gr.Blocks(css=css) as demo:
|
|
| 415 |
submit_btn_inp.click(
|
| 416 |
fn = infer_inp,
|
| 417 |
inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
|
| 418 |
-
outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp]
|
| 419 |
)
|
| 420 |
|
| 421 |
demo.queue().launch(show_api=False, show_error=True)
|
|
|
|
| 206 |
post_masked_spec = denormalize(masked_spec).to(device, dtype)
|
| 207 |
denorm_masked_spec = denormalize_spectrogram(post_masked_spec)
|
| 208 |
denorm_masked_spec_audio = vocoder.inference(denorm_masked_spec)
|
| 209 |
+
|
| 210 |
+
# Rescale generated spectrogram to match original range
|
| 211 |
+
output_spec = (output_spec - output_spec.min()) / (output_spec.max() - output_spec.min()) # Normalize to [0,1]
|
| 212 |
+
output_spec = output_spec * (norm_spec.max() - norm_spec.min()) + norm_spec.min() # Rescale to match input range
|
| 213 |
|
| 214 |
denorm_spec = denormalize_spectrogram(output_spec)
|
| 215 |
denorm_spec_audio = vocoder.inference(denorm_spec)
|
|
|
|
| 229 |
print("Output spectrogram min/max:", output_spec.min().item(), output_spec.max().item())
|
| 230 |
|
| 231 |
# Save as WAV
|
| 232 |
+
sf.write("raw_output.wav", raw_chunk_audio, 16000)
|
| 233 |
+
sf.write("masked_raw_output.wav", denorm_masked_spec_audio, 16000)
|
| 234 |
+
sf.write("generated_output.wav", denorm_spec_audio, 16000)
|
| 235 |
|
| 236 |
# Save input spectrogram image
|
| 237 |
input_spec_image_path = "input_spectrogram.png"
|
|
|
|
| 241 |
output_spec_image_path = "output_spectrogram.png"
|
| 242 |
color_output_spec_image.save(output_spec_image_path)
|
| 243 |
|
| 244 |
+
return "raw_output.wav", input_spec_image_path, color_output_spec_image, "raw_output.wav", "masked_raw_output.wav"
|
| 245 |
|
| 246 |
def load_input_spectrogram(audio_path):
|
| 247 |
# Loading
|
|
|
|
| 386 |
with gr.Column():
|
| 387 |
input_spectrogram_inp = gr.Image(label="Input Spectrogram")
|
| 388 |
output_spectrogram_inp = gr.Image(label="Output Spectrogram")
|
| 389 |
+
|
| 390 |
+
with gr.Accordion("Raw Processed audio", open=False):
|
| 391 |
+
with gr.Column():
|
| 392 |
+
raw_out_audio = gr.Audio(label="RAW Audio")
|
| 393 |
+
raw_masked_out_audio = gr.Audio(label="RAW Masked Audio")
|
| 394 |
|
| 395 |
gr.Examples(
|
| 396 |
examples = [
|
|
|
|
| 426 |
submit_btn_inp.click(
|
| 427 |
fn = infer_inp,
|
| 428 |
inputs = [prompt_inp, audio_in_inp, mask_start_point, mask_end_point],
|
| 429 |
+
outputs = [audio_out_inp, input_spectrogram_inp, output_spectrogram_inp, raw_out_audio, raw_masked_out_audio]
|
| 430 |
)
|
| 431 |
|
| 432 |
demo.queue().launch(show_api=False, show_error=True)
|