| | import gradio as gr |
| | from audiosr import super_resolution, build_model |
| | import torch |
| | import gc |
| | import spaces |
| |
|
| | @spaces.GPU(duration=300) |
| | def inference(audio_file, model_name, guidance_scale, ddim_steps, seed): |
| | audiosr = build_model(model_name=model_name) |
| | |
| | if torch.cuda.is_avaible(): |
| | torch.cuda.empty_cache() |
| |
|
| | gc.collect() |
| |
|
| | |
| | if seed == 0: |
| | import random |
| | seed = random.randint(1, 2**32-1) |
| | |
| | waveform = super_resolution( |
| | audiosr, |
| | audio_file, |
| | seed, |
| | guidance_scale=guidance_scale, |
| | ddim_steps=ddim_steps |
| | ) |
| |
|
| | if torch.cuda.is_avaible(): |
| | torch.cuda.empty_cache() |
| |
|
| | gc.collect() |
| | |
| | return (48000, waveform) |
| |
|
| | iface = gr.Interface( |
| | fn=inference, |
| | inputs=[ |
| | gr.Audio(type="filepath", label="Input Audio"), |
| | gr.Dropdown(["basic", "speech"], value="basic", label="Model"), |
| | gr.Slider(1, 10, value=3.5, step=0.1, label="Guidance Scale", info="Guidance scale (Large => better quality and relavancy to text; Small => better diversity)"), |
| | gr.Slider(1, 100, value=50, step=1, label="DDIM Steps", info="The sampling step for DDIM"), |
| | gr.Number(value=42, precision=0, label="Seed", info="Changing this value (any integer number) will lead to a different generation result, put 0 for a random one.") |
| | ], |
| | outputs=gr.Audio(type="numpy", label="Output Audio"), |
| | title="AudioSR", |
| | description="Audio Super Resolution with AudioSR" |
| | ) |
| |
|
| | iface.launch(share=False) |
| |
|