"""Gradio demo that upsamples uploaded or recorded audio with NovaSR."""

import contextlib
import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch

from NovaSR import FastSR

upsampler = FastSR()
upsampler.model.float()  # float's faster on cpu

# Sample rate reported to Gradio for the upsampled output.
TARGET_SR = 48000


def super_resolve(audio):
    """Upsample one Gradio audio clip with NovaSR.

    Args:
        audio: ``(sample_rate, numpy_array)`` tuple as delivered by a
            ``gr.Audio(type="numpy")`` input, or ``None`` when nothing was
            provided.

    Returns:
        ``(TARGET_SR, int16_array)`` for Gradio to play back, or ``None``
        when ``audio`` is ``None``.
    """
    if audio is None:
        return None

    sr, wav = audio

    # NovaSR expects file input, so round-trip the samples through a temp
    # WAV. The handle is closed before writing by name because some platforms
    # refuse to reopen a temp file that is still open.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        temp_path = f.name

    try:
        sf.write(temp_path, wav, sr)
        # NOTE(review): assumes load_audio reads the file eagerly, so the
        # file can be removed as soon as it returns -- confirm against NovaSR.
        lowres_audio = upsampler.load_audio(temp_path).float()
    finally:
        # Best-effort cleanup: without it every request leaks a file, but a
        # failed removal should not fail the request itself.
        with contextlib.suppress(OSError):
            os.remove(temp_path)

    with torch.no_grad():
        # squeeze(0) drops the leading dimension when it has size 1
        # (presumably the batch axis -- confirm against NovaSR's output).
        highres_audio = upsampler.infer(lowres_audio).cpu().squeeze(0).numpy()

    # Clamp to [-1, 1] and scale to 16-bit PCM for playback.
    highres_audio = (np.clip(highres_audio, -1.0, 1.0) * 32767).astype(np.int16)
    print(highres_audio.shape)  # log the output shape to stdout for debugging
    return (TARGET_SR, highres_audio)


demo = gr.Interface(
    fn=super_resolve,
    inputs=gr.Audio(
        sources=["upload", "microphone"],
        type="numpy",
        label="Low-resolution audio (16 kHz recommended)",
    ),
    outputs=gr.Audio(
        type="numpy",
        label="Upsampled audio (48 kHz)",
    ),
    title="NovaSR Audio Super-Resolution",
    description=(
        "NovaSR is just a 52kb incredibly fast audio upsampler reaching "
        "speeds of 3600x realtime. This demo is on a 2 core CPU so speeds "
        "will not be as fast as it can be on consumer devices. Please upload "
        "a low quality audio file to try it out."
    ),
)

if __name__ == "__main__":
    demo.launch()