import os
import tempfile

import gradio as gr
import numpy as np
import soundfile as sf
import torch
from NovaSR import FastSR

# restarting because of update.
# Module-level upsampler instance, built once at import time and reused by
# every super_resolve() call.
upsampler = FastSR()
# Calls .float() on the wrapped model -- presumably casting its weights to
# float32 (assumes `model` is a torch module; confirm against NovaSR).
# Original author's rationale: float's faster on cpu.
upsampler.model.float()
# Sample rate (Hz) reported alongside the audio returned by super_resolve().
TARGET_SR = 48000


def super_resolve(audio):
    """Upsample a Gradio audio clip with the module-level NovaSR upsampler.

    Args:
        audio: ``(sample_rate, numpy_array)`` tuple as delivered by a
            ``gr.Audio(type="numpy")`` input, or ``None`` when the user
            submitted nothing.

    Returns:
        ``(TARGET_SR, samples)`` where ``samples`` is an ``int16`` NumPy
        array, or ``None`` when ``audio`` is ``None``.
    """
    if audio is None:
        return None

    sr, wav = audio

    # The upsampler loads its input from a path, so the clip is round-tripped
    # through a temporary WAV file. Only reserve the name here and close the
    # handle immediately: writing through a second handle while this one is
    # still open fails on platforms that lock open files.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as f:
        temp_path = f.name

    try:
        sf.write(temp_path, wav, sr)
        lowres_audio = upsampler.load_audio(temp_path).float()
    finally:
        # delete=False means nothing else cleans this up; without the removal
        # every request would leave a WAV behind in the temp directory.
        os.remove(temp_path)

    # Inference only: skip autograd bookkeeping.
    with torch.no_grad():
        highres = upsampler.infer(lowres_audio).cpu().squeeze(0).numpy()

    # Clamp to [-1, 1] before scaling so out-of-range samples saturate instead
    # of wrapping around when cast to 16-bit integers.
    highres_audio = (np.clip(highres, -1.0, 1.0) * 32767).astype(np.int16)
    return (TARGET_SR, highres_audio)


# Input widget: accepts an uploaded file or a microphone recording and hands
# it to the callback as a (sample_rate, numpy_array) tuple.
_lowres_input = gr.Audio(
    sources=["upload", "microphone"],
    type="numpy",
    label="Low-resolution audio (16 kHz recommended)",
)

# Output widget: plays back the (sample_rate, numpy_array) tuple returned by
# the callback.
_highres_output = gr.Audio(type="numpy", label="Upsampled audio (48 kHz)")

# Blurb shown under the title; adjacent literals concatenate into one string.
_DESCRIPTION = (
    "NovaSR is just a 52kb incredibly fast audio upsampler reaching speeds of "
    "3600x realtime. This demo is on a 2 core CPU so speeds will not be as fast "
    "as it can be on consumer devices. Please upload a low quality audio file "
    "to try it out."
)

# Single-function Gradio app wiring the audio input through super_resolve().
demo = gr.Interface(
    fn=super_resolve,
    inputs=_lowres_input,
    outputs=_highres_output,
    title="NovaSR Audio Super-Resolution",
    description=_DESCRIPTION,
)

# Start the web server only when run as a script, not when imported.
if __name__ == "__main__":
    demo.launch()