"""Gradio demo for SevenLabs text-to-speech (Soprano backend).

Runs on CUDA when available, otherwise CPU. The most recent waveform is
cached in ``gr.State`` so the Download button can write it to a .wav file.
"""

import os
import tempfile

import gradio as gr
import numpy as np
import torch
from scipy.io.wavfile import write as wav_write
from soprano import SopranoTTS

# Detect device once at startup; the model is loaded for that device only.
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Using device: {DEVICE}")

# Load model once - works on both CUDA and CPU.
model = SopranoTTS(
    backend="auto",        # Will automatically choose best backend for device
    device=DEVICE,
    cache_size_mb=100,     # Only relevant for CUDA
    decoder_batch_size=1,
)

SAMPLE_RATE = 32000  # Hz; output rate expected by the gr.Audio component

# No @spaces.GPU decorator - not needed for CPU support.


def tts_stream(text, temperature, top_p, repetition_penalty, state):
    """Synthesize ``text`` and yield ``((sample_rate, waveform), waveform)``.

    Parameters mirror the Gradio inputs: the three sampling controls plus the
    current ``gr.State`` value. When the input is blank, yields ``(None, state)``
    so previously generated audio state is left untouched.
    """
    if not text.strip():
        yield None, state
        return
    out = model.infer(
        text,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )
    # NOTE(review): assumes infer() returns a 1-D waveform tensor — confirm
    # against SopranoTTS; gr.Audio accepts (rate, np.ndarray) tuples.
    audio_np = out.cpu().numpy()
    yield (SAMPLE_RATE, audio_np), audio_np


def save_audio(state):
    """Write the cached waveform to a temporary .wav file and return its path.

    Returns ``None`` when no audio has been generated yet, which clears the
    gr.File output instead of raising.
    """
    if state is None or len(state) == 0:
        return None
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)  # scipy reopens the path itself; we only need the name
    wav_write(path, SAMPLE_RATE, state)
    return path


with gr.Blocks() as demo:
    # Holds the last generated waveform so it can be downloaded later.
    state_audio = gr.State(None)
    with gr.Row():
        with gr.Column():
            # Fixed: was an f-string with no placeholders (ruff F541).
            gr.Markdown("# SevenLabs")
            text_in = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to synthesize...",
                value="SevenLabs is an extremely lightweight text to speech model designed to produce highly realistic speech at unprecedented speed.",
                lines=4,
            )
            with gr.Accordion("Advanced options", open=False):
                temperature = gr.Slider(
                    0.0, 1.0, value=1, step=0.05, label="Temperature"
                )
                top_p = gr.Slider(
                    0.0, 1.0, value=0.95, step=0.01, label="Top-p"
                )
                repetition_penalty = gr.Slider(
                    1.0, 2.0, value=1.2, step=0.05, label="Repetition penalty"
                )
            gen_btn = gr.Button("Generate")
        with gr.Column():
            audio_out = gr.Audio(
                label="Output Audio",
                autoplay=True,
                streaming=False,
            )
            download_btn = gr.Button("Download")
            file_out = gr.File(label="Download file")
            # Removed a trailing gr.Markdown() that rendered nothing.

    gen_btn.click(
        fn=tts_stream,
        inputs=[text_in, temperature, top_p, repetition_penalty, state_audio],
        outputs=[audio_out, state_audio],
    )
    download_btn.click(
        fn=save_audio,
        inputs=[state_audio],
        outputs=[file_out],
    )

demo.queue()

if __name__ == "__main__":
    # Guarded so importing this module (e.g. from a test) does not start
    # the server; running it as a script behaves exactly as before.
    demo.launch()