"""Gradio demo that streams speech synthesized by a Soprano TTS model.

The page offers a text box, three sampling sliders, a streaming audio
player and a download button that writes the accumulated audio to a WAV file.
"""

import os
import tempfile

import gradio as gr
import torch
import numpy as np
from soprano import SopranoTTS
from scipy.io.wavfile import write as wav_write
import spaces

DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
print(DEVICE)

# Load model once
model = SopranoTTS(
    backend="auto",
    device=DEVICE,
    cache_size_mb=100,
    decoder_batch_size=1,
)

# Sample rate handed to both the Gradio audio component and the WAV writer.
SAMPLE_RATE = 32000


@spaces.GPU
def tts_stream(text, temperature, top_p, repetition_penalty, state):
    """Synthesize ``text`` and stream the audio chunk by chunk.

    Args:
        text: Text to synthesize. ``None``, empty or whitespace-only input
            produces a single ``(None, state)`` result and nothing else.
        temperature: Forwarded unchanged to ``model.infer_stream``.
        top_p: Forwarded unchanged to ``model.infer_stream``.
        repetition_penalty: Forwarded unchanged to ``model.infer_stream``.
        state: Audio kept from an earlier run; it is passed through untouched
            when there is nothing to synthesize.

    Yields:
        ``(audio, new_state)`` pairs. ``audio`` is ``(SAMPLE_RATE, samples)``
        (or ``None`` for blank input) and ``new_state`` is the concatenation
        of every chunk produced so far in this run.
    """
    # Guard against None as well as blank text; None has no .strip().
    if not text or not text.strip():
        yield None, state
        return

    chunks = []
    full_audio = None

    stream = model.infer_stream(
        text,
        chunk_size=1,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )

    for chunk in stream:
        # Items that are not tensors are skipped.
        if not isinstance(chunk, torch.Tensor):
            continue

        audio_np = chunk.detach().cpu().numpy().astype(np.float32)
        chunks.append(audio_np)

        # The state is refreshed on every chunk so that a download requested
        # mid-stream already contains everything generated so far.
        full_audio = np.concatenate(chunks)

        # stream partial audio
        yield (SAMPLE_RATE, audio_np), full_audio

    if full_audio is not None:
        # Reuse the array built for the last chunk instead of concatenating
        # the same list a second time.
        # NOTE(review): audio_out is created with streaming=True; confirm that
        # sending the complete waveform here does not make it play again.
        yield (SAMPLE_RATE, full_audio), full_audio


def save_audio(state):
    """Write the accumulated audio to a temporary WAV file.

    Args:
        state: Audio samples stored by ``tts_stream``, or ``None``.

    Returns:
        Path of the newly created ``.wav`` file, or ``None`` when ``state``
        is ``None`` or holds no samples.
    """
    # np.size also catches arrays whose leading axis is non-empty but that
    # contain no samples at all.
    if state is None or np.size(state) == 0:
        return None

    # mkstemp returns an open descriptor; close it so the writer can open the
    # path itself.
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        wav_write(path, SAMPLE_RATE, state)
    except Exception:
        # Do not leave an orphaned temp file behind when writing fails.
        if os.path.exists(path):
            os.remove(path)
        raise

    return path


with gr.Blocks() as demo:
    # Holds the audio generated so far so the download button can reach it.
    state_audio = gr.State(None)

    with gr.Row():
        with gr.Column():
            gr.Markdown("## Soprano Demo")

            text_in = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to synthesize...",
                lines=4,
            )

            with gr.Accordion("Advanced options", open=False):
                temperature = gr.Slider(
                    0.0, 1.0, value=0.3, step=0.05, label="Temperature"
                )
                top_p = gr.Slider(
                    0.0, 1.0, value=0.95, step=0.01, label="Top-p"
                )
                repetition_penalty = gr.Slider(
                    0.5, 2.0, value=1.2, step=0.05, label="Repetition penalty"
                )

            gen_btn = gr.Button("Generate")

        with gr.Column():
            audio_out = gr.Audio(
                label="Output Audio",
                autoplay=True,
                streaming=True,
            )

            download_btn = gr.Button("Download")
            file_out = gr.File(label="Download file")

            # NOTE(review): confirm this block belongs in the right-hand
            # column rather than underneath the row.
            gr.Markdown(
                "Usage tips: (placeholder)\n\n"
                "- Tip 1\n"
                "- Tip 2\n"
                "- Tip 3"
            )

    gen_btn.click(
        fn=tts_stream,
        inputs=[text_in, temperature, top_p, repetition_penalty, state_audio],
        outputs=[audio_out, state_audio],
    )

    download_btn.click(
        fn=save_audio,
        inputs=[state_audio],
        outputs=[file_out],
    )

demo.queue()
demo.launch()