# Spaces: Runtime error
| import gradio as gr | |
| import torch | |
| import numpy as np | |
| from soprano import SopranoTTS | |
| from scipy.io.wavfile import write as wav_write | |
| import tempfile | |
| import os | |
| import spaces | |
# Run on the GPU when PyTorch reports one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(DEVICE)

# Build the model a single time at import; tts_stream reuses this instance.
model = SopranoTTS(backend="auto", device=DEVICE, cache_size_mb=100, decoder_batch_size=1)

# Sample rate (Hz) attached to streamed chunks and used for saved WAV files.
SAMPLE_RATE = 32000
def tts_stream(text, temperature, top_p, repetition_penalty, state):
    """Stream synthesized speech for ``text`` one chunk at a time.

    This is a generator. Every yielded item is a pair
    ``(audio_update, accumulated_audio)``:

    * ``audio_update`` is ``(SAMPLE_RATE, chunk)`` holding only the newest
      chunk as a float32 NumPy array, or ``None`` when there is nothing to
      synthesize.
    * ``accumulated_audio`` is every chunk produced so far, concatenated
      into one array.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only text
            yields a single ``(None, state)`` item and ends the stream.
        temperature: Forwarded unchanged to ``model.infer_stream``.
        top_p: Forwarded unchanged to ``model.infer_stream``.
        repetition_penalty: Forwarded unchanged to ``model.infer_stream``.
        state: Previously accumulated audio; handed back untouched when
            there is no text to synthesize.
    """
    # NOTE(review): this file imports `spaces` but `spaces.GPU` is not applied
    # here; if the Space runs on ZeroGPU hardware this function presumably
    # needs that decorator -- confirm against the Space's hardware setting.

    # Treat a missing value like empty text instead of failing on
    # ``None.strip()``.
    if text is None or not text.strip():
        yield None, state
        return

    chunks = []
    stream = model.infer_stream(
        text,
        chunk_size=20,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )
    for chunk in stream:
        # Items that are not tensors are skipped.
        if not isinstance(chunk, torch.Tensor):
            continue
        audio_np = chunk.detach().cpu().numpy().astype(np.float32)
        chunks.append(audio_np)
        # First output: only the new chunk. Second output: everything so far,
        # rebuilt on each step so it is complete at every yield.
        yield (SAMPLE_RATE, audio_np), np.concatenate(chunks)
def save_audio(state):
    """Write the accumulated audio to a temporary WAV file.

    Args:
        state: Audio samples accumulated so far, or ``None``.

    Returns:
        Path of the newly written ``.wav`` file, or ``None`` when ``state``
        is ``None`` or has length zero.

    Raises:
        Whatever ``wav_write`` raises; the temporary file is removed first.
    """
    if state is None or len(state) == 0:
        return None

    # mkstemp returns an open descriptor; close it right away because
    # wav_write is given the path, not the descriptor.
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        wav_write(path, SAMPLE_RATE, state)
    except BaseException:
        # Do not leave an orphaned temp file behind when writing fails.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost -- confirm which column each component belongs to
# (in particular the usage-tips Markdown).
with gr.Blocks() as demo:
    # Carries the audio accumulated by tts_stream over to save_audio.
    state_audio = gr.State(None)

    with gr.Row():
        # First column: text input, sampling controls and the trigger button.
        with gr.Column():
            gr.Markdown("# Soprano Demo")
            text_in = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to synthesize...",
                lines=4,
            )
            with gr.Accordion("Advanced options", open=False):
                temperature = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.3, step=0.05, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.95, step=0.01, label="Top-p"
                )
                repetition_penalty = gr.Slider(
                    minimum=0.5, maximum=2.0, value=1.2, step=0.05, label="Repetition penalty"
                )
            gen_btn = gr.Button("Generate")

        # Second column: streamed playback, download controls and usage tips.
        with gr.Column():
            audio_out = gr.Audio(label="Output Audio", autoplay=True, streaming=True)
            download_btn = gr.Button("Download")
            file_out = gr.File(label="Download file")
            gr.Markdown(
                "Usage tips:\n\n"
                "- Soprano works best when each sentence is between 2 and 15 seconds long.\n"
                "- Although Soprano recognizes numbers and some special characters, it occasionally mispronounces them. Best results can be achieved by converting these into their phonetic form. (1+1 -> one plus one, etc)\n"
                "- If Soprano produces unsatisfactory results, you can easily regenerate it for a new, potentially better generation. You may also change the sampling settings for more varied results."
            )

    # "Generate" streams audio into the player and updates the stored audio.
    gen_btn.click(
        fn=tts_stream,
        inputs=[text_in, temperature, top_p, repetition_penalty, state_audio],
        outputs=[audio_out, state_audio],
    )

    # "Download" writes the stored audio to a WAV file and exposes it.
    download_btn.click(fn=save_audio, inputs=[state_audio], outputs=[file_out])

demo.queue()
demo.launch()