|
|
import gradio as gr |
|
|
import torch |
|
|
import numpy as np |
|
|
from soprano import SopranoTTS |
|
|
from scipy.io.wavfile import write as wav_write |
|
|
import tempfile |
|
|
import os |
|
|
|
|
|
|
|
|
# Run on the GPU when PyTorch reports a CUDA device, otherwise on the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(f"Using device: {DEVICE}")


# One engine instance is built at import time and reused by every request.
model = SopranoTTS(backend="auto", device=DEVICE, cache_size_mb=100, decoder_batch_size=1)


# Sample rate handed to the audio player and written into saved WAV files.
# NOTE(review): presumably matches the rate of the model's output - confirm.
SAMPLE_RATE = 32000
|
|
|
|
|
|
|
|
def tts_stream(text, temperature, top_p, repetition_penalty, state):
    """Synthesize speech for ``text`` and yield it to the UI.

    This is a generator that yields exactly once.

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            is treated as "nothing to do".
        temperature: Sampling temperature forwarded to ``model.infer``.
        top_p: Nucleus-sampling threshold forwarded to ``model.infer``.
        repetition_penalty: Repetition penalty forwarded to ``model.infer``.
        state: Previously stored audio; passed back unchanged when there is
            nothing to synthesize.

    Yields:
        A pair ``(audio_value, new_state)``. ``audio_value`` is
        ``(SAMPLE_RATE, samples)`` on success and ``None`` for blank input;
        ``new_state`` is the generated sample array, or the incoming
        ``state`` for blank input.
    """
    # Guard against None as well as blank strings so a cleared input does
    # not crash on ``.strip()``.
    if not text or not text.strip():
        yield None, state
        return

    out = model.infer(
        text,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )

    # ``out`` is handled as a torch tensor. Detach first because
    # ``Tensor.numpy()`` refuses tensors that still require grad, then move
    # to host memory before converting.
    audio_np = out.detach().cpu().numpy()
    yield (SAMPLE_RATE, audio_np), audio_np
|
|
|
|
|
def save_audio(state):
    """Write the stored audio samples to a temporary ``.wav`` file.

    Args:
        state: Sample array saved by the last generation, or ``None`` if
            nothing has been generated yet.

    Returns:
        Path of the newly written WAV file, or ``None`` when there is no
        audio to save.

    Raises:
        Whatever ``wav_write`` raises; the temporary file is removed first
        so a failed export does not leave a stray file behind.
    """
    # ``len(...) == 0`` rather than plain truthiness: a multi-element numpy
    # array has no unambiguous truth value.
    if state is None or len(state) == 0:
        return None

    # mkstemp hands back an open descriptor; close it immediately because
    # wav_write opens the file again by path.
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)

    try:
        wav_write(path, SAMPLE_RATE, state)
    except Exception:
        # Best-effort cleanup of the empty/partial file, then surface the
        # original error to the caller.
        try:
            os.remove(path)
        except OSError:
            pass
        raise
    return path
|
|
|
|
|
# Page layout: text input and sampling controls in the first column,
# playback and download widgets in the second.
# NOTE(review): nesting below is reconstructed; the original indentation was
# lost, so confirm component placement against the intended layout.
with gr.Blocks() as demo:
    # Holds the most recent waveform so the Download button can save it.
    state_audio = gr.State(None)

    with gr.Row():
        with gr.Column():
            gr.Markdown("# SevenLabs")

            text_in = gr.Textbox(
                value="SevenLabs is an extremely lightweight text to speech model designed to produce highly realistic speech at unprecedented speed.",
                lines=4,
                label="Input Text",
                placeholder="Enter text to synthesize...",
            )

            # Sampling parameters forwarded to tts_stream.
            with gr.Accordion("Advanced options", open=False):
                temperature = gr.Slider(
                    minimum=0.0, maximum=1.0, value=1, step=0.05, label="Temperature"
                )
                top_p = gr.Slider(
                    minimum=0.0, maximum=1.0, value=0.95, step=0.01, label="Top-p"
                )
                repetition_penalty = gr.Slider(
                    minimum=1.0, maximum=2.0, value=1.2, step=0.05, label="Repetition penalty"
                )

            gen_btn = gr.Button("Generate")

        with gr.Column():
            audio_out = gr.Audio(label="Output Audio", autoplay=True, streaming=False)

            download_btn = gr.Button("Download")
            file_out = gr.File(label="Download file")

            # Markdown component created without any text.
            # NOTE(review): could equally belong at the Blocks level - confirm.
            gr.Markdown()

    # Generate: synthesize, play the result, and remember it in state_audio.
    gen_btn.click(
        fn=tts_stream,
        inputs=[text_in, temperature, top_p, repetition_penalty, state_audio],
        outputs=[audio_out, state_audio],
    )

    # Download: write the remembered audio to a file and offer it via file_out.
    download_btn.click(fn=save_audio, inputs=[state_audio], outputs=[file_out])


demo.queue()
demo.launch()