Spaces:
Runtime error
Runtime error
File size: 3,343 Bytes
7e66c78 6137274 7e66c78 6137274 7e66c78 46db302 7e66c78 744f1d7 7e66c78 f0ac9ac 7e66c78 7a56e38 f0ac9ac 7e66c78 f0ac9ac 7e66c78 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 |
import gradio as gr
import torch
import numpy as np
from soprano import SopranoTTS
from scipy.io.wavfile import write as wav_write
import tempfile
import os
import spaces
# Run on the GPU whenever torch reports one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = "cuda"
else:
    DEVICE = "cpu"
print(DEVICE)

# Rate handed to the audio player with every chunk and to the WAV writer.
SAMPLE_RATE = 32000

# Build the model a single time at import so every request reuses it.
model = SopranoTTS(
    backend="auto",
    device=DEVICE,
    cache_size_mb=100,
    decoder_batch_size=1,
)
@spaces.GPU
def tts_stream(text, temperature, top_p, repetition_penalty, state):
    """Stream synthesized speech for ``text`` one chunk at a time.

    This is a generator intended to be used as a Gradio event handler with
    two outputs (the audio player and the audio state).

    Args:
        text: Text to synthesize. ``None``, empty, or whitespace-only input
            produces no audio.
        temperature: Sampling temperature, forwarded to ``model.infer_stream``.
        top_p: Nucleus-sampling threshold, forwarded to ``model.infer_stream``.
        repetition_penalty: Repetition penalty, forwarded to
            ``model.infer_stream``.
        state: Audio stored by a previous call; it is yielded back unchanged
            when there is nothing to synthesize.

    Yields:
        A pair ``(audio_update, new_state)``. ``audio_update`` is
        ``(SAMPLE_RATE, chunk)`` holding only the newest chunk as a float32
        array, or ``None`` for blank input. ``new_state`` is every chunk
        produced so far in this call, concatenated into one array.
    """
    # Check for None before calling .strip(): a missing value would otherwise
    # raise AttributeError instead of being treated as blank input.
    if not text or not text.strip():
        yield None, state
        return

    chunks = []
    stream = model.infer_stream(
        text,
        chunk_size=20,
        temperature=temperature,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
    )
    for chunk in stream:
        # Only tensor items carry audio here; anything else is skipped.
        if not isinstance(chunk, torch.Tensor):
            continue
        audio_np = chunk.detach().cpu().numpy().astype(np.float32)
        chunks.append(audio_np)
        # The player receives just the new chunk, while the state is rebuilt
        # from all chunks so it is complete after the last yield and no
        # separate final yield is needed.
        # NOTE(review): concatenation is along axis 0, which presumes each
        # chunk is a 1-D sample vector -- confirm against the model's output.
        yield (SAMPLE_RATE, audio_np), np.concatenate(chunks)
def save_audio(state):
    """Write the accumulated audio to a temporary WAV file.

    Args:
        state: Audio samples (array-like) collected by the streaming
            handler, or ``None`` when nothing has been generated yet.

    Returns:
        The path of a newly created ``.wav`` file, or ``None`` when there
        are no samples to write. The file is not deleted here; whoever
        receives the path is responsible for it.
    """
    if state is None:
        return None

    audio = np.asarray(state)
    # ``size`` counts elements across every dimension, so an empty array of
    # any shape (not just one whose first dimension is 0) is rejected.
    if audio.size == 0:
        return None

    # mkstemp hands back an open descriptor; close it so the writer can
    # reopen the file by name.
    fd, path = tempfile.mkstemp(suffix=".wav")
    os.close(fd)
    try:
        wav_write(path, SAMPLE_RATE, audio)
    except Exception:
        # Do not leave an empty or partial temp file behind on failure.
        os.remove(path)
        raise
    return path
# Page layout: text and sampling controls in the first column, the streamed
# audio player and download widgets in the second.
# NOTE(review): the nesting below was reconstructed from a source whose
# indentation was lost -- confirm that the usage tips belong in the second
# column and that the event wiring sits inside the Blocks context.
with gr.Blocks() as demo:
    # Per-session slot holding the audio produced by the generate handler.
    state_audio = gr.State(None)

    with gr.Row():
        with gr.Column():
            gr.Markdown("# Soprano Demo")
            text_in = gr.Textbox(
                label="Input Text",
                placeholder="Enter text to synthesize...",
                lines=4,
            )
            with gr.Accordion("Advanced options", open=False):
                temperature = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.3,
                    step=0.05,
                    label="Temperature",
                )
                top_p = gr.Slider(
                    minimum=0.0,
                    maximum=1.0,
                    value=0.95,
                    step=0.01,
                    label="Top-p",
                )
                repetition_penalty = gr.Slider(
                    minimum=0.5,
                    maximum=2.0,
                    value=1.2,
                    step=0.05,
                    label="Repetition penalty",
                )
            gen_btn = gr.Button("Generate")

        with gr.Column():
            audio_out = gr.Audio(label="Output Audio", autoplay=True, streaming=True)
            download_btn = gr.Button("Download")
            file_out = gr.File(label="Download file")
            gr.Markdown(
                "Usage tips:\n\n"
                "- Soprano works best when each sentence is between 2 and 15 seconds long.\n"
                "- Although Soprano recognizes numbers and some special characters, it occasionally mispronounces them. Best results can be achieved by converting these into their phonetic form. (1+1 -> one plus one, etc)\n"
                "- If Soprano produces unsatisfactory results, you can easily regenerate it for a new, potentially better generation. You may also change the sampling settings for more varied results."
            )

    # "Generate" streams audio to the player and refreshes the stored clip.
    gen_btn.click(
        fn=tts_stream,
        inputs=[text_in, temperature, top_p, repetition_penalty, state_audio],
        outputs=[audio_out, state_audio],
    )

    # "Download" turns the stored clip into a file for the File component.
    download_btn.click(fn=save_audio, inputs=[state_audio], outputs=[file_out])
# Turn on Gradio's event queue, then start serving the app.
demo.queue()
demo.launch()