# TTS-LU / app.py
# Last commit by hca97: "Short very short naming fixed" (23af960, verified)
# Developed by SoftGiant
# Credits:
# * Marco Barnig
# * https://piper.ttstool.com/
# * https://huggingface.co/spaces/broadfield/piper-fast-tts
# * https://github.com/rhasspy
# * https://github.com/rhasspy/piper
# * https://github.com/broadfield-dev/PyPiperTTS-win
# * https://github.com/broadfield-dev/PyPiperTTS
import gradio as gr
import os
from pypipertts import PyPiper
import shlex
# Single shared Piper engine; the voice-loading and synthesis helpers use it.
pp = PyPiper()

# Luxembourgish sample sentence pre-filled into the text box.
MY_EXAMPLE = """D’Wanteraktioun huet Mëtt Abrëll hir Dieren no fënnef Méint rëm zougemaach."""

# Speed & pause presets: the labels offered by the two radio groups. Each
# label must also be a key in the matching *_to_length lookup.
SPEED_CHOICES = [
    "normal",
    "fast (x2)",
    "very fast (x5)",
]
PAUSE_CHOICES = [
    "very short (x5)",
    "short (x2)",
    "medium",
]
def speed_to_length(choice):
    """Translate a "Reading Speed" preset label into its length value.

    The faster the preset, the smaller the returned value ("normal" is 1.0).
    Raises ``KeyError`` when ``choice`` is not one of the known labels.
    """
    labels = ("normal", "fast (x2)", "very fast (x5)")
    lengths = (1.0, 0.5, 0.2)
    return dict(zip(labels, lengths))[choice]
def pause_to_length(choice):
    """Translate a "Sentence Pause" preset label into its pause value.

    The shorter the preset, the smaller the returned value ("medium" is 1.0).
    Raises ``KeyError`` when ``choice`` is not one of the known labels.
    """
    labels = ("very short (x5)", "short (x2)", "medium")
    pauses = (0.2, 0.5, 1.0)
    return dict(zip(labels, pauses))[choice]
def new_load_mod(voice_key):
    """Switch the shared Piper engine to the voice named ``voice_key``.

    Points ``pp.json_ob`` at ``<cwd>/voices/<voice_key>.onnx.json``, asks the
    engine to load the voice, and returns the result of the ``gr.Info`` call
    that announces which model was loaded.
    """
    onnx_relpath = f"voices/{voice_key}.onnx"
    # The JSON file is addressed next to the .onnx file, relative to the cwd.
    pp.json_ob = f"{os.getcwd()}/{onnx_relpath}.json"
    # Set the JSON path first, then actually load the model.
    pp.load_mod(instr=voice_key)
    notice = f"Loaded model: {voice_key}"
    return gr.Info(notice, duration=2)
async def stream_tts(text, length, noise, width, sen_pause):
    """Asynchronously yield audio items as the shared Piper engine produces them.

    ``pp.stream_tts`` is consumed as a plain (synchronous) iterable. Driving
    it directly inside a coroutine would stall the event loop for as long as
    each step takes, so every item is fetched in the loop's default executor
    and only the hand-over happens on the event loop.

    Args:
        text: Text to synthesize.
        length, noise, width, sen_pause: Forwarded unchanged, as keyword
            arguments of the same names, to ``pp.stream_tts``.

    Yields:
        Each item produced by ``pp.stream_tts``, in order.
    """
    import asyncio  # function-scope import: only this coroutine needs it

    loop = asyncio.get_running_loop()
    chunks = iter(
        pp.stream_tts(text, length=length, noise=noise, width=width, sen_pause=sen_pause)
    )
    # Sentinel instead of StopIteration: that exception cannot be propagated
    # out of an executor future, so exhaustion is signalled by value.
    exhausted = object()
    while True:
        audio = await loop.run_in_executor(None, next, chunks, exhausted)
        if audio is exhausted:
            return
        yield audio
def tts(text, length, noise, width, sen_pause):
    """Synthesize ``text`` in a single call and return what ``pp.tts`` produces.

    ``length``, ``noise``, ``width`` and ``sen_pause`` are forwarded unchanged
    as keyword arguments of the same names.
    """
    synth_options = {
        "length": length,
        "noise": noise,
        "width": width,
        "sen_pause": sen_pause,
    }
    return pp.tts(text, **synth_options)
def clean_str(text):
    """Return ``text`` escaped by ``shlex.quote`` for use as one shell token."""
    quoted = shlex.quote(text)
    return quoted
# Voices offered in the dropdown, and the one loaded at start-up. Keeping the
# default in one place guarantees the pre-loaded model and the dropdown's
# initial selection cannot drift apart.
VOICE_CHOICES = ["lb_LU-female-low", "lb_LU-female-medium"]
DEFAULT_VOICE = "lb_LU-female-medium"

# initialize the model
new_load_mod(DEFAULT_VOICE)

with gr.Blocks() as demo:
    gr.HTML("<h1>SoftGiant TTS LU</h1>")
    with gr.Row():
        # Left column: text input, voice selection, action buttons, player.
        with gr.Column(scale=2):
            text_in = gr.Textbox(label="Text", lines=10, value=MY_EXAMPLE)
            voice = gr.Dropdown(
                label="Voice",
                choices=VOICE_CHOICES,
                value=DEFAULT_VOICE,
            )
            # Buttons
            with gr.Row():
                # stream_btn = gr.Button("Stream")
                synth_btn = gr.Button("Synthesize")
                stop_btn = gr.Button("Stop")
            output_audio = gr.Audio(streaming=True, autoplay=True)
        # Right column: model information, presets and advanced sliders.
        with gr.Column(scale=1):
            with gr.Accordion("Information", open=False):
                # Body lines are kept at column 0 so the Markdown text itself
                # carries no leading indentation.
                gr.Markdown("""
**SoftGiant TTS LU model** trained on the [ZLS](https://huggingface.co/datasets/denZLS/Luxembourgish-Male-TTS-for-LOD) dataset.
Built using the [Piper TTS](https://github.com/rhasspy/piper) for high‑quality Luxembourgish text‑to‑speech.
""")
            speed_radio = gr.Radio(label="Reading Speed", choices=SPEED_CHOICES, value="normal")
            pause_radio = gr.Radio(label="Sentence Pause", choices=PAUSE_CHOICES, value="medium")
            with gr.Accordion("Advanced Settings", open=False):
                length = gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1.0)
                noise = gr.Slider(label="Noise Level", minimum=0.01, maximum=3.0, value=0.1)
                width = gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5)
                sen_pause = gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1.0)

    # Wire up callbacks
    # Changing the voice reloads the model; the handler's return value is
    # written to the audio player.
    voice.change(new_load_mod, inputs=voice, outputs=output_audio)
    # The presets simply move the matching advanced slider.
    speed_radio.change(speed_to_length, inputs=speed_radio, outputs=length)
    pause_radio.change(pause_to_length, inputs=pause_radio, outputs=sen_pause)

    # Stream vs full synth
    # f_stream = stream_btn.click(stream_tts,
    #                             inputs=[text_in, length, noise, width, sen_pause],
    #                             outputs=output_audio)
    f_synth = synth_btn.click(
        tts,
        inputs=[text_in, length, noise, width, sen_pause],
        outputs=output_audio,
    )

    # Stop any in‑flight generation
    stop_btn.click(None, None, outputs=output_audio, cancels=[
        # f_stream,
        f_synth,
    ])

# Start the server only when run as a script, so importing this module
# (tests, tooling, reload mode) builds `demo` without blocking on launch().
if __name__ == "__main__":
    demo.launch()