# SoftGiant TTS LU

# Developed by SoftGiant
# Credits:
# * Marco Barnig
# * https://piper.ttstool.com/
# * https://huggingface.co/spaces/broadfield/piper-fast-tts
# * https://github.com/rhasspy
# * https://github.com/rhasspy/piper
# * https://github.com/broadfield-dev/PyPiperTTS-win
# * https://github.com/broadfield-dev/PyPiperTTS

import gradio as gr
import os
from pypipertts import PyPiper
import shlex

pp = PyPiper()

# Sample sentence pre-filled in the "Text" input box.
MY_EXAMPLE = "D’Wanteraktioun huet Mëtt Abrëll hir Dieren no fënnef Méint rëm zougemaach."

# Preset labels offered by the "Reading Speed" and "Sentence Pause" radios.
SPEED_CHOICES = [
    "normal",
    "fast (x2)",
    "very fast (x5)",
]
PAUSE_CHOICES = [
    "very short (x5)",
    "short (x2)",
    "medium",
]

def speed_to_length(choice):
    """Map a "Reading Speed" preset label to its length value.

    Faster presets map to smaller values (x2 -> 0.5, x5 -> 0.2).

    Args:
        choice: One of the labels "normal", "fast (x2)" or "very fast (x5)".

    Returns:
        The float associated with the preset.

    Raises:
        KeyError: If ``choice`` is not one of the known labels.
    """
    length_by_preset = {
        "normal": 1.0,
        "fast (x2)": 0.5,
        "very fast (x5)": 0.2,
    }
    return length_by_preset[choice]

def pause_to_length(choice):
    """Map a "Sentence Pause" preset label to its pause value.

    Shorter presets map to smaller values (x5 -> 0.2, x2 -> 0.5).

    Args:
        choice: One of the labels "very short (x5)", "short (x2)" or "medium".

    Returns:
        The float associated with the preset.

    Raises:
        KeyError: If ``choice`` is not one of the known labels.
    """
    pause_by_preset = {
        "very short (x5)": 0.2,
        "short (x2)": 0.5,
        "medium": 1.0,
    }
    return pause_by_preset[choice]

def new_load_mod(voice_key):
    """Switch the shared PyPiper instance to the given voice.

    Args:
        voice_key: File stem of the voice under ``voices/`` (for example
            ``"lb_LU-female-medium"``). It is used verbatim; no quality
            suffix is appended.

    Returns:
        The result of the ``gr.Info`` call that announces the loaded model.
    """
    model_path = f"voices/{voice_key}.onnx"
    config_path = f"{os.getcwd()}/{model_path}.json"

    # The config path is assigned before load_mod is called.
    # NOTE(review): presumably load_mod reads pp.json_ob -- confirm in PyPiper.
    pp.json_ob = config_path
    pp.load_mod(instr=voice_key)

    notification = gr.Info(f"Loaded model: {voice_key}", duration=2)
    return notification

async def stream_tts(text, length, noise, width, sen_pause):
    """Relay the audio chunks produced by ``pp.stream_tts``.

    This is an async generator, but the underlying ``pp.stream_tts`` result
    is consumed with a plain ``for`` loop.

    Args:
        text: Text to synthesize.
        length: Forwarded to ``pp.stream_tts`` as ``length``.
        noise: Forwarded to ``pp.stream_tts`` as ``noise``.
        width: Forwarded to ``pp.stream_tts`` as ``width``.
        sen_pause: Forwarded to ``pp.stream_tts`` as ``sen_pause``.

    Yields:
        Each item produced by ``pp.stream_tts``, unchanged.
    """
    synth_options = {
        "length": length,
        "noise": noise,
        "width": width,
        "sen_pause": sen_pause,
    }
    chunks = pp.stream_tts(text, **synth_options)
    for chunk in chunks:
        yield chunk

def tts(text, length, noise, width, sen_pause):
    """Synthesize ``text`` in one call via ``pp.tts``.

    Args:
        text: Text to synthesize.
        length: Forwarded to ``pp.tts`` as ``length``.
        noise: Forwarded to ``pp.tts`` as ``noise``.
        width: Forwarded to ``pp.tts`` as ``width``.
        sen_pause: Forwarded to ``pp.tts`` as ``sen_pause``.

    Returns:
        Whatever ``pp.tts`` returns, unchanged.
    """
    synth_options = {
        "length": length,
        "noise": noise,
        "width": width,
        "sen_pause": sen_pause,
    }
    return pp.tts(text, **synth_options)

def clean_str(text):
    """Return ``text`` quoted with ``shlex.quote`` for use as a shell token."""
    quoted = shlex.quote(text)
    return quoted

# Load the default voice once at import time. This is the same voice the
# "Voice" dropdown starts on, so the UI and the loaded model agree.
new_load_mod("lb_LU-female-medium")

# Build the Gradio UI: text/voice inputs, buttons and the audio player in the
# wide left column; information and synthesis settings in the right column.
with gr.Blocks() as demo:
    gr.HTML("<h1>SoftGiant TTS LU</h1>")

    with gr.Row():
        with gr.Column(scale=2):
            text_in    = gr.Textbox(label="Text",     lines=10, value=MY_EXAMPLE)
            # The default value matches the voice loaded at startup.
            voice      = gr.Dropdown(label="Voice",
                                     choices=["lb_LU-female-low", "lb_LU-female-medium"],
                                     value="lb_LU-female-medium")
            # Buttons
            with gr.Row():
                # stream_btn = gr.Button("Stream")
                synth_btn  = gr.Button("Synthesize")
                stop_btn   = gr.Button("Stop")
            output_audio = gr.Audio(streaming=True, autoplay=True)

        with gr.Column(scale=1):
            with gr.Accordion("Information", open=False):
                # The Markdown text starts at column 0 and the first line ends
                # with two spaces on purpose (Markdown hard line break).
                gr.Markdown("""
**SoftGiant TTS LU model** trained on the [ZLS](https://huggingface.co/datasets/denZLS/Luxembourgish-Male-TTS-for-LOD) dataset.  
Built using the [Piper TTS](https://github.com/rhasspy/piper) for high‑quality Luxembourgish text‑to‑speech.
""")
            # Presets: selecting one overwrites the matching slider below
            # (see the .change handlers further down).
            speed_radio = gr.Radio(label="Reading Speed", choices=SPEED_CHOICES, value="normal")
            pause_radio = gr.Radio(label="Sentence Pause", choices=PAUSE_CHOICES, value="medium")
            # Raw values that are passed to tts() when "Synthesize" is clicked.
            with gr.Accordion("Advanced Settings", open=False):
                length     = gr.Slider(label="Length",       minimum=0.01, maximum=10.0, value=1.0)
                noise      = gr.Slider(label="Noise Level",  minimum=0.01, maximum=3.0,  value=0.1)
                width      = gr.Slider(label="Noise Width",  minimum=0.01, maximum=3.0,  value=0.5)
                sen_pause  = gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1.0)

    # Wire up callbacks
    # NOTE(review): new_load_mod returns the result of gr.Info(...), and that
    # value is routed to output_audio here -- confirm this is intended.
    voice.change(new_load_mod, inputs=voice, outputs=output_audio)
    # Preset radios write the mapped value into the corresponding slider.
    speed_radio.change(speed_to_length, inputs=speed_radio, outputs=length)
    pause_radio.change(pause_to_length, inputs=pause_radio, outputs=sen_pause)

    # Stream vs full synth (the streaming button is currently disabled)
    # f_stream = stream_btn.click(stream_tts,
    #                             inputs=[text_in, length, noise, width, sen_pause],
    #                             outputs=output_audio)
    # Keep a handle on the synth event so the Stop button can cancel it.
    f_synth  = synth_btn.click(tts,
                                inputs=[text_in, length, noise, width, sen_pause],
                                outputs=output_audio)

    # Stop any in-flight generation: no handler function is attached, the
    # click only cancels the events listed in `cancels`.
    stop_btn.click(None, None, outputs=output_audio, cancels=[
        # f_stream,
        f_synth
    ])

# Start the Gradio server. There is no `if __name__ == "__main__"` guard, so
# this also runs when the module is imported.
demo.launch()