# Developed by SoftGiant
# Credits:
# * Marco Barnig
# * https://piper.ttstool.com/
# * https://huggingface.co/spaces/broadfield/piper-fast-tts
# * https://github.com/rhasspy
# * https://github.com/rhasspy/piper
# * https://github.com/broadfield-dev/PyPiperTTS-win
# * https://github.com/broadfield-dev/PyPiperTTS

import os
import shlex

import gradio as gr
from pypipertts import PyPiper

# Single PyPiper instance shared by every callback below.
pp = PyPiper()

MY_EXAMPLE = """D’Wanteraktioun huet Mëtt Abrëll hir Dieren no fënnef Méint rëm zougemaach."""

# Speed & pause presets: the labels shown in the radio groups.
SPEED_CHOICES = ["normal", "fast (x2)", "very fast (x5)"]
PAUSE_CHOICES = ["very short (x5)", "short (x2)", "medium"]

# Preset label -> slider value. Faster speech / shorter pauses use smaller values.
_LENGTH_BY_SPEED = {
    "normal": 1.0,
    "fast (x2)": 0.5,
    "very fast (x5)": 0.2,
}
_PAUSE_BY_LABEL = {
    "very short (x5)": 0.2,
    "short (x2)": 0.5,
    "medium": 1.0,
}


def speed_to_length(choice):
    """Return the "Length" slider value for a reading-speed preset label.

    Raises KeyError if ``choice`` is not one of SPEED_CHOICES.
    """
    return _LENGTH_BY_SPEED[choice]


def pause_to_length(choice):
    """Return the "Sentence Pause" slider value for a pause preset label.

    Raises KeyError if ``choice`` is not one of PAUSE_CHOICES.
    """
    return _PAUSE_BY_LABEL[choice]


def new_load_mod(voice_key):
    """Switch the shared PyPiper instance to the voice named ``voice_key``.

    The JSON path is built from the current working directory as
    ``<cwd>/voices/<voice_key>.onnx.json`` and stored on ``pp.json_ob``
    before the model itself is loaded.

    Returns the result of the ``gr.Info`` notification call.
    """
    pp.json_ob = f"{os.getcwd()}/voices/{voice_key}.onnx.json"
    # Actually load the model.
    pp.load_mod(instr=voice_key)
    return gr.Info(f"Loaded model: {voice_key}", duration=2)


async def stream_tts(text, length, noise, width, sen_pause):
    """Yield audio piece by piece as ``pp.stream_tts`` produces it.

    Not wired to any button at the moment (the "Stream" button is disabled).
    """
    chunks = pp.stream_tts(
        text,
        length=length,
        noise=noise,
        width=width,
        sen_pause=sen_pause,
    )
    for chunk in chunks:
        yield chunk


def tts(text, length, noise, width, sen_pause):
    """Synthesize ``text`` in one go and return what ``pp.tts`` produces."""
    return pp.tts(
        text,
        length=length,
        noise=noise,
        width=width,
        sen_pause=sen_pause,
    )


def clean_str(text):
    """Return ``text`` quoted for safe use as a single shell token."""
    return shlex.quote(text)


# Initialize the model.
new_load_mod("lb_LU-female-medium")

with gr.Blocks() as demo:
    # NOTE(review): any markup around this title appears to have been lost;
    # only the visible text is reproduced here - confirm against the deployed app.
    gr.HTML("\n\nSoftGiant TTS LU\n\n")

    with gr.Row():
        with gr.Column(scale=2):
            text_in = gr.Textbox(label="Text", lines=10, value=MY_EXAMPLE)
            voice = gr.Dropdown(
                label="Voice",
                choices=["lb_LU-female-low", "lb_LU-female-medium"],
                value="lb_LU-female-medium",
            )

            # Buttons (the "Stream" button is currently disabled).
            with gr.Row():
                synth_btn = gr.Button("Synthesize")
                stop_btn = gr.Button("Stop")

            output_audio = gr.Audio(streaming=True, autoplay=True)

        with gr.Column(scale=1):
            with gr.Accordion("Information", open=False):
                gr.Markdown("""
                **SoftGiant TTS LU model** trained on the [ZLS](https://huggingface.co/datasets/denZLS/Luxembourgish-Male-TTS-for-LOD) dataset.
                Built using the [Piper TTS](https://github.com/rhasspy/piper) for high‑quality Luxembourgish text‑to‑speech.
                """)

            speed_radio = gr.Radio(
                label="Reading Speed",
                choices=SPEED_CHOICES,
                value="normal",
            )
            pause_radio = gr.Radio(
                label="Sentence Pause",
                choices=PAUSE_CHOICES,
                value="medium",
            )

            with gr.Accordion("Advanced Settings", open=False):
                length = gr.Slider(
                    label="Length", minimum=0.01, maximum=10.0, value=1.0
                )
                noise = gr.Slider(
                    label="Noise Level", minimum=0.01, maximum=3.0, value=0.1
                )
                width = gr.Slider(
                    label="Noise Width", minimum=0.01, maximum=3.0, value=0.5
                )
                sen_pause = gr.Slider(
                    label="Sentence Pause", minimum=0.1, maximum=10.0, value=1.0
                )

    # Wire up callbacks.
    # Changing the voice reloads the model; the handler's return value is
    # routed to the audio output.
    voice.change(fn=new_load_mod, inputs=voice, outputs=output_audio)
    # The preset radios simply drive the matching advanced sliders.
    speed_radio.change(fn=speed_to_length, inputs=speed_radio, outputs=length)
    pause_radio.change(fn=pause_to_length, inputs=pause_radio, outputs=sen_pause)

    # Full synthesis (the streaming variant is not wired up).
    synth_inputs = [text_in, length, noise, width, sen_pause]
    f_synth = synth_btn.click(fn=tts, inputs=synth_inputs, outputs=output_audio)

    # Stop any in-flight generation.
    stop_btn.click(fn=None, inputs=None, outputs=output_audio, cancels=[f_synth])

demo.launch()