Spaces:
Sleeping
Sleeping
| # Developed by SoftGiant | |
| # Credits: | |
| # * Marco Barnig | |
| # * https://piper.ttstool.com/ | |
| # * https://huggingface.co/spaces/broadfield/piper-fast-tts | |
| # * https://github.com/rhasspy | |
| # * https://github.com/rhasspy/piper | |
| # * https://github.com/broadfield-dev/PyPiperTTS-win | |
| # * https://github.com/broadfield-dev/PyPiperTTS | |
| import gradio as gr | |
| import os | |
| from pypipertts import PyPiper | |
| import shlex | |
# Single PyPiper instance shared by the model-loading and synthesis
# callbacks defined below.
pp = PyPiper()
# Sample sentence pre-filled in the text box when the app starts.
MY_EXAMPLE = (
    "D’Wanteraktioun huet Mëtt Abrëll hir Dieren no fënnef Méint rëm zougemaach."
)

# Labels offered by the "Reading Speed" and "Sentence Pause" radio groups.
# speed_to_length / pause_to_length translate each label into a number.
SPEED_CHOICES = [
    "normal",
    "fast (x2)",
    "very fast (x5)",
]
PAUSE_CHOICES = [
    "very short (x5)",
    "short (x2)",
    "medium",
]
def speed_to_length(choice):
    """Translate a "Reading Speed" label into the value for the Length slider.

    Args:
        choice: One of "normal", "fast (x2)" or "very fast (x5)".

    Returns:
        1.0, 0.5 or 0.2 respectively (faster labels map to smaller values).

    Raises:
        KeyError: If ``choice`` is not one of the three labels.
    """
    length_by_speed = {
        "normal": 1.0,
        "fast (x2)": 0.5,
        "very fast (x5)": 0.2,
    }
    return length_by_speed[choice]
def pause_to_length(choice):
    """Translate a "Sentence Pause" label into the value for the pause slider.

    Args:
        choice: One of "very short (x5)", "short (x2)" or "medium".

    Returns:
        0.2, 0.5 or 1.0 respectively (shorter labels map to smaller values).

    Raises:
        KeyError: If ``choice`` is not one of the three labels.
    """
    pause_by_label = {
        "very short (x5)": 0.2,
        "short (x2)": 0.5,
        "medium": 1.0,
    }
    return pause_by_label[choice]
def new_load_mod(voice_key):
    """Point the shared PyPiper instance at a voice and load it.

    Args:
        voice_key: Voice name without extension, e.g. "lb_LU-female-medium".

    Returns:
        The result of the ``gr.Info(...)`` call that announces the loaded
        voice (called with ``duration=2``).
    """
    onnx_relpath = f"voices/{voice_key}.onnx"
    # "<cwd>/voices/<voice_key>.onnx.json" -- presumably the voice's JSON
    # config; PyPiper reads it through its ``json_ob`` attribute.
    config_path = f"{os.getcwd()}/{onnx_relpath}.json"
    pp.json_ob = config_path

    # Actually load the model.
    pp.load_mod(instr=voice_key)

    notice = gr.Info(f"Loaded model: {voice_key}", duration=2)
    return notice
async def stream_tts(text, length, noise, width, sen_pause):
    """Re-yield every item produced by ``pp.stream_tts`` for ``text``.

    This is an async generator, but the inner loop is a plain synchronous
    ``for`` over ``pp.stream_tts(...)``; nothing is awaited between items.

    Args:
        text: Text to synthesize.
        length, noise, width, sen_pause: Forwarded unchanged to
            ``pp.stream_tts`` as keyword arguments of the same names.

    Yields:
        Each item from ``pp.stream_tts``, in order.
    """
    synth_options = {
        "length": length,
        "noise": noise,
        "width": width,
        "sen_pause": sen_pause,
    }
    for chunk in pp.stream_tts(text, **synth_options):
        yield chunk
def tts(text, length, noise, width, sen_pause):
    """Synthesize ``text`` in one call and return the result of ``pp.tts``.

    Args:
        text: Text to synthesize.
        length, noise, width, sen_pause: Forwarded unchanged to ``pp.tts``
            as keyword arguments of the same names.

    Returns:
        Whatever ``pp.tts`` returns for these arguments.
    """
    synth_options = {
        "length": length,
        "noise": noise,
        "width": width,
        "sen_pause": sen_pause,
    }
    return pp.tts(text, **synth_options)
def clean_str(text):
    """Return ``text`` quoted with ``shlex.quote``.

    The result can be used as a single token in a shell command line.
    """
    quoted = shlex.quote(text)
    return quoted
# initialize the model
# Loads the same voice the Dropdown below starts on, before the UI is built.
new_load_mod("lb_LU-female-medium")

# NOTE(review): the nesting of the layout below was reconstructed from a copy
# of this file whose indentation was lost -- confirm against the running app.
with gr.Blocks() as demo:
    gr.HTML("<h1>SoftGiant TTS LU</h1>")
    with gr.Row():
        # Wider column (scale=2): text input, voice choice, buttons, player.
        with gr.Column(scale=2):
            text_in = gr.Textbox(label="Text", lines=10, value=MY_EXAMPLE)
            voice = gr.Dropdown(label="Voice",
                                choices=["lb_LU-female-low", "lb_LU-female-medium"],
                                value="lb_LU-female-medium")
            # Buttons
            with gr.Row():
                # stream_btn = gr.Button("Stream")
                synth_btn = gr.Button("Synthesize")
                stop_btn = gr.Button("Stop")
            output_audio = gr.Audio(streaming=True, autoplay=True)
        # Narrower column (scale=1): information text, presets, raw sliders.
        with gr.Column(scale=1):
            with gr.Accordion("Information", open=False):
                gr.Markdown("""
                **SoftGiant TTS LU model** trained on the [ZLS](https://huggingface.co/datasets/denZLS/Luxembourgish-Male-TTS-for-LOD) dataset.
                Built using the [Piper TTS](https://github.com/rhasspy/piper) for high‑quality Luxembourgish text‑to‑speech.
                """)
            # Preset radios; their change handlers overwrite the "Length" and
            # "Sentence Pause" sliders in the Advanced Settings accordion.
            speed_radio = gr.Radio(label="Reading Speed", choices=SPEED_CHOICES, value="normal")
            pause_radio = gr.Radio(label="Sentence Pause", choices=PAUSE_CHOICES, value="medium")
            with gr.Accordion("Advanced Settings", open=False):
                # These four slider values are the synthesis inputs passed to tts().
                length = gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1.0)
                noise = gr.Slider(label="Noise Level", minimum=0.01, maximum=3.0, value=0.1)
                width = gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5)
                sen_pause = gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1.0)
    # Wire up callbacks
    # Picking another voice reloads the model; new_load_mod's return value is
    # sent to output_audio.
    voice.change(new_load_mod, inputs=voice, outputs=output_audio)
    # Each preset label is converted to a number and written to its slider.
    speed_radio.change(speed_to_length, inputs=speed_radio, outputs=length)
    pause_radio.change(pause_to_length, inputs=pause_radio, outputs=sen_pause)
    # Stream vs full synth
    # (the streaming button and its handler are currently disabled)
    # f_stream = stream_btn.click(stream_tts,
    # inputs=[text_in, length, noise, width, sen_pause],
    # outputs=output_audio)
    # The event handle is kept so the Stop button can cancel it.
    f_synth = synth_btn.click(tts,
                              inputs=[text_in, length, noise, width, sen_pause],
                              outputs=output_audio)
    # # Stop any in‑flight generation
    # No function is run; the click only cancels the listed events.
    stop_btn.click(None, None, outputs=output_audio, cancels=[
        # f_stream,
        f_synth
    ])
# Start the Gradio server with default settings.
demo.launch()