TTS-LU

Sleeping

App Files Files Community

TTS-LU / app.py

hca97

Short very short naming fixed

23af960 verified 9 months ago

raw

history blame contribute delete

4.23 kB

	# Developed by SoftGiant
	# Credits:
	# * Marco Barnig
	# * https://piper.ttstool.com/
	# * https://huggingface.co/spaces/broadfield/piper-fast-tts
	# * https://github.com/rhasspy
	# * https://github.com/rhasspy/piper
	# * https://github.com/broadfield-dev/PyPiperTTS-win
	# * https://github.com/broadfield-dev/PyPiperTTS

	import gradio as gr
	import os
	from pypipertts import PyPiper
	import shlex

	# Single PyPiper instance shared by every callback in this file
	# (voice loading and synthesis all go through it).
	pp = PyPiper()

	# Luxembourgish sample sentence used as the initial content of the text box.
	MY_EXAMPLE = """D’Wanteraktioun huet Mëtt Abrëll hir Dieren no fënnef Méint rëm zougemaach."""
	# Speed & pause presets.
	#
	# Each table maps the label shown in the UI to the numeric value written into
	# the matching slider. The tables are the single source of truth: the choice
	# lists below are derived from them, so a label can never be offered in the
	# UI without having a value to map to. Insertion order is the display order.
	_SPEED_TO_LENGTH = {
	    "normal": 1.0,
	    "fast (x2)": 0.5,
	    "very fast (x5)": 0.2,
	}
	_PAUSE_TO_LENGTH = {
	    "very short (x5)": 0.2,
	    "short (x2)": 0.5,
	    "medium": 1.0,
	}

	SPEED_CHOICES = list(_SPEED_TO_LENGTH)
	PAUSE_CHOICES = list(_PAUSE_TO_LENGTH)


	def speed_to_length(choice: str) -> float:
	    """Return the "Length" slider value for a reading-speed preset.

	    Args:
	        choice: One of the labels in ``SPEED_CHOICES``.

	    Returns:
	        The numeric value associated with the preset (smaller for the
	        faster presets).

	    Raises:
	        KeyError: If ``choice`` is not a known preset label.
	    """
	    return _SPEED_TO_LENGTH[choice]


	def pause_to_length(choice: str) -> float:
	    """Return the "Sentence Pause" slider value for a pause preset.

	    Args:
	        choice: One of the labels in ``PAUSE_CHOICES``.

	    Returns:
	        The numeric value associated with the preset (smaller for the
	        shorter presets).

	    Raises:
	        KeyError: If ``choice`` is not a known preset label.
	    """
	    return _PAUSE_TO_LENGTH[choice]

	def new_load_mod(voice_key):
	    """Switch the shared PyPiper instance to the voice named ``voice_key``.

	    Points ``pp.json_ob`` at ``<cwd>/voices/<voice_key>.onnx.json``, then asks
	    PyPiper to load the voice, and finally raises a 2-second Gradio info
	    toast naming the loaded model.

	    Args:
	        voice_key: Base name of the voice files under ``voices/``.

	    Returns:
	        Whatever ``gr.Info`` returns; the caller receives it unchanged.
	    """
	    onnx_rel_path = f"voices/{voice_key}.onnx"
	    config_path = f"{os.getcwd()}/{onnx_rel_path}.json"

	    # The config path is assigned before load_mod() runs, as in the original
	    # ordering.
	    pp.json_ob = config_path
	    pp.load_mod(instr=voice_key)

	    toast = gr.Info(f"Loaded model: {voice_key}", duration=2)
	    return toast

	async def stream_tts(text, length, noise, width, sen_pause):
	    """Async generator that relays audio chunks from ``pp.stream_tts``.

	    The four tuning values are forwarded to PyPiper as keyword arguments of
	    the same name. The underlying iterator is consumed with a plain ``for``
	    loop, and each item is yielded as soon as it is produced.

	    Args:
	        text: Text to synthesize.
	        length: Forwarded as ``length``.
	        noise: Forwarded as ``noise``.
	        width: Forwarded as ``width``.
	        sen_pause: Forwarded as ``sen_pause``.

	    Yields:
	        Each item produced by ``pp.stream_tts``, unchanged.
	    """
	    synth_options = {
	        "length": length,
	        "noise": noise,
	        "width": width,
	        "sen_pause": sen_pause,
	    }
	    for chunk in pp.stream_tts(text, **synth_options):
	        yield chunk

	def tts(text, length, noise, width, sen_pause):
	    """Synthesize ``text`` in one shot via ``pp.tts`` and return the result.

	    Args:
	        text: Text to synthesize.
	        length: Forwarded as ``length``.
	        noise: Forwarded as ``noise``.
	        width: Forwarded as ``width``.
	        sen_pause: Forwarded as ``sen_pause``.

	    Returns:
	        The value returned by ``pp.tts``, unchanged.
	    """
	    synth_options = {
	        "length": length,
	        "noise": noise,
	        "width": width,
	        "sen_pause": sen_pause,
	    }
	    return pp.tts(text, **synth_options)

	def clean_str(text):
	    """Return ``text`` escaped with ``shlex.quote``.

	    Args:
	        text: The string to quote.

	    Returns:
	        The shell-quoted form of ``text``.
	    """
	    quoted = shlex.quote(text)
	    return quoted

	# Application start-up: load the default voice, build the Gradio UI, wire the
	# event handlers, and launch the app.
	#
	# NOTE(review): every line in this copy sits at the same indent, so the
	# nesting of the `with` layout blocks below is not visible here -- restore it
	# from the original file before running.

	# Load the default voice up front; it is the same key the "Voice" dropdown
	# starts on.
	new_load_mod("lb_LU-female-medium")

	with gr.Blocks() as demo:
	gr.HTML("<h1>SoftGiant TTS LU</h1>")

	with gr.Row():
	# Main column: text input, voice selection, action buttons, audio player.
	with gr.Column(scale=2):
	text_in = gr.Textbox(label="Text", lines=10, value=MY_EXAMPLE)
	voice = gr.Dropdown(label="Voice",
	choices=["lb_LU-female-low", "lb_LU-female-medium"],
	value="lb_LU-female-medium")
	# Buttons
	with gr.Row():
	# stream_btn = gr.Button("Stream")
	synth_btn = gr.Button("Synthesize")
	stop_btn = gr.Button("Stop")
	output_audio = gr.Audio(streaming=True, autoplay=True)

	# Side column: project information, presets, and raw synthesis parameters.
	with gr.Column(scale=1):
	with gr.Accordion("Information", open=False):
	gr.Markdown("""
	SoftGiant TTS LU model trained on the [ZLS](https://huggingface.co/datasets/denZLS/Luxembourgish-Male-TTS-for-LOD) dataset.
	Built using the [Piper TTS](https://github.com/rhasspy/piper) for high‑quality Luxembourgish text‑to‑speech.
	""")
	# Presets: picking one overwrites the matching slider below (see the
	# .change handlers further down).
	speed_radio = gr.Radio(label="Reading Speed", choices=SPEED_CHOICES, value="normal")
	pause_radio = gr.Radio(label="Sentence Pause", choices=PAUSE_CHOICES, value="medium")
	# Sliders whose values are passed to tts() on "Synthesize".
	with gr.Accordion("Advanced Settings", open=False):
	length = gr.Slider(label="Length", minimum=0.01, maximum=10.0, value=1.0)
	noise = gr.Slider(label="Noise Level", minimum=0.01, maximum=3.0, value=0.1)
	width = gr.Slider(label="Noise Width", minimum=0.01, maximum=3.0, value=0.5)
	sen_pause = gr.Slider(label="Sentence Pause", minimum=0.1, maximum=10.0, value=1.0)

	# Wire up callbacks
	# Changing the voice reloads the model; new_load_mod's return value is sent
	# to the audio component.
	voice.change(new_load_mod, inputs=voice, outputs=output_audio)
	# Preset radios write their mapped numeric value into the matching slider.
	speed_radio.change(speed_to_length, inputs=speed_radio, outputs=length)
	pause_radio.change(pause_to_length, inputs=pause_radio, outputs=sen_pause)

	# Stream vs full synth
	# (the streaming button and its handler are currently disabled)
	# f_stream = stream_btn.click(stream_tts,
	# inputs=[text_in, length, noise, width, sen_pause],
	# outputs=output_audio)
	# Keep the event handle so the Stop button can cancel it.
	f_synth = synth_btn.click(tts,
	inputs=[text_in, length, noise, width, sen_pause],
	outputs=output_audio)

	# Stop any in-flight generation: no function is run, the click only cancels
	# the listed events.
	stop_btn.click(None, None, outputs=output_audio, cancels=[
	# f_stream,
	f_synth
	])

	# Start the Gradio app.
	demo.launch()