Spaces:

profplate
/

edge-tts-practice

Paused

App Files Community

edge-tts-practice / app.py

profplate

Update app.py

a44a43b verified about 1 month ago

raw

history blame contribute delete

3.41 kB

	import asyncio
	import tempfile

	import edge_tts
	import gradio as gr


	async def get_voices():
	    """Return a mapping of display label -> Edge TTS voice short name.

	    The voice list comes from ``edge_tts.list_voices()`` and is ordered by
	    locale first, then by short name. Each label has the form
	    ``"<ShortName> - <Locale> (<Gender>)"``, and the returned dict keeps
	    the sorted order.
	    """
	    catalog = await edge_tts.list_voices()
	    catalog.sort(key=lambda entry: (entry["Locale"], entry["ShortName"]))

	    labelled = {}
	    for entry in catalog:
	        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
	        labelled[label] = entry["ShortName"]
	    return labelled


	async def text_to_speech(text, voice, rate, pitch):
	    """Synthesize ``text`` to an mp3 file with the given voice, rate and pitch.

	    Args:
	        text: Text to speak. ``None``, empty or whitespace-only text is rejected.
	        voice: Voice label of the form ``"<ShortName> - ..."``; only the part
	            before the first ``" - "`` is passed to Edge TTS.
	        rate: Speech-rate offset; truncated to an int and sent as a signed
	            percentage such as ``"+10%"``.
	        pitch: Pitch offset; truncated to an int and sent as a signed Hz
	            value such as ``"-5Hz"``.

	    Returns:
	        A ``(path, warning)`` tuple. On success ``path`` is the generated
	        mp3 file and ``warning`` is ``None``; when the input is rejected
	        ``path`` is ``None`` and ``warning`` is a user-facing message.

	    Raises:
	        Whatever ``edge_tts`` raises during synthesis; the temporary file is
	        removed before the exception propagates.
	    """
	    # Local import keeps this block self-contained without touching the
	    # file-level import list.
	    import os

	    # Guard against None as well as blank text so a missing value yields the
	    # friendly warning instead of an AttributeError on ``.strip()``.
	    if not text or not text.strip():
	        return None, "Please enter some text to convert."
	    if not voice:
	        return None, "Please select a voice."

	    voice_short_name = voice.split(" - ")[0]
	    rate_str = f"{int(rate):+d}%"
	    pitch_str = f"{int(pitch):+d}Hz"

	    communicate = edge_tts.Communicate(
	        text, voice_short_name, rate=rate_str, pitch=pitch_str
	    )

	    # Only reserve a unique path here; the handle is closed before saving so
	    # the writer opens the file itself.
	    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
	        tmp_path = tmp_file.name

	    try:
	        await communicate.save(tmp_path)
	    except BaseException:
	        # BaseException so task cancellation is covered too: the file was
	        # created with delete=False, so it would otherwise be left behind.
	        try:
	            os.remove(tmp_path)
	        except OSError:
	            pass
	        raise

	    return tmp_path, None


	async def tts_interface(text, voice, rate, pitch):
	    """Gradio click handler: run synthesis and return the audio file path.

	    Delegates to ``text_to_speech``. If that reports a warning message, the
	    message is raised in the UI via ``gr.Warning`` and ``None`` is returned
	    so no audio is produced.
	    """
	    result_path, message = await text_to_speech(text, voice, rate, pitch)
	    if not message:
	        return result_path

	    gr.Warning(message)
	    return None


	async def create_demo():
	    """Build and return the Gradio Blocks UI for the text-to-speech app.

	    Fetches the voice labels via ``get_voices``, preselects the first label
	    starting with ``en-US-AriaNeural`` (falling back to the first label, or
	    an empty string when no voices were returned), and wires the generate
	    button to ``tts_interface``.
	    """
	    label_to_voice = await get_voices()
	    choices = list(label_to_voice)

	    # Fallback first, then override with the preferred voice if it exists.
	    initial_voice = choices[0] if choices else ""
	    for candidate in choices:
	        if candidate.startswith("en-US-AriaNeural"):
	            initial_voice = candidate
	            break

	    intro = (
	        "Convert text to speech using Microsoft Edge's online TTS voices. "
	        "Adjust rate and pitch as percentages/Hz offsets from the default "
	        "(0 = unchanged, positive = faster/higher, negative = slower/lower)."
	    )

	    with gr.Blocks(title="Edge TTS Text-to-Speech", analytics_enabled=False) as demo:
	        gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")
	        gr.Markdown(intro)

	        with gr.Row():
	            # Left column: all inputs plus the trigger button.
	            with gr.Column():
	                text_input = gr.Textbox(
	                    label="Input Text",
	                    lines=6,
	                    placeholder="Type or paste the text you want to hear...",
	                )
	                voice_dropdown = gr.Dropdown(
	                    choices=choices,
	                    label="Voice",
	                    value=initial_voice,
	                    filterable=True,
	                )
	                rate_slider = gr.Slider(
	                    minimum=-50,
	                    maximum=50,
	                    value=0,
	                    step=1,
	                    label="Speech Rate Adjustment (%)",
	                )
	                pitch_slider = gr.Slider(
	                    minimum=-20,
	                    maximum=20,
	                    value=0,
	                    step=1,
	                    label="Pitch Adjustment (Hz)",
	                )
	                generate_btn = gr.Button("Generate Speech", variant="primary")

	            # Right column: the synthesized audio player.
	            with gr.Column():
	                audio_output = gr.Audio(label="Generated Audio", type="filepath")

	        # Input order must match the tts_interface(text, voice, rate, pitch)
	        # parameter order.
	        handler_inputs = [text_input, voice_dropdown, rate_slider, pitch_slider]
	        generate_btn.click(
	            fn=tts_interface,
	            inputs=handler_inputs,
	            outputs=audio_output,
	        )

	    return demo


	async def main():
	    """Create the demo, enable its queue, and launch the Gradio app.

	    The queue is configured with ``default_concurrency_limit=20`` before
	    ``launch()`` is called.
	    """
	    app = await create_demo()
	    app.queue(default_concurrency_limit=20)
	    app.launch()


	# Script entry point: run the async main() to completion when this file is
	# executed directly rather than imported.
	if __name__ == "__main__":
	asyncio.run(main())