# profplate's picture
# Update app.py
# a44a43b verified
import asyncio
import os
import tempfile

import edge_tts
import gradio as gr
async def get_voices():
    """Return a mapping of display label -> Edge TTS voice ShortName.

    The voice list is fetched with `edge_tts.list_voices()` and ordered by
    (Locale, ShortName). Each label has the form
    "<ShortName> - <Locale> (<Gender>)".
    """
    catalog = await edge_tts.list_voices()
    catalog.sort(key=lambda entry: (entry["Locale"], entry["ShortName"]))

    labelled = {}
    for entry in catalog:
        short_name = entry["ShortName"]
        label = f"{short_name} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = short_name
    return labelled
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize `text` with Edge TTS and return ``(mp3_path, warning)``.

    Args:
        text: Text to speak. ``None``, empty, or whitespace-only text is
            rejected with a warning message instead of raising.
        voice: Voice label of the form "<ShortName> - <Locale> (<Gender>)".
            Only the part before the first " - " is passed to Edge TTS.
        rate: Speech-rate offset; truncated to an int and sent as a signed
            percentage such as "+10%".
        pitch: Pitch offset; truncated to an int and sent as a signed Hz
            value such as "-5Hz".

    Returns:
        ``(path, None)`` on success, where ``path`` is a temporary ``.mp3``
        file that is NOT deleted automatically (the caller owns it), or
        ``(None, message)`` when `text` or `voice` is missing.

    Raises:
        Any exception raised by ``edge_tts`` while building the request or
        saving the audio; the temporary file is removed before re-raising.
    """
    # `not text` guards against None before calling .strip().
    if not text or not text.strip():
        return None, "Please enter some text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(
        text, voice_short_name, rate=rate_str, pitch=pitch_str
    )

    # Only reserve a unique path here; the handle is closed before edge_tts
    # writes to it so the file is never open twice (matters on Windows).
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # BaseException so task cancellation is covered too: without this
        # cleanup every failed request would leave an orphaned temp file.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise
    return tmp_path, None
async def tts_interface(text, voice, rate, pitch):
    """Gradio click handler: return the generated audio path, or None.

    Delegates to `text_to_speech`. When that call reports a warning message,
    the message is shown through `gr.Warning` and no audio is returned.
    """
    audio_path, message = await text_to_speech(text, voice, rate, pitch)
    if not message:
        return audio_path
    gr.Warning(message)
    return None
async def create_demo():
    """Build and return the Gradio Blocks UI for the Edge TTS demo.

    Voice labels come from `get_voices()`. The preselected voice is the first
    label starting with "en-US-AriaNeural", else the first label, else "".
    """
    voice_map = await get_voices()
    labels = list(voice_map)

    # Pick a sensible default voice if one is available.
    aria_labels = [name for name in labels if name.startswith("en-US-AriaNeural")]
    if aria_labels:
        initial_voice = aria_labels[0]
    elif labels:
        initial_voice = labels[0]
    else:
        initial_voice = ""

    intro = (
        "Convert text to speech using Microsoft Edge's online TTS voices. "
        "Adjust rate and pitch as percentages/Hz offsets from the default "
        "(0 = unchanged, positive = faster/higher, negative = slower/lower)."
    )

    with gr.Blocks(analytics_enabled=False, title="Edge TTS Text-to-Speech") as demo:
        gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")
        gr.Markdown(intro)

        with gr.Row():
            # Left column: all inputs plus the trigger button.
            with gr.Column():
                text_box = gr.Textbox(
                    label="Input Text",
                    lines=6,
                    placeholder="Type or paste the text you want to hear...",
                )
                voice_picker = gr.Dropdown(
                    choices=labels,
                    label="Voice",
                    value=initial_voice,
                    filterable=True,
                )
                rate_control = gr.Slider(
                    minimum=-50,
                    maximum=50,
                    value=0,
                    step=1,
                    label="Speech Rate Adjustment (%)",
                )
                pitch_control = gr.Slider(
                    minimum=-20,
                    maximum=20,
                    value=0,
                    step=1,
                    label="Pitch Adjustment (Hz)",
                )
                run_button = gr.Button("Generate Speech", variant="primary")

            # Right column: the synthesized audio.
            with gr.Column():
                audio_player = gr.Audio(label="Generated Audio", type="filepath")

        # Event wiring must happen while the Blocks context is still open.
        run_button.click(
            fn=tts_interface,
            inputs=[text_box, voice_picker, rate_control, pitch_control],
            outputs=audio_player,
        )

    return demo
async def main():
    """Build the demo, enable its queue (default concurrency limit 20), and launch it."""
    app = await create_demo()
    # queue() returns the Blocks instance, so launch() can be chained onto it.
    app.queue(default_concurrency_limit=20).launch()
# Start the app only when this file is executed as a script, not on import.
if __name__ == "__main__":
    asyncio.run(main())