Spaces:
Paused
Paused
| import asyncio | |
| import tempfile | |
| import edge_tts | |
| import gradio as gr | |
async def get_voices():
    """Return a mapping of descriptive voice labels to Edge TTS short names.

    Labels look like ``"en-US-AriaNeural - en-US (Female)"`` and are sorted
    by locale, then by short name.
    """
    all_voices = await edge_tts.list_voices()
    all_voices.sort(key=lambda entry: (entry["Locale"], entry["ShortName"]))

    labelled = {}
    for entry in all_voices:
        label = f"{entry['ShortName']} - {entry['Locale']} ({entry['Gender']})"
        labelled[label] = entry["ShortName"]
    return labelled
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize speech for *text* and return ``(mp3_path, error_message)``.

    Exactly one element of the pair is ``None``: on success the path to a
    temporary .mp3 file is returned; on invalid input a user-facing message
    is returned instead.
    """
    # Guard clauses: reject blank text and a missing voice selection.
    if not text.strip():
        return None, "Please enter some text to convert."
    if not voice:
        return None, "Please select a voice."

    # The dropdown value is a label like "en-US-AriaNeural - en-US (Female)";
    # the short name is everything before the first " - ".
    short_name = voice.split(" - ")[0]

    communicate = edge_tts.Communicate(
        text,
        short_name,
        rate=f"{int(rate):+d}%",
        pitch=f"{int(pitch):+d}Hz",
    )

    # Reserve a temp file name (kept on disk), then write the audio to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as handle:
        out_path = handle.name
    await communicate.save(out_path)
    return out_path, None
async def tts_interface(text, voice, rate, pitch):
    """Gradio click handler: run TTS, surfacing validation errors as warnings."""
    audio_path, problem = await text_to_speech(text, voice, rate, pitch)
    if problem:
        # Show the validation message in the UI and clear the audio output.
        gr.Warning(problem)
        return None
    return audio_path
async def create_demo():
    """Build and return the Gradio Blocks UI for the Edge TTS app."""
    voice_map = await get_voices()
    labels = list(voice_map.keys())

    # Prefer the en-US Aria voice as the default; otherwise fall back to the
    # first available label (or an empty string when no voices were fetched).
    default_choice = labels[0] if labels else ""
    for label in labels:
        if label.startswith("en-US-AriaNeural"):
            default_choice = label
            break

    with gr.Blocks(analytics_enabled=False, title="Edge TTS Text-to-Speech") as demo:
        gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")
        gr.Markdown(
            "Convert text to speech using Microsoft Edge's online TTS voices. "
            "Adjust rate and pitch as percentages/Hz offsets from the default "
            "(0 = unchanged, positive = faster/higher, negative = slower/lower)."
        )

        with gr.Row():
            # Left column: all the inputs plus the trigger button.
            with gr.Column():
                text_box = gr.Textbox(
                    placeholder="Type or paste the text you want to hear...",
                    label="Input Text",
                    lines=6,
                )
                voice_picker = gr.Dropdown(
                    label="Voice",
                    choices=labels,
                    value=default_choice,
                    filterable=True,
                )
                rate_control = gr.Slider(
                    label="Speech Rate Adjustment (%)",
                    minimum=-50,
                    maximum=50,
                    value=0,
                    step=1,
                )
                pitch_control = gr.Slider(
                    label="Pitch Adjustment (Hz)",
                    minimum=-20,
                    maximum=20,
                    value=0,
                    step=1,
                )
                speak_btn = gr.Button("Generate Speech", variant="primary")

            # Right column: the generated audio player.
            with gr.Column():
                audio_out = gr.Audio(label="Generated Audio", type="filepath")

        speak_btn.click(
            fn=tts_interface,
            inputs=[text_box, voice_picker, rate_control, pitch_control],
            outputs=audio_out,
        )

    return demo
async def main():
    """Entry point: construct the demo, enable request queuing, and launch it."""
    app = await create_demo()
    # Queue requests so up to 20 syntheses can run concurrently.
    app.queue(default_concurrency_limit=20)
    app.launch()


if __name__ == "__main__":
    asyncio.run(main())