| | import gradio as gr |
| | import edge_tts |
| | import asyncio |
| | import tempfile |
| | import os |
| | import json |
| |
|
| | |
# Position (in voices.json order) of the voice currently marked as selected in
# the voices table, or None when nothing is selected. Reset to None by
# update_voices_to_json(), set by select_voice_from_table(), and read by
# get_voices_table() to decide which row shows the "selected" label.
selected_voice_index = None
| |
|
async def update_voices_to_json():
    """Download the Edge TTS voice list and cache it in ``voices.json``.

    The file is written to the current working directory. The module-level
    ``selected_voice_index`` is cleared afterwards, because it refers to a
    position in the previous voice list.

    Returns:
        A status message suitable for display in the UI.
    """
    global selected_voice_index

    catalog = await edge_tts.list_voices()
    with open("voices.json", "w") as out_file:
        json.dump(catalog, out_file)

    # Any previously remembered selection pointed into the old list.
    selected_voice_index = None
    return "Voices updated successfully to voices.json"
| |
|
async def get_voices():
    """Load ``voices.json`` and group voice labels by locale.

    Each label has the form ``"<ShortName> - <DisplayName>"``. Voices without
    a ``Locale`` key are grouped under ``"Unknown"``.

    Returns:
        A dict mapping locale to an alphabetically sorted list of labels,
        with locales in first-seen order.
    """
    with open("voices.json", "r") as f:
        catalog = json.load(f)

    grouped = {}
    for entry in catalog:
        label = f"{entry.get('ShortName')} - {entry.get('DisplayName')}"
        grouped.setdefault(entry.get("Locale", "Unknown"), []).append(label)

    # Sort each locale's labels in place so dropdowns list them alphabetically.
    for labels in grouped.values():
        labels.sort()

    return grouped
| |
|
async def text_to_speech(text, voice, rate, pitch):
    """Synthesize *text* to an MP3 file using Edge TTS.

    Args:
        text: Text to speak. ``None``, empty and whitespace-only input is
            rejected with a message instead of raising.
        voice: Dropdown label of the form ``"<ShortName> - <DisplayName>"``;
            only the part before the first ``" - "`` is sent to Edge TTS.
        rate: Speech-rate adjustment in percent (sent as e.g. ``"+10%"``).
        pitch: Pitch adjustment in hertz (sent as e.g. ``"-5Hz"``).

    Returns:
        ``(path_to_mp3, None)`` on success, or ``(None, message)`` when the
        text or the voice is missing.

    Raises:
        Whatever ``edge_tts`` raises during synthesis; the temporary file is
        removed before the exception propagates.
    """
    if not text or not text.strip():
        return None, "Please enter text to convert."
    if not voice:
        return None, "Please select a voice."

    voice_short_name = voice.split(" - ")[0]
    # UI sliders can deliver floats (e.g. 10.0) and the "d" format code
    # rejects floats, so coerce to int before formatting.
    rate_str = f"{int(rate):+d}%"
    pitch_str = f"{int(pitch):+d}Hz"
    communicate = edge_tts.Communicate(text, voice_short_name, rate=rate_str, pitch=pitch_str)

    # Reserve a unique path and close our handle before edge_tts writes to it.
    with tempfile.NamedTemporaryFile(delete=False, suffix=".mp3") as tmp_file:
        tmp_path = tmp_file.name

    try:
        await communicate.save(tmp_path)
    except BaseException:
        # delete=False means nobody else will clean this up; BaseException
        # also covers task cancellation. Remove the orphan, then re-raise.
        try:
            os.remove(tmp_path)
        except OSError:
            pass
        raise

    return tmp_path, None
| |
|
async def tts_interface(text, voice, rate, pitch):
    """Gradio callback: run text-to-speech and surface any warning.

    Returns:
        ``(audio, notice)`` where ``audio`` is whatever ``text_to_speech``
        produced and ``notice`` is the result of ``gr.Warning(message)`` when
        a warning message was returned, otherwise ``None``.
    """
    audio, warning = await text_to_speech(text, voice, rate, pitch)
    notice = gr.Warning(warning) if warning else None
    return audio, notice
| |
|
async def get_voices_table():
    """Build the headers and rows of the voices table from ``voices.json``.

    Columns are a leading "Select" column followed by the alphabetically
    sorted union of every key found on any voice. Dict and list values are
    rendered as JSON text; missing values become empty strings. Rows are
    sorted by the "Locale" column, then the "DisplayName" column (falling
    back to column positions 1 and 2 when those headers do not exist).

    Returns:
        ``(headers, rows, order)`` where ``order[r]`` is the position in
        ``voices.json`` of the voice displayed in row ``r``.
    """
    with open("voices.json", "r") as f:
        catalog = json.load(f)

    field_names = sorted({key for entry in catalog for key in entry})
    headers = ["Select"] + field_names

    def _cell(entry, key):
        # Nested structures are serialized so they render as plain text.
        raw = entry.get(key, "")
        return json.dumps(raw) if isinstance(raw, (dict, list)) else raw

    rows = [
        [create_select_button(position == selected_voice_index)]
        + [_cell(entry, key) for key in field_names]
        for position, entry in enumerate(catalog)
    ]

    locale_col = headers.index("Locale") if "Locale" in headers else 1
    name_col = headers.index("DisplayName") if "DisplayName" in headers else 2

    # Stable sort of the original positions; ties keep voices.json order.
    order = sorted(
        range(len(rows)),
        key=lambda position: (rows[position][locale_col], rows[position][name_col]),
    )

    return (headers, [rows[position] for position in order], order)
| |
|
def create_select_button(is_selected):
    """Return the label shown in the table's "Select" column.

    Args:
        is_selected: Truthy when the row is the currently selected voice.

    Returns:
        ``"✓ Selected"`` for the selected row, ``"Select"`` otherwise.
    """
    return "✓ Selected" if is_selected else "Select"
| |
|
async def select_voice_from_table(evt: gr.SelectData):
    """Handle voice selection from table.

    Resolves the clicked table row to the voice it displays, remembers that
    voice in the module-level ``selected_voice_index``, and returns updates
    for the language dropdown, the voice dropdown and the table.

    Args:
        evt: Gradio selection event; ``evt.index[0]`` is the clicked row.

    Returns:
        A 3-tuple of ``gr.update`` objects for the language dropdown, the
        voice dropdown and the voices table, in that order. If the clicked
        row no longer exists, three empty updates are returned and nothing
        changes.
    """
    global selected_voice_index

    row_index = evt.index[0]
    with open("voices.json", "r") as f:
        voices = json.load(f)

    # The table is shown in sorted order. sorted_indices[r] is the position in
    # voices.json of the voice displayed in row r, so using this mapping
    # guarantees we pick exactly the voice the user clicked, whatever sort
    # key the table uses.
    _, _, sorted_indices = await get_voices_table()

    if not 0 <= row_index < len(sorted_indices):
        # Stale click (e.g. the list was refreshed and shrank): ignore it.
        return gr.update(), gr.update(), gr.update()

    selected_voice_index = sorted_indices[row_index]
    selected_voice = voices[selected_voice_index]

    # Build the locale and label exactly as get_voices() does, so the values
    # pushed into the dropdowns are members of their choice lists.
    locale = selected_voice.get("Locale", "Unknown")
    voice_full_name = f"{selected_voice.get('ShortName')} - {selected_voice.get('DisplayName')}"

    voices_by_language = await get_voices()
    voice_choices = voices_by_language.get(locale, [])

    # Rebuild the table so the "Select" column reflects the new selection.
    updated_headers, updated_rows, _ = await get_voices_table()

    return (
        gr.update(value=locale),
        gr.update(value=voice_full_name, choices=[""] + voice_choices),
        gr.update(headers=updated_headers, value=updated_rows),
    )
| |
|
async def update_voices_handler():
    """Gradio callback: refresh ``voices.json`` and redraw the voices table.

    Returns:
        ``(status_message, table_update)`` for the status Markdown and the
        voices Dataframe.
    """
    status = await update_voices_to_json()
    table_headers, table_rows, _ = await get_voices_table()
    return status, gr.update(headers=table_headers, value=table_rows)
| |
|
async def filter_voices_by_language(language):
    """Gradio callback: restrict the voice dropdown to one language.

    Args:
        language: Locale chosen in the language dropdown.

    Returns:
        A ``gr.update`` whose choices are an empty entry followed by the
        voices of ``language``, or only the empty entry when the language is
        not known.
    """
    catalog = await get_voices()
    options = [""] + catalog.get(language, [])
    return gr.update(choices=options)
| |
|
async def create_demo():
    """Build the Gradio Blocks UI and wire up its event handlers.

    The page contains an intro, a button to refresh the voice list, the
    text/voice/rate/pitch inputs with the audio output, and a clickable table
    of every available voice.

    NOTE(review): the nesting of the layout containers below was
    reconstructed from statement order because the original indentation was
    not available. Confirm it against the intended layout.

    Returns:
        The constructed ``gr.Blocks`` app (not yet launched).
    """
    catalog = await get_voices()
    languages = sorted(catalog)
    table_headers, table_rows, _ = await get_voices_table()

    with gr.Blocks(analytics_enabled=False) as demo:
        gr.Markdown("# 🎙️ Edge TTS Text-to-Speech")

        with gr.Row():
            with gr.Column(scale=1):
                gr.Markdown("## Text-to-Speech with Microsoft Edge TTS")
                gr.Markdown("""
                Convert text to speech using Microsoft Edge TTS.
                Adjust speech rate and pitch: 0 is default, positive values increase, negative values decrease.
                """)

        with gr.Row():
            update_btn = gr.Button("🔄 Update Voice List")
            update_status = gr.Markdown("")

        with gr.Row():
            with gr.Column():
                text_input = gr.Textbox(label="Input Text", lines=5)

                # Two-step voice picker: choose a language, then a voice.
                language_dropdown = gr.Dropdown(
                    choices=[""] + languages,
                    label="Select Language",
                    value="",
                )
                voice_dropdown = gr.Dropdown(
                    choices=[""],
                    label="Select Voice",
                    value="",
                    allow_custom_value=True,
                )

                # Changing the language repopulates the voice choices.
                language_dropdown.change(
                    fn=filter_voices_by_language,
                    inputs=language_dropdown,
                    outputs=voice_dropdown,
                )

                rate_slider = gr.Slider(
                    minimum=-50,
                    maximum=50,
                    value=0,
                    label="Speech Rate Adjustment (%)",
                    step=1,
                )
                pitch_slider = gr.Slider(
                    minimum=-20,
                    maximum=20,
                    value=0,
                    label="Pitch Adjustment (Hz)",
                    step=1,
                )

                generate_btn = gr.Button("Generate Speech", variant="primary")

                audio_output = gr.Audio(label="Generated Audio", type="filepath")
                warning_md = gr.Markdown(label="Warning", visible=False)

                generate_btn.click(
                    fn=tts_interface,
                    inputs=[text_input, voice_dropdown, rate_slider, pitch_slider],
                    outputs=[audio_output, warning_md],
                )

        gr.Markdown("## Available Voices - Click any row to select a voice")
        voices_table = gr.Dataframe(
            headers=table_headers,
            value=table_rows,
            label="Available Voices",
            interactive=False,
            wrap=True,
        )

        # Clicking a row syncs both dropdowns and redraws the table.
        voices_table.select(
            fn=select_voice_from_table,
            outputs=[language_dropdown, voice_dropdown, voices_table],
        )

        # Refreshing the voice list reports a status and redraws the table.
        update_btn.click(
            fn=update_voices_handler,
            outputs=[update_status, voices_table],
        )

    return demo
| |
|
async def main():
    """Build the UI, enable the request queue and start the Gradio server."""
    app = await create_demo()
    # Let up to five events run concurrently by default.
    app.queue(default_concurrency_limit=5)
    app.launch(show_api=False)


if __name__ == "__main__":
    # Script entry point: run the async main() to completion.
    asyncio.run(main())
| |
|