|
|
import asyncio |
|
|
import edge_tts |
|
|
import gradio as gr |
|
|
import os |
|
|
from datetime import datetime |
|
|
|
|
|
# Folder (relative to the current working directory) that receives every
# synthesized clip. It is created at import time so later listing and saving
# can rely on it being present.
AUDIO_DIR = "saved_audios"
os.makedirs(AUDIO_DIR, exist_ok=True)
|
|
|
|
|
def generate_unique_filename(folder, prefix="audio", ext="mp3"):
    """Build a timestamped file path inside *folder* that is not yet in use.

    The name has the form ``<prefix>_<YYYYmmdd_HHMMSS_microseconds>.<ext>``.
    If a file with that exact name already exists, a numeric suffix
    (``_1``, ``_2``, ...) is appended before the extension.

    Args:
        folder: Directory component of the returned path.
        prefix: Leading part of the file name.
        ext: File extension, with or without a leading dot.

    Returns:
        The joined path as a string. The file itself is not created.
    """
    # Accept both "mp3" and ".mp3" without producing a double dot.
    extension = ext.lstrip(".")
    timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    candidate = os.path.join(folder, f"{prefix}_{timestamp}.{extension}")

    # Two requests landing in the same microsecond would otherwise share a
    # name and overwrite each other.
    counter = 1
    while os.path.exists(candidate):
        candidate = os.path.join(
            folder, f"{prefix}_{timestamp}_{counter}.{extension}"
        )
        counter += 1
    return candidate
|
|
|
|
|
async def get_voices():
    """Return the ``ShortName`` of every voice reported by edge_tts.

    The names are returned in the order ``edge_tts.list_voices()`` yields
    its entries.
    """
    catalogue = await edge_tts.list_voices()
    short_names = []
    for entry in catalogue:
        short_names.append(entry["ShortName"])
    return short_names
|
|
|
|
|
async def generate_speech(text, voice, rate, pitch, folder=AUDIO_DIR):
    """Synthesize *text* with edge_tts and save it to a new file in *folder*.

    Args:
        text: The text to speak.
        voice: Voice identifier passed to ``edge_tts.Communicate``.
        rate: Rate adjustment string, e.g. ``"+10%"`` as built by
            ``tts_interface``.
        pitch: Pitch adjustment string, e.g. ``"-5Hz"`` as built by
            ``tts_interface``.
        folder: Directory to write into; created if it does not exist.

    Returns:
        Path of the audio file that was written.

    Raises:
        Whatever ``edge_tts`` raises while synthesizing or saving; the
        partially written file is removed before the error propagates.
    """
    # Only AUDIO_DIR is created at import time; a caller-supplied folder may
    # not exist yet.
    os.makedirs(folder, exist_ok=True)

    output_file = generate_unique_filename(folder)
    communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)

    completed = False
    try:
        await communicate.save(output_file)
        completed = True
    finally:
        if not completed:
            # A failed or cancelled save can leave an empty/truncated file
            # that would later show up in the saved-audio list.
            try:
                os.remove(output_file)
            except OSError:
                pass  # nothing was written, or it is already gone
    return output_file
|
|
|
|
|
def list_saved_audios():
    """Return the paths of all ``.mp3`` files currently in ``AUDIO_DIR``.

    File names are ordered descending, which puts the most recent
    timestamp-named files first.
    """
    mp3_names = [name for name in os.listdir(AUDIO_DIR) if name.endswith(".mp3")]
    mp3_names.sort(reverse=True)
    return [os.path.join(AUDIO_DIR, name) for name in mp3_names]
|
|
|
|
|
async def tts_interface(text, voice, rate_percentage, pitch_hz):
    """Gradio handler: turn the form values into speech and return the file path.

    Args:
        text: Text entered by the user.
        voice: Selected voice name.
        rate_percentage: Speaking-rate offset in percent (slider value).
        pitch_hz: Pitch offset in Hz (slider value).

    Returns:
        Path of the generated audio file, as returned by ``generate_speech``.

    Raises:
        gr.Error: If *text* is empty or whitespace only.
    """
    # Fail early with a readable message instead of an opaque backend error.
    if not text or not text.strip():
        raise gr.Error("請先輸入要合成的文本")

    # Sliders may deliver floats (e.g. 10.0). Coerce to int so the result is
    # always a sign followed by digits, such as "+10%" or "-5Hz", never
    # "+10.0%". "+d" also emits "+0" for zero, matching the previous output.
    rate = f"{int(round(rate_percentage)):+d}%"
    pitch = f"{int(round(pitch_hz)):+d}Hz"

    return await generate_speech(text, voice, rate, pitch)
|
|
|
|
|
def play_saved_audio(audio_file):
    """Pass the chosen file path straight through to the audio player."""
    selected_path = audio_file
    return selected_path
|
|
|
|
|
def clear_textbox():
    """Return an empty string, used to reset the text input box."""
    empty_text = ""
    return empty_text
|
|
|
|
|
|
|
|
# Fetch the voice list once at start-up; it fills the voice dropdown below.
voices = asyncio.run(get_voices())
if not voices:
    # Without this guard the fallback below fails with a bare IndexError.
    raise RuntimeError("edge_tts returned no voices; cannot build the voice selector")

# Prefer the Mandarin Xiaoxiao voice when offered, otherwise the first voice.
default_voice = "zh-CN-XiaoxiaoNeural" if "zh-CN-XiaoxiaoNeural" in voices else voices[0]
|
|
|
|
|
# Two-tab UI: one tab synthesizes speech, the other replays saved files.
with gr.Blocks(theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🎙️ Edge TTS 語音合成工具\n\n- 支援語音合成、語音檔自動儲存與播放\n- 介面簡潔、操作直覺")

    with gr.Tab("語音合成"):
        # NOTE(review): indentation was lost in the source; the row is assumed
        # to hold only the text box and its clear button - confirm the layout.
        with gr.Row():
            text_input = gr.Textbox(lines=5, label="輸入文本")
            clear_btn = gr.Button("清空")
        voice_input = gr.Dropdown(voices, value=default_voice, label="選擇語音")
        rate_input = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
        pitch_input = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
        tts_btn = gr.Button("生成語音")
        audio_output = gr.Audio(type="filepath", label="生成的語音")

        # Keep the event handle so a follow-up step can be chained once the
        # saved-audio dropdown exists.
        generate_event = tts_btn.click(
            fn=tts_interface,
            inputs=[text_input, voice_input, rate_input, pitch_input],
            outputs=audio_output,
        )
        clear_btn.click(fn=clear_textbox, outputs=text_input)

    with gr.Tab("檢視已儲存語音"):
        audio_files = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案", interactive=True)
        saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
        audio_files.change(fn=play_saved_audio, inputs=audio_files, outputs=saved_audio_output)

    # The dropdown choices are otherwise computed only once at start-up, so
    # newly generated files would never appear. Re-list after each generation.
    generate_event.then(
        fn=lambda: gr.update(choices=list_saved_audios()),
        outputs=audio_files,
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# Start the Gradio server for the interface defined above.
demo.launch()
|
|
|