| import asyncio |
| import edge_tts |
| import gradio as gr |
| import os |
| from datetime import datetime |
| from pydub import AudioSegment |
|
|
# Folder that receives single-utterance TTS results; created on import if missing.
AUDIO_DIR = "saved_audios"
os.makedirs(AUDIO_DIR, exist_ok=True)

# Folder that receives finished podcasts (and their temporary segments).
PODCAST_DIR = "podcast_audios"
os.makedirs(PODCAST_DIR, exist_ok=True)
|
|
def generate_unique_filename(folder, prefix="audio", ext="mp3"):
    """Build a timestamped file path inside ``folder``.

    The file name has the form ``<prefix>_<YYYYmmdd>_<HHMMSS>_<microseconds>.<ext>``,
    using the current local time. Nothing is created on disk.

    Args:
        folder: Directory the path should point into.
        prefix: Leading part of the file name.
        ext: File extension without the leading dot.

    Returns:
        The joined path as a string.
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S_%f}"
    file_name = "{}_{}.{}".format(prefix, stamp, ext)
    return os.path.join(folder, file_name)
|
|
async def generate_speech(text, voice, rate, pitch, folder=AUDIO_DIR):
    """Synthesize ``text`` with edge-tts and save it to a new file in ``folder``.

    Args:
        text: Text to speak.
        voice: Voice identifier passed to ``edge_tts.Communicate``.
        rate: Rate string passed through as the ``rate`` keyword.
        pitch: Pitch string passed through as the ``pitch`` keyword.
        folder: Destination directory; defaults to ``AUDIO_DIR``.

    Returns:
        Path of the audio file that was written.
    """
    destination = generate_unique_filename(folder)
    await edge_tts.Communicate(text, voice, rate=rate, pitch=pitch).save(destination)
    return destination
|
|
async def get_voices():
    """Return the ``ShortName`` of every voice reported by ``edge_tts.list_voices``."""
    short_names = []
    for entry in await edge_tts.list_voices():
        short_names.append(entry["ShortName"])
    return short_names
|
|
def list_saved_audios():
    """Return paths of the ``.mp3`` files in ``AUDIO_DIR``, names in descending order."""
    mp3_names = [name for name in os.listdir(AUDIO_DIR) if name.endswith(".mp3")]
    mp3_names.sort(reverse=True)
    return [os.path.join(AUDIO_DIR, name) for name in mp3_names]
|
|
def list_saved_podcasts():
    """Return paths of the ``.mp3`` files in ``PODCAST_DIR``, names in descending order."""
    paths = []
    for name in sorted(os.listdir(PODCAST_DIR), reverse=True):
        if not name.endswith(".mp3"):
            continue
        paths.append(os.path.join(PODCAST_DIR, name))
    return paths
|
|
async def tts_interface(text, voice, rate_percentage, pitch_hz):
    """Gradio handler: synthesize ``text`` and return the saved audio path.

    Args:
        text: Text to speak. Empty, whitespace-only or ``None`` input is
            skipped instead of being sent to the TTS service.
        voice: Voice identifier forwarded to ``generate_speech``.
        rate_percentage: Speaking-rate offset in percent (e.g. ``-20``, ``0``, ``15``).
        pitch_hz: Pitch offset in Hz.

    Returns:
        Path of the generated audio file, or ``None`` when there is nothing
        to synthesize.
    """
    # Nothing to speak: skip the request so no empty file is left behind.
    if not text or not text.strip():
        return None

    # Always emit an explicit sign and an integer magnitude ("+0%", "-5Hz").
    # Rounding keeps a float such as 10.0 from being rendered as "+10.0%".
    rate = f"{int(round(rate_percentage)):+d}%"
    pitch = f"{int(round(pitch_hz)):+d}Hz"
    return await generate_speech(text, voice, rate, pitch)
|
|
def play_saved_audio(audio_file):
    """Hand the selected file path back unchanged so an audio widget can load it."""
    selected_path = audio_file
    return selected_path
|
|
def _signed(value, unit):
    """Format ``value`` as an explicitly signed integer followed by ``unit``."""
    # Rounding keeps a float such as 10.0 from being rendered as "+10.0%".
    return f"{int(round(value)):+d}{unit}"


def _bgm_path(bgm_file):
    """Return the path of an uploaded background-music file, or ``None``.

    Accepts either a plain path (``str`` / ``os.PathLike``) or an object that
    exposes the path through a ``name`` attribute. Anything that does not
    point at an existing file yields ``None``.
    """
    if bgm_file is None:
        return None
    if isinstance(bgm_file, (str, os.PathLike)):
        path = os.fspath(bgm_file)
    else:
        path = getattr(bgm_file, "name", None)
    if path and os.path.isfile(path):
        return path
    return None


async def podcast_produce(scripts, voice, rate_percentage, pitch_hz, bgm_file, podcast_title, podcast_desc):
    """Synthesize every script paragraph, join them and save the podcast.

    Args:
        scripts: Iterable of paragraph texts. Empty, whitespace-only and
            ``None`` entries are skipped.
        voice: Voice identifier passed to ``edge_tts.Communicate``.
        rate_percentage: Speaking-rate offset in percent.
        pitch_hz: Pitch offset in Hz.
        bgm_file: Optional background music, as a path or an object with a
            ``name`` attribute holding the path. Ignored when missing.
        podcast_title: Title written to the metadata file.
        podcast_desc: Description written to the metadata file.

    Returns:
        Path of the exported ``.mp3``, or ``None`` when no paragraph
        contained any text. A ``.txt`` file with the title and description
        is written next to the audio file.
    """
    rate = _signed(rate_percentage, "%")
    pitch = _signed(pitch_hz, "Hz")

    audio_segments = []
    for idx, text in enumerate(scripts):
        if not text or not text.strip():
            continue
        temp_audio = generate_unique_filename(PODCAST_DIR, prefix=f"segment{idx}")
        try:
            communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
            await communicate.save(temp_audio)
            audio_segments.append(AudioSegment.from_file(temp_audio))
        finally:
            # The decoded segment is kept in memory; remove the temporary file
            # even when synthesis or decoding failed.
            if os.path.exists(temp_audio):
                os.remove(temp_audio)

    if not audio_segments:
        return None

    # Concatenate the paragraphs in script order.
    podcast_audio = sum(audio_segments)

    bgm_path = _bgm_path(bgm_file)
    if bgm_path is not None:
        # Lower the music by 10 dB and cut it to the speech length before mixing.
        bgm = AudioSegment.from_file(bgm_path).apply_gain(-10)
        bgm = bgm[:len(podcast_audio)]
        podcast_audio = podcast_audio.overlay(bgm)

    podcast_file = generate_unique_filename(PODCAST_DIR, prefix="podcast")
    # export() hands back the open output file; close it so the handle is not leaked.
    podcast_audio.export(podcast_file, format="mp3").close()

    # Swap only the extension; str.replace would also rewrite ".mp3" elsewhere in the path.
    meta_file = os.path.splitext(podcast_file)[0] + ".txt"
    with open(meta_file, "w", encoding="utf-8") as f:
        f.write(f"Title: {podcast_title}\nDescription: {podcast_desc}\n")
    return podcast_file
|
|
def clear_textbox():
    """Return the empty string used to reset a textbox."""
    blank = ""
    return blank
|
|
def clear_paragraphs():
    """Return a fresh paragraph list holding one empty paragraph."""
    fresh = []
    fresh.append("")
    return fresh
|
|
def add_paragraph(paragraphs):
    """Return a copy of ``paragraphs`` with one empty paragraph added at the end.

    The list passed in is left untouched.
    """
    extended = paragraphs.copy()
    extended += [""]
    return extended
|
|
def remove_paragraph(paragraphs):
    """Return a copy of ``paragraphs`` without its last entry.

    A list with one entry or none is copied unchanged, so the last remaining
    paragraph is never removed. The list passed in is left untouched.
    """
    trimmed = paragraphs.copy()
    if len(trimmed) <= 1:
        return trimmed
    del trimmed[-1]
    return trimmed
|
|
def update_paragraphs_ui(paragraphs):
    """Create one interactive three-line ``gr.Textbox`` per paragraph.

    Each box is pre-filled with its paragraph text and labelled with its
    1-based position.
    """
    boxes = []
    for number, content in enumerate(paragraphs, start=1):
        boxes.append(
            gr.Textbox(value=content, label=f"段落{number}內容", lines=3, interactive=True)
        )
    return boxes
|
|
def collect_paragraphs(*args):
    """Gather all positional arguments into a new list, keeping their order."""
    return [*args]
|
|
async def main():
    """Build the Gradio app (TTS, saved audio, podcast maker, saved podcasts) and launch it.

    The podcast tab shows a variable number of script paragraphs. Gradio
    needs every event input/output component to exist when the UI is built,
    so a fixed pool of textboxes is created up front and the add / remove /
    clear buttons only toggle their visibility. ``paragraphs_state`` holds
    one list entry per visible paragraph; its length is the visible count.
    """
    # Upper bound on script paragraphs; size of the pre-built textbox pool.
    max_paragraphs = 10

    voices = await get_voices()
    with gr.Blocks(theme=gr.themes.Soft()) as demo:
        gr.Markdown("## 🎙️ Edge TTS 語音合成與播客製作\n\n- 多段腳本自由增減、內容可清空\n- 介面直覺、操作友善、檔案自動管理")

        with gr.Tab("語音合成"):
            with gr.Row():
                text_input = gr.Textbox(lines=5, label="輸入文本")
                clear_btn = gr.Button("清空")
            voice_input = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
            rate_input = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
            pitch_input = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
            tts_btn = gr.Button("生成語音")
            audio_output = gr.Audio(type="filepath", label="生成的語音")
            tts_btn.click(
                fn=tts_interface,
                inputs=[text_input, voice_input, rate_input, pitch_input],
                outputs=audio_output,
            )
            clear_btn.click(fn=clear_textbox, outputs=text_input)

        with gr.Tab("檢視已儲存語音"):
            # The choices are read once, while the UI is being built.
            audio_files = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案", interactive=True)
            saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
            audio_files.change(fn=play_saved_audio, inputs=audio_files, outputs=saved_audio_output)

        with gr.Tab("播客製作"):
            gr.Markdown("### 📝 多段腳本輸入(可自由增減段落)")
            paragraphs_state = gr.State([""])
            with gr.Column():
                # Only the first box starts visible, matching the one-entry state.
                paragraph_boxes = [
                    gr.Textbox(
                        value="",
                        label=f"段落{i + 1}內容",
                        lines=3,
                        interactive=True,
                        visible=(i == 0),
                    )
                    for i in range(max_paragraphs)
                ]

            add_btn = gr.Button("新增段落")
            remove_btn = gr.Button("刪除段落")
            clear_all_btn = gr.Button("全部清空")

            def box_updates(count, clear_all=False):
                """Return one update per pooled textbox showing the first ``count`` boxes."""
                updates = []
                for i in range(max_paragraphs):
                    shown = i < count
                    if shown and not clear_all:
                        # Keep whatever the user typed in boxes that stay visible.
                        updates.append(gr.update(visible=True))
                    else:
                        # Hidden (or cleared) boxes are emptied so stale text
                        # cannot reappear when a paragraph is added again.
                        updates.append(gr.update(visible=shown, value=""))
                return updates

            def on_add(paragraphs):
                if len(paragraphs) >= max_paragraphs:
                    new_paragraphs = paragraphs
                else:
                    new_paragraphs = add_paragraph(paragraphs)
                return [new_paragraphs, *box_updates(len(new_paragraphs))]

            def on_remove(paragraphs):
                new_paragraphs = remove_paragraph(paragraphs)
                return [new_paragraphs, *box_updates(len(new_paragraphs))]

            def on_clear():
                new_paragraphs = clear_paragraphs()
                return [new_paragraphs, *box_updates(len(new_paragraphs), clear_all=True)]

            paragraph_outputs = [paragraphs_state, *paragraph_boxes]
            add_btn.click(on_add, inputs=paragraphs_state, outputs=paragraph_outputs)
            remove_btn.click(on_remove, inputs=paragraphs_state, outputs=paragraph_outputs)
            clear_all_btn.click(on_clear, outputs=paragraph_outputs)

            voice_input2 = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
            rate_input2 = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
            pitch_input2 = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
            bgm_input = gr.File(label="上傳背景音樂(可選)")
            podcast_title = gr.Textbox(label="播客標題")
            podcast_desc = gr.Textbox(label="播客描述")
            podcast_btn = gr.Button("生成播客")
            podcast_output = gr.Audio(type="filepath", label="生成的播客音檔")

            async def on_podcast_btn_click(paragraphs, *args):
                """Collect the visible paragraphs and produce the podcast."""
                # args = every pooled textbox value, followed by the six settings.
                texts = args[:max_paragraphs]
                voice, rate, pitch, bgm, title, desc = args[max_paragraphs:]
                scripts = list(texts[:len(paragraphs)])
                # Awaited directly: the handler is a coroutine, so no nested
                # asyncio.run() is needed.
                return await podcast_produce(scripts, voice, rate, pitch, bgm, title, desc)

            podcast_btn.click(
                fn=on_podcast_btn_click,
                inputs=[
                    paragraphs_state,
                    *paragraph_boxes,
                    voice_input2,
                    rate_input2,
                    pitch_input2,
                    bgm_input,
                    podcast_title,
                    podcast_desc,
                ],
                outputs=podcast_output,
            )

        with gr.Tab("檢視已儲存播客"):
            # The choices are read once, while the UI is being built.
            podcast_files = gr.Dropdown(list_saved_podcasts(), label="選擇已儲存播客檔案", interactive=True)
            saved_podcast_output = gr.Audio(type="filepath", label="播放已儲存播客")
            podcast_files.change(fn=play_saved_audio, inputs=podcast_files, outputs=saved_podcast_output)

    demo.launch()
|
|
# Script entry point: build and launch the app only when run directly,
# not when this module is imported.
if __name__ == "__main__":
    asyncio.run(main())
|
|