Spaces:

Ryanus
/

EdgeTTS

Sleeping

App Files Files Community

Ryanus committed on Jul 7, 2025

Commit

253b483

verified ·

1 Parent(s): 33650f8

Update app.py

Browse files

Files changed (1) hide show

app.py +109 -43

app.py CHANGED Viewed

@@ -2,68 +2,134 @@ import asyncio
 import edge_tts
 import gradio as gr
 import os
-# 定義一個異步函數來生成語音
-async def generate_speech(text: str, voice: str, rate: str, pitch: str) -> str:
-    """
-    使用 Edge TTS 生成語音並保存為音頻文件。
-    Args:
-        text (str): 要轉換為語音的文本。
-        voice (str): 要使用的語音名稱 (例如 "zh-CN-XiaoxiaoNeural")。
-        rate (str): 語速調整 (例如 "+0%", "-10%", "+20%")。
-        pitch (str): 音高調整 (例如 "+0Hz", "-5Hz", "+10Hz")。
-    Returns:
-        str: 生成的音頻文件的路徑。
-    """
-    output_file = "output.mp3"
-    # 構建 Edge TTS 命令
     communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
-    # 將語音寫入文件
     await communicate.save(output_file)
     return output_file
-# 獲取可用的語音列表
 async def get_voices():
-    """
-    獲取 Edge TTS 可用的語音列表。
-    """
     voices = await edge_tts.list_voices()
-    # 修正這裡，使用字典鍵 'ShortName' 來訪問語音名稱
     return [voice["ShortName"] for voice in voices]
-# Gradio 介面函數
 async def tts_interface(text, voice, rate_percentage, pitch_hz):
-    # 確保 rate 和 pitch 始終包含符號
     rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
     pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
     audio_path = await generate_speech(text, voice, rate, pitch)
     return audio_path
-# 啟動 Gradio 介面
 async def main():
     voices = await get_voices()
-    iface = gr.Interface(
-        fn=tts_interface,
-        inputs=[
-            gr.Textbox(lines=5, label="輸入文本"),
-            gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural"), # 預設中文語音
-            gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="語速調整 (%)"),
-            gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="音高調整 (Hz)")
-        ],
-        outputs=gr.Audio(type="filepath", label="生成的語音"),
-        title="Edge TTS 語音合成",
-        description="輸入文本，選擇語音，調整語速和音高，然後生成語音。",
-        allow_flagging="never"
-    )
-    iface.launch()
 if __name__ == "__main__":
     asyncio.run(main())

 import edge_tts
 import gradio as gr
 import os
+from datetime import datetime
+from pydub import AudioSegment
+# Output folder for clips produced by generate_speech
+AUDIO_DIR = "saved_audios"
+# Output folder for podcasts (and their temporary segment files) produced by podcast_produce
+PODCAST_DIR = "podcast_audios"
+# Create both folders at import time; exist_ok avoids an error when they already exist
+os.makedirs(AUDIO_DIR, exist_ok=True)
+os.makedirs(PODCAST_DIR, exist_ok=True)
+def generate_unique_filename(folder, prefix="audio", ext="mp3"):
+    """Build a timestamped file path inside *folder*.
+
+    The file name has the form ``<prefix>_<YYYYmmdd_HHMMSS_microseconds>.<ext>``,
+    using the current local time.
+    """
+    stamp = f"{datetime.now():%Y%m%d_%H%M%S_%f}"
+    filename = "{}_{}.{}".format(prefix, stamp, ext)
+    return os.path.join(folder, filename)
+# Synthesize a single clip and save it automatically
+async def generate_speech(text, voice, rate, pitch):
+    """Synthesize *text* with Edge TTS and return the path of the saved MP3.
+
+    The clip is written to a freshly generated, timestamped file in AUDIO_DIR.
+    *rate* and *pitch* are forwarded unchanged to ``edge_tts.Communicate``.
+    """
+    target_path = generate_unique_filename(AUDIO_DIR)
+    synthesizer = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
+    await synthesizer.save(target_path)
+    return target_path
+# Fetch the voices available from Edge TTS
 async def get_voices():
+    """Return the ``ShortName`` of every voice reported by ``edge_tts.list_voices``."""
+    short_names = []
+    for entry in await edge_tts.list_voices():
+        short_names.append(entry["ShortName"])
+    return short_names
+# List the saved audio clips
+def list_saved_audios():
+    """Return paths of the ``.mp3`` files in AUDIO_DIR, sorted by name in descending order."""
+    mp3_names = [name for name in os.listdir(AUDIO_DIR) if name.endswith(".mp3")]
+    mp3_names.sort(reverse=True)
+    return [os.path.join(AUDIO_DIR, name) for name in mp3_names]
+# List the saved podcasts
+def list_saved_podcasts():
+    """Return paths of the ``.mp3`` files in PODCAST_DIR, sorted by name in descending order."""
+    mp3_names = [name for name in os.listdir(PODCAST_DIR) if name.endswith(".mp3")]
+    mp3_names.sort(reverse=True)
+    return [os.path.join(PODCAST_DIR, name) for name in mp3_names]
+# Gradio handler for single-clip speech synthesis
 async def tts_interface(text, voice, rate_percentage, pitch_hz):
+    """Format the slider values as signed strings and synthesize one clip.
+
+    Non-negative values get an explicit ``+`` prefix (e.g. ``+0%``, ``+10Hz``);
+    negative values keep their own minus sign. Returns the saved clip's path.
+    """
+    rate_sign = "+" if rate_percentage >= 0 else ""
+    pitch_sign = "+" if pitch_hz >= 0 else ""
+    return await generate_speech(
+        text,
+        voice,
+        f"{rate_sign}{rate_percentage}%",
+        f"{pitch_sign}{pitch_hz}Hz",
+    )
+# Play back a saved audio file
+def play_saved_audio(audio_file):
+    """Return the selected file path unchanged so an Audio component can load it."""
+    selected_path = audio_file
+    return selected_path
+# Podcast production: synthesize several script segments, concatenate them,
+# and optionally mix in background music
+async def podcast_produce(scripts, voice, rate_percentage, pitch_hz, bgm_file, podcast_title, podcast_desc):
+    """Synthesize each script segment, join them, and export one podcast MP3.
+
+    Args:
+        scripts: Script segments. Accepts a pandas DataFrame (what
+            ``gr.Dataframe`` passes by default), a numpy array, a list of
+            rows, or a flat list of strings. Blank cells are skipped.
+        voice: Edge TTS voice short name.
+        rate_percentage: Signed speaking-rate adjustment in percent.
+        pitch_hz: Signed pitch adjustment in Hz.
+        bgm_file: Optional background music; a file path or an object that
+            exposes the path as ``.name``. ``None`` disables mixing.
+        podcast_title: Title written to the metadata text file.
+        podcast_desc: Description written to the metadata text file.
+
+    Returns:
+        Path of the exported podcast MP3 inside PODCAST_DIR.
+
+    Raises:
+        gr.Error: If no non-empty script segment was provided.
+    """
+    rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
+    pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
+
+    # Iterating a DataFrame yields its column labels, not its cells, so
+    # convert every supported input shape into a plain list of rows first.
+    if hasattr(scripts, "values") and hasattr(scripts.values, "tolist"):
+        rows = scripts.values.tolist()
+    elif hasattr(scripts, "tolist"):
+        rows = scripts.tolist()
+    else:
+        rows = list(scripts or [])
+
+    texts = []
+    for row in rows:
+        cells = row if isinstance(row, (list, tuple)) else [row]
+        for cell in cells:
+            # Skip missing cells: None or NaN (NaN is the only value != itself).
+            if cell is None or cell != cell:
+                continue
+            cell_text = str(cell)
+            if cell_text.strip():
+                texts.append(cell_text)
+
+    # sum([]) would yield the int 0 and crash on export; fail with a clear message instead.
+    if not texts:
+        raise gr.Error("請至少輸入一段腳本內容")
+
+    # Synthesize each segment to a temporary file and load it into memory
+    audio_segments = []
+    for idx, text in enumerate(texts):
+        temp_audio = generate_unique_filename(PODCAST_DIR, prefix=f"segment{idx}")
+        try:
+            communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
+            await communicate.save(temp_audio)
+            audio_segments.append(AudioSegment.from_file(temp_audio))
+        finally:
+            # Always remove the temporary file, even when synthesis or decoding
+            # fails, so stray segments never appear among the saved podcasts.
+            if os.path.exists(temp_audio):
+                os.remove(temp_audio)
+
+    # Concatenate all segments in order
+    podcast_audio = audio_segments[0]
+    for segment in audio_segments[1:]:
+        podcast_audio += segment
+
+    # Mix in background music (optional)
+    bgm_path = getattr(bgm_file, "name", bgm_file)
+    if bgm_path and os.path.isfile(bgm_path):
+        bgm = AudioSegment.from_file(bgm_path).apply_gain(-10)
+        bgm = bgm[:len(podcast_audio)]
+        podcast_audio = podcast_audio.overlay(bgm)
+
+    # Save the podcast audio file
+    podcast_file = generate_unique_filename(PODCAST_DIR, prefix="podcast")
+    podcast_audio.export(podcast_file, format="mp3")
+
+    # Save the metadata next to the audio file, swapping only the extension
+    meta_file = os.path.splitext(podcast_file)[0] + ".txt"
+    with open(meta_file, "w", encoding="utf-8") as f:
+        f.write(f"Title: {podcast_title}\nDescription: {podcast_desc}\n")
+    return podcast_file
+# Main Gradio interface
 async def main():
+    """Build the four-tab Gradio UI and launch it.
+
+    Tabs: single-clip synthesis, saved-clip playback, podcast production,
+    and saved-podcast playback. The voice list is fetched once up front.
+    """
     voices = await get_voices()
+    with gr.Blocks() as demo:
+        gr.Markdown("## Edge TTS 語音合成與播客製作")
+        # Tab 1: synthesize a single clip from text
+        with gr.Tab("語音合成"):
+            text_input = gr.Textbox(lines=5, label="輸入文本")
+            voice_input = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
+            rate_input = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
+            pitch_input = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
+            tts_btn = gr.Button("生成語音")
+            audio_output = gr.Audio(type="filepath", label="生成的語音")
+            tts_btn.click(
+                fn=tts_interface,
+                inputs=[text_input, voice_input, rate_input, pitch_input],
+                outputs=audio_output
+            )
+        # Tab 2: play back a previously saved clip
+        with gr.Tab("檢視已儲存語音"):
+            # NOTE(review): the choices are computed once, while the UI is being
+            # built; nothing visible here refreshes them, so clips generated
+            # afterwards presumably do not appear until restart — confirm.
+            audio_files = list_saved_audios()
+            audio_dropdown = gr.Dropdown(audio_files, label="選擇已儲存語音檔案")
+            saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
+            audio_dropdown.change(
+                fn=play_saved_audio,
+                inputs=audio_dropdown,
+                outputs=saved_audio_output
+            )
+        # Tab 3: produce a podcast from several script segments
+        with gr.Tab("播客製作"):
+            gr.Markdown("### 輸入多段腳本，選擇語音、可插入背景音樂，快速生成播客")
+            scripts_input = gr.Dataframe(headers=["段落內容"], datatype=["str"], row_count=3, col_count=1, label="播客腳本（每行一段）")
+            voice_input2 = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
+            rate_input2 = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
+            pitch_input2 = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
+            bgm_input = gr.File(label="上傳背景音樂（可選）")
+            podcast_title = gr.Textbox(label="播客標題")
+            podcast_desc = gr.Textbox(label="播客描述")
+            podcast_btn = gr.Button("生成播客")
+            podcast_output = gr.Audio(type="filepath", label="生成的播客音檔")
+            podcast_btn.click(
+                fn=podcast_produce,
+                inputs=[scripts_input, voice_input2, rate_input2, pitch_input2, bgm_input, podcast_title, podcast_desc],
+                outputs=podcast_output
+            )
+        # Tab 4: play back a previously saved podcast
+        with gr.Tab("檢視已儲存播客"):
+            # NOTE(review): same build-time snapshot as the saved-clip tab above.
+            podcast_files = list_saved_podcasts()
+            podcast_dropdown = gr.Dropdown(podcast_files, label="選擇已儲存播客檔案")
+            saved_podcast_output = gr.Audio(type="filepath", label="播放已儲存播客")
+            podcast_dropdown.change(
+                fn=play_saved_audio,
+                inputs=podcast_dropdown,
+                outputs=saved_podcast_output
+            )
+    demo.launch()
 if __name__ == "__main__":
+    # main() is a coroutine, so it is driven by asyncio.run when executed as a script
     asyncio.run(main())