Update app.py
Browse files
app.py
CHANGED
|
@@ -2,68 +2,134 @@ import asyncio
|
|
| 2 |
import edge_tts
|
| 3 |
import gradio as gr
|
| 4 |
import os
|
|
|
|
|
|
|
| 5 |
|
| 6 |
-
|
| 7 |
-
|
| 8 |
-
|
| 9 |
-
|
| 10 |
|
| 11 |
-
|
| 12 |
-
|
| 13 |
-
|
| 14 |
-
rate (str): 語速調整 (例如 "+0%", "-10%", "+20%")。
|
| 15 |
-
pitch (str): 音高調整 (例如 "+0Hz", "-5Hz", "+10Hz")。
|
| 16 |
|
| 17 |
-
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
output_file = "output.mp3"
|
| 21 |
-
|
| 22 |
-
# 構建 Edge TTS 命令
|
| 23 |
communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
|
| 24 |
-
|
| 25 |
-
# 將語音寫入文件
|
| 26 |
await communicate.save(output_file)
|
| 27 |
-
|
| 28 |
return output_file
|
| 29 |
|
| 30 |
-
#
|
| 31 |
async def get_voices():
|
| 32 |
-
"""
|
| 33 |
-
獲取 Edge TTS 可用的語音列表。
|
| 34 |
-
"""
|
| 35 |
voices = await edge_tts.list_voices()
|
| 36 |
-
# 修正這裡,使用字典鍵 'ShortName' 來訪問語音名稱
|
| 37 |
return [voice["ShortName"] for voice in voices]
|
| 38 |
|
| 39 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 40 |
async def tts_interface(text, voice, rate_percentage, pitch_hz):
|
| 41 |
-
# 確保 rate 和 pitch 始終包含符號
|
| 42 |
rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
|
| 43 |
pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
|
| 44 |
-
|
| 45 |
audio_path = await generate_speech(text, voice, rate, pitch)
|
| 46 |
return audio_path
|
| 47 |
|
| 48 |
-
#
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
async def main():
|
| 50 |
voices = await get_voices()
|
| 51 |
-
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
gr.Textbox(lines=5, label="輸入文本")
|
| 56 |
-
gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
|
| 57 |
-
gr.Slider(
|
| 58 |
-
gr.Slider(
|
| 59 |
-
|
| 60 |
-
|
| 61 |
-
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
|
| 65 |
-
|
| 66 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
|
| 68 |
if __name__ == "__main__":
|
| 69 |
asyncio.run(main())
|
|
|
|
| 2 |
import edge_tts
|
| 3 |
import gradio as gr
|
| 4 |
import os
|
| 5 |
+
from datetime import datetime
|
| 6 |
+
from pydub import AudioSegment
|
| 7 |
|
| 8 |
# Output folders: single-utterance clips and finished podcast episodes.
# Created eagerly at import time so later os.listdir()/save calls never
# hit a missing directory.
AUDIO_DIR = "saved_audios"
PODCAST_DIR = "podcast_audios"
os.makedirs(AUDIO_DIR, exist_ok=True)
os.makedirs(PODCAST_DIR, exist_ok=True)
|
| 12 |
|
| 13 |
def generate_unique_filename(folder, prefix="audio", ext="mp3"):
    """Build a collision-resistant file path inside *folder*.

    The name embeds the current wall-clock time down to microseconds,
    e.g. ``saved_audios/audio_20240101_120000_000001.mp3``.
    """
    stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
    return os.path.join(folder, "{}_{}.{}".format(prefix, stamp, ext))
|
|
|
|
|
|
|
| 16 |
|
| 17 |
# Synthesize one passage of text and persist it as a uniquely named mp3.
async def generate_speech(text, voice, rate, pitch):
    """Run Edge TTS over *text* and return the saved mp3 path.

    ``rate`` is a signed percentage string (e.g. "+0%", "-10%") and
    ``pitch`` a signed Hz string (e.g. "+0Hz", "-5Hz").
    """
    dest = generate_unique_filename(AUDIO_DIR)
    tts = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await tts.save(dest)
    return dest
|
| 23 |
|
| 24 |
# Enumerate the voices Edge TTS makes available.
async def get_voices():
    """Return the ShortName of every voice Edge TTS offers."""
    names = []
    for entry in await edge_tts.list_voices():
        names.append(entry["ShortName"])
    return names
|
| 28 |
|
| 29 |
# Previously generated single-utterance clips, newest filename first.
def list_saved_audios():
    """Return full paths of all mp3 files under AUDIO_DIR, newest first.

    "Newest" relies on the timestamped filenames sorting lexicographically.
    """
    return [
        os.path.join(AUDIO_DIR, name)
        for name in sorted(os.listdir(AUDIO_DIR), reverse=True)
        if name.endswith(".mp3")
    ]
|
| 33 |
+
|
| 34 |
# Previously produced podcast episodes, newest filename first.
def list_saved_podcasts():
    """Return full paths of all mp3 files under PODCAST_DIR, newest first.

    "Newest" relies on the timestamped filenames sorting lexicographically.
    """
    return [
        os.path.join(PODCAST_DIR, name)
        for name in sorted(os.listdir(PODCAST_DIR), reverse=True)
        if name.endswith(".mp3")
    ]
|
| 38 |
+
|
| 39 |
# Gradio callback for the single-passage synthesis tab.
async def tts_interface(text, voice, rate_percentage, pitch_hz):
    """Format the slider values into Edge TTS strings and synthesize *text*."""

    def signed(value, unit):
        # Edge TTS requires an explicit sign; negatives already carry "-".
        prefix = "+" if value >= 0 else ""
        return f"{prefix}{value}{unit}"

    return await generate_speech(
        text, voice, signed(rate_percentage, "%"), signed(pitch_hz, "Hz")
    )
|
| 45 |
|
| 46 |
# Echo the chosen path back so the gr.Audio component streams that file.
def play_saved_audio(audio_file):
    """Identity passthrough: Audio(type="filepath") plays the returned path."""
    return audio_file
|
| 49 |
+
|
| 50 |
# Podcast production: synthesize each script segment, stitch them together,
# optionally mix in background music, then save the audio plus metadata.
async def podcast_produce(scripts, voice, rate_percentage, pitch_hz, bgm_file, podcast_title, podcast_desc):
    """Build a podcast mp3 from multiple script segments.

    Args:
        scripts: segment texts — a list of strings, a list of single-cell
            rows, or a pandas DataFrame (what gr.Dataframe delivers).
        voice: Edge TTS voice short name.
        rate_percentage / pitch_hz: numeric adjustments from the sliders.
        bgm_file: optional background-music upload; a path string or a
            tempfile-like object with a ``.name`` attribute.
        podcast_title / podcast_desc: written to a sidecar .txt file.

    Returns:
        Path of the exported podcast mp3, or None when no segment had text.
    """
    rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
    pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"

    # Normalize the Dataframe payload to a flat list of segment strings.
    # gr.Dataframe may hand us a DataFrame or a list of row lists, not a
    # plain list of strings — the original .strip() on a row would raise.
    if hasattr(scripts, "values") and not isinstance(scripts, (list, tuple)):
        rows = scripts.values.tolist()
    else:
        rows = list(scripts)
    texts = [row[0] if isinstance(row, (list, tuple)) else row for row in rows]

    # Synthesize each non-empty segment to a temporary mp3, decode it, delete it.
    audio_segments = []
    for idx, text in enumerate(texts):
        text = str(text).strip() if text is not None else ""
        if not text:
            continue
        temp_audio = generate_unique_filename(PODCAST_DIR, prefix=f"segment{idx}")
        try:
            communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
            await communicate.save(temp_audio)
            audio_segments.append(AudioSegment.from_file(temp_audio))
        finally:
            # Remove the intermediate file even if synthesis/decoding fails.
            if os.path.exists(temp_audio):
                os.remove(temp_audio)

    # Bug fix: sum([]) is the int 0, which would crash len()/overlay()/export()
    # with a confusing TypeError when every row is blank.
    if not audio_segments:
        return None
    podcast_audio = sum(audio_segments)

    # Optional background music, ducked by 10 dB and trimmed to episode length.
    bgm_path = getattr(bgm_file, "name", bgm_file)
    if bgm_path is not None and os.path.isfile(bgm_path):
        bgm = AudioSegment.from_file(bgm_path).apply_gain(-10)
        podcast_audio = podcast_audio.overlay(bgm[:len(podcast_audio)])

    # Export the finished episode.
    podcast_file = generate_unique_filename(PODCAST_DIR, prefix="podcast")
    podcast_audio.export(podcast_file, format="mp3")

    # Sidecar metadata file next to the mp3.
    meta_file = podcast_file.replace(".mp3", ".txt")
    with open(meta_file, "w", encoding="utf-8") as f:
        f.write(f"Title: {podcast_title}\nDescription: {podcast_desc}\n")
    return podcast_file
|
| 78 |
+
|
| 79 |
# Build and launch the Gradio UI (application entry point).
async def main():
    """Assemble the four-tab Gradio app and start the server."""
    # Voice list is fetched once at startup and shared by both dropdowns.
    voices = await get_voices()

    with gr.Blocks() as demo:
        gr.Markdown("## Edge TTS 語音合成與播客製作")
        # Tab 1: single-passage text-to-speech.
        with gr.Tab("語音合成"):
            text_input = gr.Textbox(lines=5, label="輸入文本")
            voice_input = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
            rate_input = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
            pitch_input = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
            tts_btn = gr.Button("生成語音")
            audio_output = gr.Audio(type="filepath", label="生成的語音")
            tts_btn.click(
                fn=tts_interface,
                inputs=[text_input, voice_input, rate_input, pitch_input],
                outputs=audio_output
            )
        # Tab 2: browse previously generated clips.
        with gr.Tab("檢視已儲存語音"):
            # NOTE(review): listed once at launch; files generated afterwards
            # won't appear in the dropdown until the app restarts.
            audio_files = list_saved_audios()
            audio_dropdown = gr.Dropdown(audio_files, label="選擇已儲存語音檔案")
            saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
            audio_dropdown.change(
                fn=play_saved_audio,
                inputs=audio_dropdown,
                outputs=saved_audio_output
            )
        # Tab 3: multi-segment podcast production with optional BGM.
        with gr.Tab("播客製作"):
            gr.Markdown("### 輸入多段腳本,選擇語音、可插入背景音樂,快速生成播客")
            scripts_input = gr.Dataframe(headers=["段落內容"], datatype=["str"], row_count=3, col_count=1, label="播客腳本(每行一段)")
            voice_input2 = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
            rate_input2 = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
            pitch_input2 = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
            bgm_input = gr.File(label="上傳背景音樂(可選)")
            podcast_title = gr.Textbox(label="播客標題")
            podcast_desc = gr.Textbox(label="播客描述")
            podcast_btn = gr.Button("生成播客")
            podcast_output = gr.Audio(type="filepath", label="生成的播客音檔")
            podcast_btn.click(
                fn=podcast_produce,
                inputs=[scripts_input, voice_input2, rate_input2, pitch_input2, bgm_input, podcast_title, podcast_desc],
                outputs=podcast_output
            )
        # Tab 4: browse previously produced podcasts (same launch-time caveat
        # as Tab 2: the list is computed once when the app starts).
        with gr.Tab("檢視已儲存播客"):
            podcast_files = list_saved_podcasts()
            podcast_dropdown = gr.Dropdown(podcast_files, label="選擇已儲存播客檔案")
            saved_podcast_output = gr.Audio(type="filepath", label="播放已儲存播客")
            podcast_dropdown.change(
                fn=play_saved_audio,
                inputs=podcast_dropdown,
                outputs=saved_podcast_output
            )

    demo.launch()
|
| 133 |
|
| 134 |
if __name__ == "__main__":
    # asyncio.run is required because main() awaits get_voices() before
    # constructing the (otherwise synchronous) Gradio UI.
    asyncio.run(main())
|