Ryanus committed on
Commit
253b483
·
verified ·
1 Parent(s): 33650f8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +109 -43
app.py CHANGED
@@ -2,68 +2,134 @@ import asyncio
2
  import edge_tts
3
  import gradio as gr
4
  import os
 
 
5
 
6
- # 定義一個異步函數來生成語音
7
- async def generate_speech(text: str, voice: str, rate: str, pitch: str) -> str:
8
- """
9
- 使用 Edge TTS 生成語音並保存為音頻文件。
10
 
11
- Args:
12
- text (str): 要轉換為語音的文本。
13
- voice (str): 要使用的語音名稱 (例如 "zh-CN-XiaoxiaoNeural")
14
- rate (str): 語速調整 (例如 "+0%", "-10%", "+20%")。
15
- pitch (str): 音高調整 (例如 "+0Hz", "-5Hz", "+10Hz")。
16
 
17
- Returns:
18
- str: 生成的音頻文件的路徑。
19
- """
20
- output_file = "output.mp3"
21
-
22
- # 構建 Edge TTS 命令
23
  communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
24
-
25
- # 將語音寫入文件
26
  await communicate.save(output_file)
27
-
28
  return output_file
29
 
30
- # 獲取可用的語音列表
31
  async def get_voices():
32
- """
33
- 獲取 Edge TTS 可用的語音列表。
34
- """
35
  voices = await edge_tts.list_voices()
36
- # 修正這裡,使用字典鍵 'ShortName' 來訪問語音名稱
37
  return [voice["ShortName"] for voice in voices]
38
 
39
- # Gradio 介面函數
 
 
 
 
 
 
 
 
 
 
40
  async def tts_interface(text, voice, rate_percentage, pitch_hz):
41
- # 確保 rate 和 pitch 始終包含符號
42
  rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
43
  pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
44
-
45
  audio_path = await generate_speech(text, voice, rate, pitch)
46
  return audio_path
47
 
48
- # 啟動 Gradio 介面
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
49
  async def main():
50
  voices = await get_voices()
51
-
52
- iface = gr.Interface(
53
- fn=tts_interface,
54
- inputs=[
55
- gr.Textbox(lines=5, label="輸入文本"),
56
- gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural"), # 預設中文語音
57
- gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="語速調整 (%)"),
58
- gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="音高調整 (Hz)")
59
- ],
60
- outputs=gr.Audio(type="filepath", label="生成的語音"),
61
- title="Edge TTS 語音合成",
62
- description="輸入文本,選擇語音,調整語速和音高,然後生成語音。",
63
- allow_flagging="never"
64
- )
65
-
66
- iface.launch()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
67
 
68
  if __name__ == "__main__":
69
  asyncio.run(main())
 
2
  import edge_tts
3
  import gradio as gr
4
  import os
5
+ from datetime import datetime
6
+ from pydub import AudioSegment
7
 
8
# Folder that receives every single-passage synthesis result.
AUDIO_DIR = "saved_audios"
os.makedirs(AUDIO_DIR, exist_ok=True)

# Folder that receives finished podcasts, their metadata text files and the
# short-lived per-paragraph segment files.
PODCAST_DIR = "podcast_audios"
os.makedirs(PODCAST_DIR, exist_ok=True)
12
 
13
def generate_unique_filename(folder, prefix="audio", ext="mp3"):
    """Build a timestamped file path inside ``folder``.

    The file name has the form ``<prefix>_<YYYYmmdd>_<HHMMSS>_<microseconds>.<ext>``
    and is derived from the current local time. Nothing is created on disk.

    Args:
        folder: Directory the path should point into.
        prefix: Leading part of the file name.
        ext: File extension, without the dot.

    Returns:
        The joined path as a string.
    """
    stamp = f"{datetime.now():%Y%m%d_%H%M%S_%f}"
    file_name = "{}_{}.{}".format(prefix, stamp, ext)
    return os.path.join(folder, file_name)
 
 
16
 
17
# Single-passage speech synthesis; the result is saved automatically.
async def generate_speech(text, voice, rate, pitch):
    """Synthesize ``text`` with Edge TTS and store it under ``AUDIO_DIR``.

    Args:
        text: Text to convert to speech.
        voice: Edge TTS voice name, forwarded to ``edge_tts.Communicate``.
        rate: Speaking-rate string, forwarded as the ``rate`` keyword.
        pitch: Pitch string, forwarded as the ``pitch`` keyword.

    Returns:
        Path of the newly written audio file.
    """
    # Pick the destination first so every call writes to its own file.
    destination = generate_unique_filename(AUDIO_DIR)
    synthesizer = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
    await synthesizer.save(destination)
    return destination
23
 
24
# Fetch the voices Edge TTS offers.
async def get_voices():
    """Return the ``ShortName`` of every voice reported by ``edge_tts.list_voices()``."""
    catalogue = await edge_tts.list_voices()
    short_names = []
    for entry in catalogue:
        short_names.append(entry["ShortName"])
    return short_names
28
 
29
# List of saved speech files.
def list_saved_audios():
    """Return the paths of all ``.mp3`` files in ``AUDIO_DIR``.

    Entries are ordered by file name, descending, and each one is prefixed
    with ``AUDIO_DIR``.
    """
    mp3_names = [name for name in os.listdir(AUDIO_DIR) if name.endswith(".mp3")]
    mp3_names.sort(reverse=True)
    return [os.path.join(AUDIO_DIR, name) for name in mp3_names]
33
+
34
# List of saved podcast files.
def list_saved_podcasts():
    """Return the paths of all ``.mp3`` files in ``PODCAST_DIR``.

    Entries are ordered by file name, descending, and each one is prefixed
    with ``PODCAST_DIR``.
    """
    mp3_names = [name for name in os.listdir(PODCAST_DIR) if name.endswith(".mp3")]
    mp3_names.sort(reverse=True)
    return [os.path.join(PODCAST_DIR, name) for name in mp3_names]
38
+
39
# Gradio handler for single-passage speech synthesis.
async def tts_interface(text, voice, rate_percentage, pitch_hz):
    """Turn the numeric slider values into signed strings and synthesize ``text``.

    Args:
        text: Text to convert to speech.
        voice: Edge TTS voice name.
        rate_percentage: Speaking-rate offset; rendered as e.g. ``+0%`` or ``-10%``.
        pitch_hz: Pitch offset; rendered as e.g. ``+0Hz`` or ``-5Hz``.

    Returns:
        Path of the audio file produced by ``generate_speech``.
    """

    def with_sign(number):
        # Negative numbers already print their own "-"; everything else gets "+".
        return f"+{number}" if number >= 0 else f"{number}"

    rate = with_sign(rate_percentage) + "%"
    pitch = with_sign(pitch_hz) + "Hz"
    return await generate_speech(text, voice, rate, pitch)
45
 
46
# Play back a saved audio file.
def play_saved_audio(audio_file):
    """Hand the selected file path straight through to the audio player.

    The dropdown value already is the path the ``gr.Audio`` output expects,
    so no transformation is needed.
    """
    return audio_file
49
+
50
# Podcast production: synthesize several script paragraphs, join them, and
# optionally mix in background music.
def _collect_script_texts(scripts):
    """Flatten the script table into an ordered list of non-blank paragraphs.

    Handles a pandas-style DataFrame (anything with ``to_numpy``), a numpy
    array, a ``{"data": rows}`` dict, a list of rows, a plain list of
    strings, a single string, or ``None``.
    """
    if scripts is None:
        return []
    if isinstance(scripts, str):
        rows = [scripts]
    elif isinstance(scripts, dict):
        rows = scripts.get("data") or []
    elif hasattr(scripts, "to_numpy"):
        # Iterating a DataFrame yields its column labels, not its cells.
        rows = scripts.to_numpy().tolist()
    elif hasattr(scripts, "tolist"):
        rows = scripts.tolist()
    else:
        rows = list(scripts)

    paragraphs = []
    for row in rows:
        cells = row if isinstance(row, (list, tuple)) else [row]
        for cell in cells:
            # Unfilled table cells may arrive as None or NaN (NaN != NaN).
            if cell is None or cell != cell:
                continue
            paragraph = str(cell).strip()
            if paragraph:
                paragraphs.append(paragraph)
    return paragraphs


async def podcast_produce(scripts, voice, rate_percentage, pitch_hz, bgm_file, podcast_title, podcast_desc):
    """Synthesize every script paragraph, join them and save the podcast.

    Args:
        scripts: Script table; see ``_collect_script_texts`` for accepted shapes.
        voice: Edge TTS voice name.
        rate_percentage: Speaking-rate offset; rendered as e.g. ``+0%``.
        pitch_hz: Pitch offset; rendered as e.g. ``+0Hz``.
        bgm_file: Optional background music, given as a path or as an object
            whose ``name`` attribute is the path. Ignored when missing.
        podcast_title: Title written to the metadata file.
        podcast_desc: Description written to the metadata file.

    Returns:
        Path of the exported podcast ``.mp3``. A ``.txt`` file with the same
        stem holding the title and description is written next to it.

    Raises:
        ValueError: If ``scripts`` contains no non-blank paragraph.
    """
    paragraphs = _collect_script_texts(scripts)
    if not paragraphs:
        # Without this guard sum([]) would be the int 0 and fail later on.
        raise ValueError("podcast_produce needs at least one non-empty script paragraph")

    rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
    pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"

    # Synthesize each paragraph into a temporary file and load it into memory.
    audio_segments = []
    for idx, text in enumerate(paragraphs):
        temp_audio = generate_unique_filename(PODCAST_DIR, prefix=f"segment{idx}")
        try:
            communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
            await communicate.save(temp_audio)
            audio_segments.append(AudioSegment.from_file(temp_audio))
        finally:
            # Never leave segment files behind, even when synthesis fails.
            if os.path.exists(temp_audio):
                os.remove(temp_audio)

    # Concatenate all paragraphs in order.
    podcast_audio = sum(audio_segments)

    # Mix in background music (optional).
    if isinstance(bgm_file, (str, os.PathLike)):
        bgm_path = bgm_file
    else:
        # Upload wrappers expose the temporary file path as ``.name``.
        bgm_path = getattr(bgm_file, "name", None)
    if bgm_path and os.path.isfile(bgm_path):
        bgm = AudioSegment.from_file(bgm_path).apply_gain(-10)
        bgm = bgm[:len(podcast_audio)]
        podcast_audio = podcast_audio.overlay(bgm)

    # Save the podcast audio.
    podcast_file = generate_unique_filename(PODCAST_DIR, prefix="podcast")
    podcast_audio.export(podcast_file, format="mp3")

    # Save the metadata next to it; only the extension is swapped.
    meta_file = os.path.splitext(podcast_file)[0] + ".txt"
    with open(meta_file, "w", encoding="utf-8") as f:
        f.write(f"Title: {podcast_title}\nDescription: {podcast_desc}\n")
    return podcast_file
78
+
79
# Main Gradio interface.
async def main():
    """Build the four-tab Gradio UI and launch it.

    Tabs: single-passage synthesis, browsing saved speech files, podcast
    production, and browsing saved podcasts. The voice list is fetched once
    before the UI is built.

    The two "saved" dropdowns are filled when the UI is built and are
    refreshed after every successful generation, so files created during
    the session become selectable without restarting the app.
    """
    voices = await get_voices()

    async def synthesize_and_refresh(text, voice, rate_percentage, pitch_hz):
        """Run single-passage synthesis, then re-scan the saved-audio folder."""
        audio_path = await tts_interface(text, voice, rate_percentage, pitch_hz)
        return audio_path, gr.update(choices=list_saved_audios())

    async def produce_and_refresh(scripts, voice, rate_percentage, pitch_hz, bgm_file, title, desc):
        """Produce a podcast, then re-scan the saved-podcast folder."""
        podcast_path = await podcast_produce(
            scripts, voice, rate_percentage, pitch_hz, bgm_file, title, desc
        )
        return podcast_path, gr.update(choices=list_saved_podcasts())

    with gr.Blocks() as demo:
        gr.Markdown("## Edge TTS 語音合成與播客製作")

        with gr.Tab("語音合成"):
            text_input = gr.Textbox(lines=5, label="輸入文本")
            voice_input = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
            rate_input = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
            pitch_input = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
            tts_btn = gr.Button("生成語音")
            audio_output = gr.Audio(type="filepath", label="生成的語音")

        with gr.Tab("檢視已儲存語音"):
            audio_dropdown = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案")
            saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")

        with gr.Tab("播客製作"):
            gr.Markdown("### 輸入多段腳本,選擇語音、可插入背景音樂,快速生成播客")
            scripts_input = gr.Dataframe(headers=["段落內容"], datatype=["str"], row_count=3, col_count=1, label="播客腳本(每行一段)")
            voice_input2 = gr.Dropdown(voices, label="選擇語音", value="zh-CN-XiaoxiaoNeural")
            rate_input2 = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
            pitch_input2 = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
            bgm_input = gr.File(label="上傳背景音樂(可選)")
            podcast_title = gr.Textbox(label="播客標題")
            podcast_desc = gr.Textbox(label="播客描述")
            podcast_btn = gr.Button("生成播客")
            podcast_output = gr.Audio(type="filepath", label="生成的播客音檔")

        with gr.Tab("檢視已儲存播客"):
            podcast_dropdown = gr.Dropdown(list_saved_podcasts(), label="選擇已儲存播客檔案")
            saved_podcast_output = gr.Audio(type="filepath", label="播放已儲存播客")

        # Events are wired only after every component exists, because the
        # generate buttons also update dropdowns that live in later tabs.
        tts_btn.click(
            fn=synthesize_and_refresh,
            inputs=[text_input, voice_input, rate_input, pitch_input],
            outputs=[audio_output, audio_dropdown],
        )
        audio_dropdown.change(
            fn=play_saved_audio,
            inputs=audio_dropdown,
            outputs=saved_audio_output,
        )
        podcast_btn.click(
            fn=produce_and_refresh,
            inputs=[scripts_input, voice_input2, rate_input2, pitch_input2, bgm_input, podcast_title, podcast_desc],
            outputs=[podcast_output, podcast_dropdown],
        )
        podcast_dropdown.change(
            fn=play_saved_audio,
            inputs=podcast_dropdown,
            outputs=saved_podcast_output,
        )

    demo.launch()
133
 
# Script entry point: run the async UI builder when executed directly.
if __name__ == "__main__":
    asyncio.run(main())