Update app.py
Browse files
app.py
CHANGED
|
@@ -6,9 +6,7 @@ from datetime import datetime
|
|
| 6 |
from pydub import AudioSegment
|
| 7 |
|
| 8 |
AUDIO_DIR = "saved_audios"
|
| 9 |
-
PODCAST_DIR = "podcast_audios"
|
| 10 |
os.makedirs(AUDIO_DIR, exist_ok=True)
|
| 11 |
-
os.makedirs(PODCAST_DIR, exist_ok=True)
|
| 12 |
|
| 13 |
def generate_unique_filename(folder, prefix="audio", ext="mp3"):
|
| 14 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
|
@@ -28,10 +26,6 @@ def list_saved_audios():
|
|
| 28 |
files = sorted(os.listdir(AUDIO_DIR), reverse=True)
|
| 29 |
return [os.path.join(AUDIO_DIR, f) for f in files if f.endswith(".mp3")]
|
| 30 |
|
| 31 |
-
def list_saved_podcasts():
    """Return the paths of the saved podcast MP3 files.

    The entries of ``PODCAST_DIR`` are sorted by name in descending order and
    only names ending in ``.mp3`` are kept; each kept name is joined onto
    ``PODCAST_DIR``.

    Returns:
        A list of path strings, possibly empty.
    """
    files = sorted(os.listdir(PODCAST_DIR), reverse=True)
    return [os.path.join(PODCAST_DIR, f) for f in files if f.endswith(".mp3")]
|
| 34 |
-
|
| 35 |
async def tts_interface(text, voice, rate_percentage, pitch_hz):
|
| 36 |
rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
|
| 37 |
pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
|
|
@@ -41,53 +35,12 @@ async def tts_interface(text, voice, rate_percentage, pitch_hz):
|
|
| 41 |
def play_saved_audio(audio_file):
    """Return the selected audio path unchanged.

    Used as an event callback: the value chosen in a dropdown is passed
    straight through to the audio player output.
    """
    return audio_file
|
| 43 |
|
| 44 |
-
async def podcast_produce(scripts, voice, rate_percentage, pitch_hz, bgm_file, podcast_title, podcast_desc):
    """Synthesize each script paragraph, join them and export one podcast MP3.

    Args:
        scripts: Iterable of paragraph texts. Blank or ``None`` entries are
            skipped.
        voice: Voice name passed to ``edge_tts.Communicate``.
        rate_percentage: Signed speaking-rate change, formatted as e.g. ``+10%``.
        pitch_hz: Signed pitch change, formatted as e.g. ``-5Hz``.
        bgm_file: Optional background music, given either as a path string or
            as an object exposing the path through ``.name``.
        podcast_title: Title written to the sidecar ``.txt`` file.
        podcast_desc: Description written to the sidecar ``.txt`` file.

    Returns:
        The path of the exported MP3, or ``None`` when no paragraph has any
        content.
    """
    rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
    pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"

    audio_segments = []
    for idx, text in enumerate(scripts):
        if not text or not text.strip():
            continue
        temp_audio = generate_unique_filename(PODCAST_DIR, prefix=f"segment{idx}")
        try:
            communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
            await communicate.save(temp_audio)
            audio_segments.append(AudioSegment.from_file(temp_audio))
        finally:
            # Remove the intermediate file even when synthesis or decoding
            # fails, so failed runs do not leave segment files behind.
            if os.path.exists(temp_audio):
                os.remove(temp_audio)

    if not audio_segments:
        return None

    # Concatenate the paragraphs in their original order.
    podcast_audio = audio_segments[0]
    for segment in audio_segments[1:]:
        podcast_audio = podcast_audio + segment

    # The upload may arrive as a plain path or as a file-like wrapper.
    if isinstance(bgm_file, str):
        bgm_path = bgm_file
    else:
        bgm_path = getattr(bgm_file, "name", None)
    if bgm_path and os.path.isfile(bgm_path):
        bgm = AudioSegment.from_file(bgm_path).apply_gain(-10)
        # Trim the music so it never runs longer than the speech.
        bgm = bgm[:len(podcast_audio)]
        podcast_audio = podcast_audio.overlay(bgm)

    podcast_file = generate_unique_filename(PODCAST_DIR, prefix="podcast")
    podcast_audio.export(podcast_file, format="mp3")

    # Swap only the extension; a substring replace could touch other parts of
    # the path or leave the name unchanged and overwrite the audio.
    meta_file = os.path.splitext(podcast_file)[0] + ".txt"
    with open(meta_file, "w", encoding="utf-8") as f:
        f.write(f"Title: {podcast_title}\nDescription: {podcast_desc}\n")
    return podcast_file
|
| 68 |
-
|
| 69 |
def clear_textbox():
    """Return an empty string, used to reset a textbox component."""
    return ""
|
| 71 |
|
| 72 |
-
def clear_paragraphs():
    """Return a fresh paragraph list holding a single empty paragraph."""
    return [""]
|
| 74 |
-
|
| 75 |
-
def add_paragraph(paragraphs):
    """Return a new list with one empty paragraph appended.

    The input list is not modified.
    """
    return paragraphs + [""]
|
| 77 |
-
|
| 78 |
-
def remove_paragraph(paragraphs):
    """Return the paragraphs without the last one, never dropping below one.

    A list with more than one entry yields a new list missing its final
    entry; a shorter list is returned as is.
    """
    if len(paragraphs) > 1:
        return paragraphs[:-1]
    return paragraphs
|
| 83 |
-
|
| 84 |
-
def render_paragraphs(paragraphs):
    """Build one interactive three-line ``gr.Textbox`` per paragraph.

    Each textbox is pre-filled with its paragraph text and labelled with its
    1-based position.
    """
    return [
        gr.Textbox(value=p, label=f"段落{i+1}內容", lines=3, interactive=True)
        for i, p in enumerate(paragraphs)
    ]
|
| 86 |
-
|
| 87 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 88 |
-
gr.Markdown("## 🎙️ Edge TTS
|
| 89 |
|
| 90 |
-
# 語音合成分頁
|
| 91 |
with gr.Tab("語音合成"):
|
| 92 |
with gr.Row():
|
| 93 |
text_input = gr.Textbox(lines=5, label="輸入文本")
|
|
@@ -104,90 +57,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
|
| 104 |
)
|
| 105 |
clear_btn.click(fn=clear_textbox, outputs=text_input)
|
| 106 |
|
| 107 |
-
# 已存語音分頁
|
| 108 |
with gr.Tab("檢視已儲存語音"):
|
| 109 |
audio_files = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案", interactive=True)
|
| 110 |
saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
|
| 111 |
audio_files.change(fn=play_saved_audio, inputs=audio_files, outputs=saved_audio_output)
|
| 112 |
|
| 113 |
-
# 播客製作分頁
|
| 114 |
-
with gr.Tab("播客製作"):
|
| 115 |
-
gr.Markdown("### 📝 多段腳本輸入(可自由增減段落)")
|
| 116 |
-
paragraphs_state = gr.State([""])
|
| 117 |
-
|
| 118 |
-
# 用 gr.Column 重新渲染段落
|
| 119 |
-
paragraph_column = gr.Column()
|
| 120 |
-
# 初始化段落
|
| 121 |
-
paragraph_boxes = render_paragraphs([""])
|
| 122 |
-
for tb in paragraph_boxes:
|
| 123 |
-
paragraph_column.children += (tb,)
|
| 124 |
-
|
| 125 |
-
add_btn = gr.Button("新增段落")
|
| 126 |
-
remove_btn = gr.Button("刪除段落")
|
| 127 |
-
clear_all_btn = gr.Button("全部清空")
|
| 128 |
-
|
| 129 |
-
def update_paragraph_ui(paragraphs):
|
| 130 |
-
paragraph_column.children = tuple(render_paragraphs(paragraphs))
|
| 131 |
-
return gr.update()
|
| 132 |
-
|
| 133 |
-
add_btn.click(
|
| 134 |
-
lambda p: (add_paragraph(p), update_paragraph_ui(add_paragraph(p))),
|
| 135 |
-
inputs=paragraphs_state,
|
| 136 |
-
outputs=[paragraphs_state, paragraph_column]
|
| 137 |
-
)
|
| 138 |
-
remove_btn.click(
|
| 139 |
-
lambda p: (remove_paragraph(p), update_paragraph_ui(remove_paragraph(p))),
|
| 140 |
-
inputs=paragraphs_state,
|
| 141 |
-
outputs=[paragraphs_state, paragraph_column]
|
| 142 |
-
)
|
| 143 |
-
clear_all_btn.click(
|
| 144 |
-
lambda: (clear_paragraphs(), update_paragraph_ui(clear_paragraphs())),
|
| 145 |
-
outputs=[paragraphs_state, paragraph_column]
|
| 146 |
-
)
|
| 147 |
-
|
| 148 |
-
voice_input2 = gr.Dropdown([], label="選擇語音")
|
| 149 |
-
rate_input2 = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
|
| 150 |
-
pitch_input2 = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
|
| 151 |
-
bgm_input = gr.File(label="上傳背景音樂(可選)")
|
| 152 |
-
podcast_title = gr.Textbox(label="播客標題")
|
| 153 |
-
podcast_desc = gr.Textbox(label="播客描述")
|
| 154 |
-
podcast_btn = gr.Button("生成播客")
|
| 155 |
-
podcast_output = gr.Audio(type="filepath", label="生成的播客音檔")
|
| 156 |
-
|
| 157 |
-
def on_podcast_btn_click(*args):
|
| 158 |
-
n = len(paragraph_column.children)
|
| 159 |
-
scripts = list(args[:n])
|
| 160 |
-
voice = args[n]
|
| 161 |
-
rate = args[n+1]
|
| 162 |
-
pitch = args[n+2]
|
| 163 |
-
bgm = args[n+3]
|
| 164 |
-
title = args[n+4]
|
| 165 |
-
desc = args[n+5]
|
| 166 |
-
return asyncio.run(podcast_produce(scripts, voice, rate, pitch, bgm, title, desc))
|
| 167 |
-
|
| 168 |
-
# 收集 paragraph_column.children 作為 inputs
|
| 169 |
-
def get_inputs():
|
| 170 |
-
return list(paragraph_column.children) + [voice_input2, rate_input2, pitch_input2, bgm_input, podcast_title, podcast_desc]
|
| 171 |
-
|
| 172 |
-
podcast_btn.click(
|
| 173 |
-
fn=on_podcast_btn_click,
|
| 174 |
-
inputs=get_inputs(),
|
| 175 |
-
outputs=podcast_output
|
| 176 |
-
)
|
| 177 |
-
|
| 178 |
-
# 已存播客分頁
|
| 179 |
-
with gr.Tab("檢視已儲存播客"):
|
| 180 |
-
podcast_files = gr.Dropdown(list_saved_podcasts(), label="選擇已儲存播客檔案", interactive=True)
|
| 181 |
-
saved_podcast_output = gr.Audio(type="filepath", label="播放已儲存播客")
|
| 182 |
-
podcast_files.change(fn=play_saved_audio, inputs=podcast_files, outputs=saved_podcast_output)
|
| 183 |
-
|
| 184 |
-
# 語音清單初始化
|
| 185 |
async def init_voices():
    """Fetch the voice list and preselect a default in both voice dropdowns.

    ``zh-CN-XiaoxiaoNeural`` is chosen when available, otherwise the first
    voice. With an empty voice list the dropdowns get no default instead of
    raising ``IndexError``.
    """
    # NOTE(review): assumes get_voices() returns an indexable sequence of
    # voice names -- confirm against its definition.
    voices = await get_voices()
    preferred = "zh-CN-XiaoxiaoNeural"
    if preferred in voices:
        default_voice = preferred
    elif voices:
        default_voice = voices[0]
    else:
        default_voice = None
    for dropdown in (voice_input, voice_input2):
        dropdown.choices = voices
        dropdown.value = default_voice
|
| 191 |
asyncio.get_event_loop().run_until_complete(init_voices())
|
| 192 |
|
| 193 |
demo.launch()
|
|
|
|
| 6 |
from pydub import AudioSegment
|
| 7 |
|
| 8 |
AUDIO_DIR = "saved_audios"
|
|
|
|
| 9 |
os.makedirs(AUDIO_DIR, exist_ok=True)
|
|
|
|
| 10 |
|
| 11 |
def generate_unique_filename(folder, prefix="audio", ext="mp3"):
|
| 12 |
timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
|
|
|
|
| 26 |
files = sorted(os.listdir(AUDIO_DIR), reverse=True)
|
| 27 |
return [os.path.join(AUDIO_DIR, f) for f in files if f.endswith(".mp3")]
|
| 28 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
async def tts_interface(text, voice, rate_percentage, pitch_hz):
|
| 30 |
rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
|
| 31 |
pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
|
|
|
|
| 35 |
def play_saved_audio(audio_file):
    """Return the selected audio path unchanged.

    Used as an event callback: the value chosen in a dropdown is passed
    straight through to the audio player output.
    """
    return audio_file
|
| 37 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
def clear_textbox():
    """Return an empty string, used to reset a textbox component."""
    return ""
|
| 40 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 41 |
with gr.Blocks(theme=gr.themes.Soft()) as demo:
|
| 42 |
+
gr.Markdown("## 🎙️ Edge TTS 語音合成工具\n\n- 支援語音合成、語音檔自動儲存與播放\n- 介面簡潔、操作直覺")
|
| 43 |
|
|
|
|
| 44 |
with gr.Tab("語音合成"):
|
| 45 |
with gr.Row():
|
| 46 |
text_input = gr.Textbox(lines=5, label="輸入文本")
|
|
|
|
| 57 |
)
|
| 58 |
clear_btn.click(fn=clear_textbox, outputs=text_input)
|
| 59 |
|
|
|
|
| 60 |
with gr.Tab("檢視已儲存語音"):
|
| 61 |
audio_files = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案", interactive=True)
|
| 62 |
saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
|
| 63 |
audio_files.change(fn=play_saved_audio, inputs=audio_files, outputs=saved_audio_output)
|
| 64 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 65 |
async def init_voices():
    """Fetch the voice list and preselect a default in the voice dropdown.

    ``zh-CN-XiaoxiaoNeural`` is chosen when available, otherwise the first
    voice. With an empty voice list the dropdown gets no default instead of
    raising ``IndexError``.
    """
    # NOTE(review): assumes get_voices() returns an indexable sequence of
    # voice names -- confirm against its definition.
    voices = await get_voices()
    preferred = "zh-CN-XiaoxiaoNeural"
    if preferred in voices:
        default_voice = preferred
    elif voices:
        default_voice = voices[0]
    else:
        default_voice = None
    voice_input.choices = voices
    voice_input.value = default_voice
|
|
|
|
|
|
|
| 69 |
asyncio.get_event_loop().run_until_complete(init_voices())
|
| 70 |
|
| 71 |
demo.launch()
|