Spaces:

Ryanus
/

EdgeTTS

Sleeping

App Files Community

Ryanus committed on Jul 7, 2025

Commit

579e57f

verified ·

1 Parent(s): 37f5752

Update app.py

Browse files

Files changed (1) hide show

app.py +1 -123

app.py CHANGED Viewed

@@ -6,9 +6,7 @@ from datetime import datetime
 from pydub import AudioSegment
 AUDIO_DIR = "saved_audios"
-PODCAST_DIR = "podcast_audios"
 os.makedirs(AUDIO_DIR, exist_ok=True)
-os.makedirs(PODCAST_DIR, exist_ok=True)
 def generate_unique_filename(folder, prefix="audio", ext="mp3"):
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
@@ -28,10 +26,6 @@ def list_saved_audios():
     files = sorted(os.listdir(AUDIO_DIR), reverse=True)
     return [os.path.join(AUDIO_DIR, f) for f in files if f.endswith(".mp3")]
-def list_saved_podcasts():
-    files = sorted(os.listdir(PODCAST_DIR), reverse=True)
-    return [os.path.join(PODCAST_DIR, f) for f in files if f.endswith(".mp3")]
 async def tts_interface(text, voice, rate_percentage, pitch_hz):
     rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
     pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
@@ -41,53 +35,12 @@ async def tts_interface(text, voice, rate_percentage, pitch_hz):
 def play_saved_audio(audio_file):
     return audio_file
-async def podcast_produce(scripts, voice, rate_percentage, pitch_hz, bgm_file, podcast_title, podcast_desc):
-    rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
-    pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
-    audio_segments = []
-    for idx, text in enumerate(scripts):
-        if text.strip():
-            temp_audio = generate_unique_filename(PODCAST_DIR, prefix=f"segment{idx}")
-            communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
-            await communicate.save(temp_audio)
-            audio_segments.append(AudioSegment.from_file(temp_audio))
-            os.remove(temp_audio)
-    if not audio_segments:
-        return None
-    podcast_audio = sum(audio_segments)
-    if bgm_file is not None and hasattr(bgm_file, "name") and os.path.isfile(bgm_file.name):
-        bgm = AudioSegment.from_file(bgm_file.name).apply_gain(-10)
-        bgm = bgm[:len(podcast_audio)]
-        podcast_audio = podcast_audio.overlay(bgm)
-    podcast_file = generate_unique_filename(PODCAST_DIR, prefix="podcast")
-    podcast_audio.export(podcast_file, format="mp3")
-    meta_file = podcast_file.replace(".mp3", ".txt")
-    with open(meta_file, "w", encoding="utf-8") as f:
-        f.write(f"Title: {podcast_title}\nDescription: {podcast_desc}\n")
-    return podcast_file
 def clear_textbox():
     return ""
-def clear_paragraphs():
-    return [""]
-def add_paragraph(paragraphs):
-    return paragraphs + [""]
-def remove_paragraph(paragraphs):
-    if len(paragraphs) > 1:
-        return paragraphs[:-1]
-    else:
-        return paragraphs
-def render_paragraphs(paragraphs):
-    return [gr.Textbox(value=p, label=f"段落{i+1}內容", lines=3, interactive=True) for i, p in enumerate(paragraphs)]
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
-    gr.Markdown("## 🎙️ Edge TTS 語音合成與播客製作\n\n- 多段腳本自由增減、內容可清空\n- 介面直覺、操作友善、檔案自動管理")
-    # 語音合成分頁
     with gr.Tab("語音合成"):
         with gr.Row():
             text_input = gr.Textbox(lines=5, label="輸入文本")
@@ -104,90 +57,15 @@ with gr.Blocks(theme=gr.themes.Soft()) as demo:
         )
         clear_btn.click(fn=clear_textbox, outputs=text_input)
-    # 已存語音分頁
     with gr.Tab("檢視已儲存語音"):
         audio_files = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案", interactive=True)
         saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
         audio_files.change(fn=play_saved_audio, inputs=audio_files, outputs=saved_audio_output)
-    # 播客製作分頁
-    with gr.Tab("播客製作"):
-        gr.Markdown("### 📝 多段腳本輸入（可自由增減段落）")
-        paragraphs_state = gr.State([""])
-        # 用 gr.Column 重新渲染段落
-        paragraph_column = gr.Column()
-        # 初始化段落
-        paragraph_boxes = render_paragraphs([""])
-        for tb in paragraph_boxes:
-            paragraph_column.children += (tb,)
-        add_btn = gr.Button("新增段落")
-        remove_btn = gr.Button("刪除段落")
-        clear_all_btn = gr.Button("全部清空")
-        def update_paragraph_ui(paragraphs):
-            paragraph_column.children = tuple(render_paragraphs(paragraphs))
-            return gr.update()
-        add_btn.click(
-            lambda p: (add_paragraph(p), update_paragraph_ui(add_paragraph(p))),
-            inputs=paragraphs_state,
-            outputs=[paragraphs_state, paragraph_column]
-        )
-        remove_btn.click(
-            lambda p: (remove_paragraph(p), update_paragraph_ui(remove_paragraph(p))),
-            inputs=paragraphs_state,
-            outputs=[paragraphs_state, paragraph_column]
-        )
-        clear_all_btn.click(
-            lambda: (clear_paragraphs(), update_paragraph_ui(clear_paragraphs())),
-            outputs=[paragraphs_state, paragraph_column]
-        )
-        voice_input2 = gr.Dropdown([], label="選擇語音")
-        rate_input2 = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
-        pitch_input2 = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
-        bgm_input = gr.File(label="上傳背景音樂（可選）")
-        podcast_title = gr.Textbox(label="播客標題")
-        podcast_desc = gr.Textbox(label="播客描述")
-        podcast_btn = gr.Button("生成播客")
-        podcast_output = gr.Audio(type="filepath", label="生成的播客音檔")
-        def on_podcast_btn_click(*args):
-            n = len(paragraph_column.children)
-            scripts = list(args[:n])
-            voice = args[n]
-            rate = args[n+1]
-            pitch = args[n+2]
-            bgm = args[n+3]
-            title = args[n+4]
-            desc = args[n+5]
-            return asyncio.run(podcast_produce(scripts, voice, rate, pitch, bgm, title, desc))
-        # 收集 paragraph_column.children 作為 inputs
-        def get_inputs():
-            return list(paragraph_column.children) + [voice_input2, rate_input2, pitch_input2, bgm_input, podcast_title, podcast_desc]
-        podcast_btn.click(
-            fn=on_podcast_btn_click,
-            inputs=get_inputs(),
-            outputs=podcast_output
-        )
-    # 已存播客分頁
-    with gr.Tab("檢視已儲存播客"):
-        podcast_files = gr.Dropdown(list_saved_podcasts(), label="選擇已儲存播客檔案", interactive=True)
-        saved_podcast_output = gr.Audio(type="filepath", label="播放已儲存播客")
-        podcast_files.change(fn=play_saved_audio, inputs=podcast_files, outputs=saved_podcast_output)
-    # 語音清單初始化
     async def init_voices():
         voices = await get_voices()
         voice_input.choices = voices
         voice_input.value = "zh-CN-XiaoxiaoNeural" if "zh-CN-XiaoxiaoNeural" in voices else voices[0]
-        voice_input2.choices = voices
-        voice_input2.value = "zh-CN-XiaoxiaoNeural" if "zh-CN-XiaoxiaoNeural" in voices else voices[0]
     asyncio.get_event_loop().run_until_complete(init_voices())
 demo.launch()

 from pydub import AudioSegment
 AUDIO_DIR = "saved_audios"
 os.makedirs(AUDIO_DIR, exist_ok=True)
 def generate_unique_filename(folder, prefix="audio", ext="mp3"):
     timestamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
     files = sorted(os.listdir(AUDIO_DIR), reverse=True)
     return [os.path.join(AUDIO_DIR, f) for f in files if f.endswith(".mp3")]
 async def tts_interface(text, voice, rate_percentage, pitch_hz):
     rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
     pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
 def play_saved_audio(audio_file):
     return audio_file
 def clear_textbox():
     return ""
 with gr.Blocks(theme=gr.themes.Soft()) as demo:
+    gr.Markdown("## 🎙️ Edge TTS 語音合成工具\n\n- 支援語音合成、語音檔自動儲存與播放\n- 介面簡潔、操作直覺")
     with gr.Tab("語音合成"):
         with gr.Row():
             text_input = gr.Textbox(lines=5, label="輸入文本")
         )
         clear_btn.click(fn=clear_textbox, outputs=text_input)
     with gr.Tab("檢視已儲存語音"):
         audio_files = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案", interactive=True)
         saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
         audio_files.change(fn=play_saved_audio, inputs=audio_files, outputs=saved_audio_output)
     async def init_voices():
         voices = await get_voices()
         voice_input.choices = voices
         voice_input.value = "zh-CN-XiaoxiaoNeural" if "zh-CN-XiaoxiaoNeural" in voices else voices[0]
     asyncio.get_event_loop().run_until_complete(init_voices())
 demo.launch()