Spaces:

Ryanus
/

EdgeTTS

Runtime error

App Files Files Community

EdgeTTS / app.py

Ryanus

Update app.py

dcedafb verified 10 months ago

raw

history blame

8.36 kB

	import asyncio
	import edge_tts
	import gradio as gr
	import os
	from datetime import datetime
	from pydub import AudioSegment

	# Output folders, relative to the current working directory.
	# generate_speech() saves into AUDIO_DIR by default; podcast_produce()
	# writes its segments, final mix and metadata into PODCAST_DIR.
	AUDIO_DIR = "saved_audios"
	PODCAST_DIR = "podcast_audios"

	# Create both folders at import time so later listing/saving never hits a
	# missing directory; exist_ok makes repeated start-ups harmless.
	for _output_dir in (AUDIO_DIR, PODCAST_DIR):
	    os.makedirs(_output_dir, exist_ok=True)

	def generate_unique_filename(folder, prefix="audio", ext="mp3"):
	    """Build a timestamped file path inside *folder*.

	    The file name has the form ``<prefix>_<YYYYmmdd_HHMMSS_ffffff>.<ext>``,
	    where the timestamp is the local time down to microseconds. Nothing is
	    created on disk; only the path string is returned.

	    Args:
	        folder: Directory part of the returned path.
	        prefix: Leading part of the file name.
	        ext: File extension, without the dot.

	    Returns:
	        The joined path as a string.
	    """
	    stamp = datetime.now().strftime("%Y%m%d_%H%M%S_%f")
	    file_name = "{}_{}.{}".format(prefix, stamp, ext)
	    return os.path.join(folder, file_name)

	async def generate_speech(text, voice, rate, pitch, folder=AUDIO_DIR):
	    """Synthesize *text* with Edge TTS and save the audio under *folder*.

	    Args:
	        text: Text to speak.
	        voice: Voice identifier passed to ``edge_tts.Communicate``.
	        rate: Rate string passed through as the ``rate`` keyword.
	        pitch: Pitch string passed through as the ``pitch`` keyword.
	        folder: Destination directory; defaults to ``AUDIO_DIR``.

	    Returns:
	        Path of the file the audio was saved to.
	    """
	    # Pick the destination first so the path can be returned after saving.
	    target_path = generate_unique_filename(folder)
	    synthesizer = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
	    await synthesizer.save(target_path)
	    return target_path

	async def get_voices():
	    """Return the ``ShortName`` of every voice reported by ``edge_tts.list_voices()``."""
	    short_names = []
	    for entry in await edge_tts.list_voices():
	        short_names.append(entry["ShortName"])
	    return short_names

	def list_saved_audios():
	    """List the ``.mp3`` files in ``AUDIO_DIR`` as paths, in reverse name order.

	    File names embed a timestamp, so reverse name order puts the most
	    recently generated files first.
	    """
	    mp3_names = [name for name in os.listdir(AUDIO_DIR) if name.endswith(".mp3")]
	    mp3_names.sort(reverse=True)
	    return [os.path.join(AUDIO_DIR, name) for name in mp3_names]

	def list_saved_podcasts():
	    """List the ``.mp3`` files in ``PODCAST_DIR`` as paths, in reverse name order.

	    Non-mp3 entries (such as the ``.txt`` metadata written next to each
	    podcast) are left out.
	    """
	    mp3_names = [name for name in os.listdir(PODCAST_DIR) if name.endswith(".mp3")]
	    mp3_names.sort(reverse=True)
	    return [os.path.join(PODCAST_DIR, name) for name in mp3_names]

	async def tts_interface(text, voice, rate_percentage, pitch_hz):
	    """Gradio handler: turn the form values into a saved speech file.

	    Args:
	        text: Text to synthesize.
	        voice: Voice identifier chosen in the dropdown.
	        rate_percentage: Speed adjustment; formatted as a signed percentage
	            such as ``+10%`` or ``-5%``.
	        pitch_hz: Pitch adjustment; formatted as a signed value such as
	            ``+3Hz`` or ``-3Hz``.

	    Returns:
	        Path of the generated audio file, or ``None`` when *text* is empty
	        or only whitespace.
	    """
	    # Nothing to speak: return None (as podcast_produce does for blank
	    # scripts) instead of sending an empty request to the TTS service.
	    if not text or not text.strip():
	        return None

	    # Non-negative values need an explicit "+"; negatives already carry "-".
	    rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
	    pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"
	    return await generate_speech(text, voice, rate, pitch)

	def play_saved_audio(audio_file):
	    """Return the selected file path unchanged so an Audio output can load it."""
	    selected_path = audio_file
	    return selected_path

	async def podcast_produce(scripts, voice, rate_percentage, pitch_hz, bgm_file, podcast_title, podcast_desc):
	    """Synthesize each script paragraph, join them, and export a podcast mp3.

	    Args:
	        scripts: Iterable of paragraph strings; blank or ``None`` entries are
	            skipped.
	        voice: Voice identifier passed to ``edge_tts.Communicate``.
	        rate_percentage: Speed adjustment, formatted as e.g. ``+10%``.
	        pitch_hz: Pitch adjustment, formatted as e.g. ``-3Hz``.
	        bgm_file: Optional background music, given either as a path string
	            or as an object whose ``name`` attribute is the path.
	        podcast_title: Title written to the metadata file.
	        podcast_desc: Description written to the metadata file.

	    Returns:
	        Path of the exported podcast mp3, or ``None`` when no paragraph
	        contained any text.
	    """
	    rate = f"{'+' if rate_percentage >= 0 else ''}{rate_percentage}%"
	    pitch = f"{'+' if pitch_hz >= 0 else ''}{pitch_hz}Hz"

	    audio_segments = []
	    for idx, text in enumerate(scripts or []):
	        if not text or not text.strip():
	            continue
	        temp_audio = generate_unique_filename(PODCAST_DIR, prefix=f"segment{idx}")
	        try:
	            communicate = edge_tts.Communicate(text, voice, rate=rate, pitch=pitch)
	            await communicate.save(temp_audio)
	            audio_segments.append(AudioSegment.from_file(temp_audio))
	        finally:
	            # The segment file is only a scratch copy; delete it even when
	            # synthesis or decoding fails so it never lingers in PODCAST_DIR.
	            if os.path.exists(temp_audio):
	                os.remove(temp_audio)
	    if not audio_segments:
	        return None

	    # Concatenate the segments in order (relies on AudioSegment supporting
	    # sum() with the implicit integer 0 start value).
	    podcast_audio = sum(audio_segments)

	    # Accept both upload shapes: a plain path string, or a file-like object
	    # exposing the path as ``.name``.
	    if isinstance(bgm_file, str):
	        bgm_path = bgm_file
	    else:
	        bgm_path = getattr(bgm_file, "name", None)
	    if bgm_path and os.path.isfile(bgm_path):
	        # Lower the music by 10 dB and trim it to the speech length.
	        bgm = AudioSegment.from_file(bgm_path).apply_gain(-10)
	        bgm = bgm[:len(podcast_audio)]
	        podcast_audio = podcast_audio.overlay(bgm)

	    podcast_file = generate_unique_filename(PODCAST_DIR, prefix="podcast")
	    podcast_audio.export(podcast_file, format="mp3")

	    # Metadata goes next to the audio with the same base name; swap only
	    # the extension rather than replacing ".mp3" anywhere in the path.
	    meta_file = os.path.splitext(podcast_file)[0] + ".txt"
	    with open(meta_file, "w", encoding="utf-8") as f:
	        f.write(f"Title: {podcast_title}\nDescription: {podcast_desc}\n")
	    return podcast_file

	def clear_textbox():
	    """Return an empty string, used as the new value that clears a textbox."""
	    blank = ""
	    return blank

	def clear_paragraphs():
	    """Return a fresh paragraph list holding a single empty paragraph."""
	    reset_paragraphs = [""]
	    return reset_paragraphs

	def add_paragraph(paragraphs):
	    """Return a new list equal to *paragraphs* plus one empty paragraph.

	    The input list is left untouched.
	    """
	    return paragraphs + [""]

	def remove_paragraph(paragraphs):
	    """Return a new list without the last paragraph, keeping at least one.

	    Lists with zero or one entry are returned as an unchanged copy. The
	    input list is never modified.
	    """
	    if len(paragraphs) <= 1:
	        return paragraphs.copy()
	    return paragraphs[:-1]

	def update_paragraphs_ui(paragraphs):
	    """Build one interactive, three-line Textbox per paragraph string.

	    Each box is pre-filled with its paragraph and labelled with its
	    1-based position. Returns the boxes as a list, in input order.
	    """
	    boxes = []
	    for position, content in enumerate(paragraphs, start=1):
	        boxes.append(
	            gr.Textbox(value=content, label=f"段落{position}內容", lines=3, interactive=True)
	        )
	    return boxes

	def collect_paragraphs(*args):
	    """Gather every positional argument (one per paragraph) into a list."""
	    return [*args]

	async def main():
	    """Build the Gradio interface and launch it.

	    The UI has four tabs: single-clip speech synthesis, a browser for saved
	    clips, a multi-paragraph podcast builder, and a browser for saved
	    podcasts. The voice list is fetched once, before the layout is built.

	    The podcast tab pre-creates a fixed pool of paragraph textboxes and the
	    add/remove/clear buttons only change which of them are visible; the
	    number of visible boxes is tracked per session in ``paragraphs_state``.
	    """
	    voices = await get_voices()

	    # Use the preferred voice only if the service actually offers it;
	    # otherwise fall back to the first available voice (or no selection).
	    preferred_voice = "zh-CN-XiaoxiaoNeural"
	    if preferred_voice in voices:
	        default_voice = preferred_voice
	    else:
	        default_voice = voices[0] if voices else None

	    # Size of the paragraph textbox pool, i.e. the most paragraphs a
	    # podcast script can have.
	    max_paragraphs = 10

	    with gr.Blocks(theme=gr.themes.Soft()) as demo:
	        gr.Markdown("## 🎙️ Edge TTS 語音合成與播客製作\n\n- 多段腳本自由增減、內容可清空\n- 介面直覺、操作友善、檔案自動管理")

	        with gr.Tab("語音合成"):
	            with gr.Row():
	                text_input = gr.Textbox(lines=5, label="輸入文本")
	                clear_btn = gr.Button("清空")
	            voice_input = gr.Dropdown(voices, label="選擇語音", value=default_voice)
	            rate_input = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
	            pitch_input = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
	            tts_btn = gr.Button("生成語音")
	            audio_output = gr.Audio(type="filepath", label="生成的語音")
	            # Keep the event handle: the saved-clip dropdown is refreshed
	            # after generation, but that dropdown is only created below.
	            tts_event = tts_btn.click(
	                fn=tts_interface,
	                inputs=[text_input, voice_input, rate_input, pitch_input],
	                outputs=audio_output,
	            )
	            clear_btn.click(fn=clear_textbox, outputs=text_input)

	        with gr.Tab("檢視已儲存語音"):
	            audio_files = gr.Dropdown(list_saved_audios(), label="選擇已儲存語音檔案", interactive=True)
	            saved_audio_output = gr.Audio(type="filepath", label="播放已儲存語音")
	            audio_files.change(fn=play_saved_audio, inputs=audio_files, outputs=saved_audio_output)

	            def refresh_saved_audios():
	                # Re-scan the folder so newly generated clips become selectable.
	                return gr.update(choices=list_saved_audios())

	            tts_event.then(fn=refresh_saved_audios, outputs=audio_files)

	        with gr.Tab("播客製作"):
	            gr.Markdown("### 📝 多段腳本輸入（可自由增減段落）")
	            # One list entry per visible paragraph box; only its length is
	            # used, the text itself lives in the textboxes.
	            paragraphs_state = gr.State([""])

	            # Create the whole pool up front (only the first box visible).
	            # Handlers toggle visibility instead of adding components,
	            # because event handlers can only update existing components.
	            with gr.Column():
	                paragraph_boxes = [
	                    gr.Textbox(
	                        value="",
	                        label=f"段落{i+1}內容",
	                        lines=3,
	                        interactive=True,
	                        visible=(i == 0),
	                    )
	                    for i in range(max_paragraphs)
	                ]

	            add_btn = gr.Button("新增段落")
	            remove_btn = gr.Button("刪除段落")
	            clear_all_btn = gr.Button("全部清空")

	            def box_updates(paragraphs):
	                # Show the first len(paragraphs) boxes; hide and empty the
	                # rest so a removed paragraph does not reappear with stale text.
	                shown = len(paragraphs)
	                updates = []
	                for i in range(max_paragraphs):
	                    if i < shown:
	                        updates.append(gr.update(visible=True))
	                    else:
	                        updates.append(gr.update(visible=False, value=""))
	                return updates

	            def on_add(paragraphs):
	                # Never grow past the pool size.
	                if len(paragraphs) < max_paragraphs:
	                    paragraphs = add_paragraph(paragraphs)
	                return (paragraphs, *box_updates(paragraphs))

	            def on_remove(paragraphs):
	                paragraphs = remove_paragraph(paragraphs)
	                return (paragraphs, *box_updates(paragraphs))

	            def on_clear():
	                # Back to a single, empty, visible paragraph box.
	                paragraphs = clear_paragraphs()
	                resets = [
	                    gr.update(visible=(i == 0), value="")
	                    for i in range(max_paragraphs)
	                ]
	                return (paragraphs, *resets)

	            paragraph_outputs = [paragraphs_state, *paragraph_boxes]
	            add_btn.click(on_add, inputs=paragraphs_state, outputs=paragraph_outputs)
	            remove_btn.click(on_remove, inputs=paragraphs_state, outputs=paragraph_outputs)
	            clear_all_btn.click(on_clear, outputs=paragraph_outputs)

	            # Synthesis settings
	            voice_input2 = gr.Dropdown(voices, label="選擇語音", value=default_voice)
	            rate_input2 = gr.Slider(-50, 50, value=0, step=1, label="語速調整 (%)")
	            pitch_input2 = gr.Slider(-50, 50, value=0, step=1, label="音高調整 (Hz)")
	            bgm_input = gr.File(label="上傳背景音樂（可選）")
	            podcast_title = gr.Textbox(label="播客標題")
	            podcast_desc = gr.Textbox(label="播客描述")
	            podcast_btn = gr.Button("生成播客")
	            podcast_output = gr.Audio(type="filepath", label="生成的播客音檔")

	            async def on_podcast_btn_click(paragraphs, *args):
	                # args: every pooled paragraph box, then the six settings.
	                # Only the currently visible boxes form the script.
	                scripts = list(args[:max_paragraphs])[:len(paragraphs)]
	                voice, rate, pitch, bgm, title, desc = args[max_paragraphs:]
	                return await podcast_produce(scripts, voice, rate, pitch, bgm, title, desc)

	            podcast_event = podcast_btn.click(
	                fn=on_podcast_btn_click,
	                inputs=[
	                    paragraphs_state,
	                    *paragraph_boxes,
	                    voice_input2,
	                    rate_input2,
	                    pitch_input2,
	                    bgm_input,
	                    podcast_title,
	                    podcast_desc,
	                ],
	                outputs=podcast_output,
	            )

	        with gr.Tab("檢視已儲存播客"):
	            podcast_files = gr.Dropdown(list_saved_podcasts(), label="選擇已儲存播客檔案", interactive=True)
	            saved_podcast_output = gr.Audio(type="filepath", label="播放已儲存播客")
	            podcast_files.change(fn=play_saved_audio, inputs=podcast_files, outputs=saved_podcast_output)

	            def refresh_saved_podcasts():
	                # Re-scan the folder so the new podcast becomes selectable.
	                return gr.update(choices=list_saved_podcasts())

	            podcast_event.then(fn=refresh_saved_podcasts, outputs=podcast_files)

	    demo.launch()

	if __name__ == "__main__":
	    # main() is a coroutine function: create the coroutine, then drive it
	    # to completion on a fresh event loop.
	    app_coroutine = main()
	    asyncio.run(app_coroutine)