Spaces:

chaore
/

egtts

Sleeping

App Files Files Community

egtts / main.py

chaore

Upload 5 files

6ad2a4c verified about 1 month ago

raw

history blame contribute delete

19.7 kB

	"""
	本地配音软件 - 基于Edge TTS的文本转语音应用
	"""
	import gradio as gr
	import asyncio
	import os
	from pydub import AudioSegment
	from pydub.playback import play
	import tempfile
	from api import tts_api

	class TTSApp:
	    """Gradio front end for the text-to-speech backends exposed by ``tts_api``.

	    Constructing an instance builds the whole Blocks UI (tabs: single text,
	    batch processing, audio project, voice library) and stores it in
	    ``self.app``; ``run()`` then serves it.
	    """

	    # Voice preselected in every voice dropdown and used as the fallback choice.
	    _DEFAULT_VOICE = "zh-CN-XiaoxiaoNeural"

	    # Radio labels for the two synthesis backends; the second one routes the
	    # request to the ``*_hf`` functions of ``tts_api``.
	    _LOCAL_API = "Edge TTS (本地)"
	    _HF_API = "Hugging Face API"

	    # Language-filter label -> code handed to ``tts_api.get_available_voices``.
	    _LANGUAGE_CODES = {"中文": "zh", "英文": "en", "日文": "ja", "韩文": "ko"}

	    # Substrings that mark a voice as female in the library table; every
	    # other voice is listed as male.
	    _FEMALE_VOICE_HINTS = ('xiaoxiao', 'xiaoyi', 'nanami', 'sarah', 'jenny', 'aria')

	    def __init__(self):
	        self.app = self.create_interface()

	    # ------------------------------------------------------------------
	    # Pure helpers
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _voice_row(voice):
	        """Return the ``[name, locale, gender]`` table row for one voice id."""
	        parts = voice.split('-')
	        # Ids look like "zh-CN-XiaoxiaoNeural"; an id without a region part
	        # is shown unchanged instead of raising IndexError.
	        locale = parts[0] + '-' + parts[1] if len(parts) >= 2 else voice
	        lowered = voice.lower()
	        is_female = any(hint in lowered for hint in TTSApp._FEMALE_VOICE_HINTS)
	        return [voice, locale, "女声" if is_female else "男声"]

	    @staticmethod
	    def _safe_filename(name):
	        """Reduce a user-typed project name to a single safe file-name component.

	        Letters and digits (including CJK), ``-``, ``_``, ``.`` and spaces are
	        kept; everything else (notably path separators) becomes ``_`` so the
	        result can never point outside the directory it is joined to.
	        """
	        cleaned = "".join(
	            ch if ch.isalnum() or ch in "-_. " else "_" for ch in name.strip()
	        )
	        return cleaned or "project"

	    @staticmethod
	    def _discard(path):
	        """Best-effort removal of a temporary file that is no longer needed."""
	        try:
	            os.remove(path)
	        except OSError:
	            # Already gone or not removable; nothing useful to do about it.
	            pass

	    # ------------------------------------------------------------------
	    # Component factories (each creates components in the *current* layout
	    # context, so they must be called inside the relevant ``with`` block)
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _voice_dropdown(voices):
	        """Create a single-select voice dropdown preset to the default voice."""
	        return gr.Dropdown(
	            choices=voices,
	            value=TTSApp._DEFAULT_VOICE,
	            label="🗣️ 选择语音",
	            multiselect=False,
	        )

	    @staticmethod
	    def _rate_pitch_sliders():
	        """Create the speech-rate and pitch sliders; returns ``(rate, pitch)``."""
	        rate = gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="⏩ 语速调整 (%)")
	        pitch = gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="🎵 音调调整 (Hz)")
	        return rate, pitch

	    @staticmethod
	    def _api_radio():
	        """Create the backend selector, defaulting to the local Edge TTS backend."""
	        return gr.Radio(
	            choices=[TTSApp._LOCAL_API, TTSApp._HF_API],
	            value=TTSApp._LOCAL_API,
	            label="🌐 API选择",
	        )

	    # ------------------------------------------------------------------
	    # Event handlers
	    # ------------------------------------------------------------------
	    @staticmethod
	    def _update_voice_list(language):
	        """Return a voice dropdown restricted to the chosen language filter."""
	        code = TTSApp._LANGUAGE_CODES.get(language)
	        if code is not None:
	            voices = tts_api.get_available_voices(code)
	        else:
	            voices = tts_api.get_available_voices()
	            if language == "其他":
	                # "Other" means every voice not covered by a named language.
	                known = tuple(f"{c}-" for c in TTSApp._LANGUAGE_CODES.values())
	                voices = [v for v in voices if not v.lower().startswith(known)]

	        return gr.Dropdown(choices=voices, value=voices[0] if voices else TTSApp._DEFAULT_VOICE)

	    @staticmethod
	    def _get_voice_info(voice):
	        """Look up details for ``voice``; errors are returned as a dict, not raised."""
	        try:
	            info = asyncio.run(tts_api.get_voice_info(voice))
	            return info or {"错误": "未找到语音信息"}
	        except Exception as e:
	            return {"错误": str(e)}

	    @staticmethod
	    def _play_audio(audio_path):
	        """Play an audio file through pydub and return a status message.

	        NOTE(review): not bound to any control in this UI; kept for parity
	        with the original code.
	        """
	        if audio_path and os.path.exists(audio_path):
	            try:
	                audio = AudioSegment.from_file(audio_path)
	                play(audio)
	                return "音频播放成功"
	            except Exception as e:
	                return f"播放失败: {str(e)}"
	        return "没有可播放的音频文件"

	    @staticmethod
	    async def _generate_speech(text, voice, rate, pitch, format_type, api_type):
	        """Synthesize ``text`` into one audio file.

	        Returns ``(audio_path, status_message)``; ``audio_path`` is ``None``
	        on empty input or failure.
	        """
	        if not text or not text.strip():
	            return None, "请输入要转换的文本"

	        # File extension follows the selected output format.
	        ext = ".mp3" if format_type == "MP3" else ".wav"

	        # Only the unique path is needed here; the backend writes the file.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=ext) as temp_file:
	            output_path = temp_file.name

	        try:
	            if api_type == TTSApp._HF_API:
	                synthesize = tts_api.text_to_speech_hf
	            else:
	                synthesize = tts_api.text_to_speech
	            result = await synthesize(text, voice, rate, pitch, output_path, format_type.lower())
	        except Exception as e:
	            TTSApp._discard(output_path)
	            return None, f"生成语音时出错: {str(e)}"

	        if result:
	            return result, "语音生成成功"
	        # Nothing usable was produced, so do not leave the placeholder behind.
	        TTSApp._discard(output_path)
	        return None, "语音生成失败"

	    @staticmethod
	    async def _batch_generate(texts, voice, rate, pitch, api_type):
	        """Synthesize one audio file per non-empty line and zip the results.

	        Returns ``(zip_path, status_message)``; ``zip_path`` is ``None`` when
	        there is no input or nothing could be generated.
	        """
	        if not texts or not texts.strip():
	            return None, "请输入要转换的文本"

	        # One paragraph per line; blank lines are ignored.
	        text_list = [t.strip() for t in texts.split('\n') if t.strip()]
	        if not text_list:
	            return None, "没有有效的文本段落"

	        try:
	            if api_type == TTSApp._HF_API:
	                audio_files = []
	                for text in text_list:
	                    audio_file = await tts_api.text_to_speech_hf(
	                        text, voice, rate, pitch, output_format="mp3"
	                    )
	                    audio_files.append(audio_file)
	            else:
	                audio_files = await tts_api.batch_text_to_speech(text_list, voice, rate, pitch)

	            audio_files = audio_files or []
	            produced = [f for f in audio_files if f]
	            if not produced:
	                # Do not hand back an empty archive labelled as a success.
	                return None, "批量生成失败: 没有生成任何音频文件"

	            # Pack the audio files into a zip archive.
	            import zipfile
	            with tempfile.NamedTemporaryFile(delete=False, suffix='.zip') as zip_file:
	                zip_path = zip_file.name
	            with zipfile.ZipFile(zip_path, 'w') as zf:
	                # Entry numbers follow the input line order, so a failed
	                # line leaves a gap rather than shifting later entries.
	                for i, audio_file in enumerate(audio_files, start=1):
	                    if audio_file:
	                        zf.write(audio_file, f"audio_{i}.mp3")

	            return zip_path, f"成功生成 {len(produced)} 个音频文件"
	        except Exception as e:
	            return None, f"批量生成失败: {str(e)}"

	    @staticmethod
	    async def _create_audio_project(name, segments, voice, rate, pitch, api_type):
	        """Render a list of ``{"text", "delay"}`` segments into one audio file.

	        ``delay`` is the silence, in milliseconds, inserted before a segment.
	        Returns ``(audio_path, status_message)``; ``audio_path`` is ``None``
	        on failure.
	        """
	        if not name or not name.strip():
	            return None, "请输入项目名称"

	        try:
	            if api_type != TTSApp._HF_API:
	                # The local backend builds the whole project itself.
	                project_file = await tts_api.create_audio_project(
	                    name, segments, voice, rate, pitch
	                )
	                if project_file:
	                    return project_file, f"项目 '{name}' 创建成功"
	                return None, "项目创建失败"

	            # Hugging Face backend: synthesize each segment, then merge.
	            temp_dir = tempfile.mkdtemp()
	            segment_files = []
	            for i, segment in enumerate(segments or []):
	                text = segment.get("text", "")
	                if not text.strip():
	                    continue

	                delay = segment.get("delay", 0)  # delay in milliseconds
	                segment_file = os.path.join(temp_dir, f"segment_{i}.mp3")
	                result = await tts_api.text_to_speech_hf(
	                    text, voice, rate, pitch, segment_file, "mp3"
	                )
	                if result:
	                    segment_files.append((result, delay))

	            if not segment_files:
	                return None, "项目创建失败"

	            combined_audio = AudioSegment.empty()
	            for audio_file, delay in segment_files:
	                if delay > 0:
	                    # Silence gap before this segment.
	                    combined_audio += AudioSegment.silent(duration=delay)
	                combined_audio += AudioSegment.from_file(audio_file, format="mp3")

	            # The project name is user input: sanitize it before using it
	            # as a file name so it cannot escape temp_dir.
	            output_path = os.path.join(temp_dir, f"{TTSApp._safe_filename(name)}.mp3")
	            combined_audio.export(output_path, format="mp3")

	            # Remove the per-segment files, but never the final output (a
	            # project named like a segment file would share its path).
	            for audio_file, _ in segment_files:
	                if audio_file != output_path:
	                    TTSApp._discard(audio_file)

	            return output_path, f"项目 '{name}' 创建成功"
	        except Exception as e:
	            return None, f"创建项目时出错: {str(e)}"

	    # ------------------------------------------------------------------
	    # UI construction
	    # ------------------------------------------------------------------
	    def create_interface(self):
	        """Build the Gradio Blocks UI, bind its event handlers and return it."""
	        # Fetch the voice list once and share it between every dropdown and
	        # the voice-library table.
	        voices = tts_api.get_available_voices()

	        with gr.Blocks(title="本地配音软件") as app:  # theme argument removed
	            gr.Markdown("# <center> 🎙️ 本地配音软件 </center>")
	            gr.Markdown("基于Edge TTS和Hugging Face Spaces的文本转语音工具，支持多语言和多种语音")

	            with gr.Tab("文本配音"):
	                with gr.Row():
	                    with gr.Column(scale=2):
	                        text_input = gr.TextArea(
	                            label="📝 输入文本",
	                            placeholder="在此输入您要转换为语音的文本...",
	                            lines=12
	                        )

	                        with gr.Row():
	                            voice_selection = self._voice_dropdown(voices)
	                            language_filter = gr.Dropdown(
	                                choices=["全部", "中文", "英文", "日文", "韩文", "其他"],
	                                value="全部",
	                                label="🌐 语言筛选"
	                            )

	                        with gr.Row():
	                            rate_slider, pitch_slider = self._rate_pitch_sliders()

	                        with gr.Row():
	                            api_selection = self._api_radio()

	                        with gr.Row():
	                            generate_btn = gr.Button("🔊 生成语音", variant="primary", scale=1)
	                            # NOTE(review): this button has no click handler.
	                            batch_generate_btn = gr.Button("📦 批量生成", variant="secondary", scale=1)

	                    with gr.Column(scale=1):
	                        audio_output = gr.Audio(label="🎧 生成的语音", type="filepath")
	                        status_output = gr.Textbox(label="📊 状态信息", interactive=False)

	                        with gr.Group():
	                            gr.Markdown("### 📁 输出选项")
	                            output_format = gr.Radio(
	                                choices=["MP3", "WAV"],
	                                value="MP3",
	                                label="输出格式"
	                            )

	                        with gr.Group():
	                            gr.Markdown("### 📚 语音预览")
	                            voice_info_btn = gr.Button("🔍 查看语音信息")
	                            voice_info_output = gr.JSON(label="语音详情")

	            with gr.Tab("批量处理"):
	                with gr.Row():
	                    batch_text_input = gr.TextArea(
	                        label="📝 批量文本输入（每行一段）",
	                        placeholder="每行输入一段文本，将为每段文本生成对应的语音",
	                        lines=10
	                    )

	                with gr.Row():
	                    batch_voice_selection = self._voice_dropdown(voices)
	                    batch_rate_slider, batch_pitch_slider = self._rate_pitch_sliders()

	                with gr.Row():
	                    batch_api_selection = self._api_radio()

	                batch_generate_btn2 = gr.Button("📦 生成批量语音", variant="primary")
	                batch_output = gr.File(label="📥 下载批量生成的音频", interactive=False)

	            with gr.Tab("音频项目"):
	                with gr.Row():
	                    with gr.Column():
	                        project_name = gr.Textbox(
	                            label="📋 项目名称",
	                            placeholder="输入项目名称",
	                            value="my_audio_project"
	                        )

	                        segments_input = gr.JSON(
	                            label="📝 音频片段",
	                            value=[{"text": "第一段文本", "delay": 0}, {"text": "第二段文本", "delay": 1000}]
	                        )

	                        with gr.Row():
	                            # NOTE(review): neither button has a click handler.
	                            add_segment_btn = gr.Button("➕ 添加片段")
	                            remove_segment_btn = gr.Button("➖ 删除片段")

	                        project_voice_selection = self._voice_dropdown(voices)

	                        with gr.Row():
	                            project_rate_slider, project_pitch_slider = self._rate_pitch_sliders()

	                        with gr.Row():
	                            project_api_selection = self._api_radio()

	                        create_project_btn = gr.Button("🎬 创建音频项目", variant="primary")
	                        project_output = gr.Audio(label="🎧 项目音频输出", type="filepath")

	            with gr.Tab("语音库"):
	                with gr.Row():
	                    voice_table = gr.Dataframe(
	                        headers=["语音名称", "语言", "性别"],
	                        datatype=["str", "str", "str"],
	                        value=[self._voice_row(v) for v in voices],
	                        label="可用语音列表",
	                        interactive=False
	                    )

	            # ---- event bindings ----
	            language_filter.change(
	                fn=self._update_voice_list,
	                inputs=language_filter,
	                outputs=voice_selection
	            )

	            # The async handlers are driven with asyncio.run from plain
	            # callables, exactly as the original lambdas did.
	            generate_btn.click(
	                fn=lambda text, voice, rate, pitch, fmt, api: asyncio.run(
	                    self._generate_speech(text, voice, rate, pitch, fmt, api)
	                ),
	                inputs=[text_input, voice_selection, rate_slider, pitch_slider, output_format, api_selection],
	                outputs=[audio_output, status_output]
	            )

	            # The button must read the currently selected voice, not itself.
	            voice_info_btn.click(
	                fn=self._get_voice_info,
	                inputs=voice_selection,
	                outputs=voice_info_output
	            )

	            # Refresh the voice details whenever the selection changes.
	            voice_selection.change(
	                fn=self._get_voice_info,
	                inputs=voice_selection,
	                outputs=voice_info_output
	            )

	            batch_generate_btn2.click(
	                fn=lambda texts, voice, rate, pitch, api: asyncio.run(
	                    self._batch_generate(texts, voice, rate, pitch, api)
	                ),
	                inputs=[batch_text_input, batch_voice_selection, batch_rate_slider, batch_pitch_slider, batch_api_selection],
	                outputs=[batch_output, status_output]
	            )

	            create_project_btn.click(
	                fn=lambda name, segments, voice, rate, pitch, api: asyncio.run(
	                    self._create_audio_project(name, segments, voice, rate, pitch, api)
	                ),
	                inputs=[project_name, segments_input, project_voice_selection, project_rate_slider, project_pitch_slider, project_api_selection],
	                outputs=[project_output, status_output]
	            )

	        return app

	    def run(self, share=False, server_name="127.0.0.1", server_port=7860):
	        """Start the app.

	        Args:
	            share: passed to ``launch`` as its ``share`` argument.
	            server_name: interface to bind; the default keeps the app local.
	            server_port: TCP port to listen on.
	        """
	        self.app.launch(server_name=server_name, server_port=server_port, share=share)

	if __name__ == "__main__":
	    # Script entry point: build the UI and serve it with the default settings.
	    TTSApp().run()