"""
本地配音软件 (Local Voice-Over Studio): a text-to-speech application built on Edge TTS.
"""

import asyncio
import os
import shutil
import tempfile
import zipfile

import gradio as gr
from pydub import AudioSegment
from pydub.playback import play

from api import tts_api


class TTSApp:
    """Gradio front end for the text-to-speech helpers exposed by ``tts_api``.

    The Blocks UI is built once in the constructor and stored on ``self.app``;
    call :meth:`run` to serve it.
    """

    # Voice preselected in every voice dropdown.
    DEFAULT_VOICE = "zh-CN-XiaoxiaoNeural"
    # Labels of the two synthesis backends offered in the UI.
    API_EDGE = "Edge TTS (本地)"
    API_HF = "Hugging Face API"
    # Language-filter label -> argument passed to
    # ``tts_api.get_available_voices``. Labels that are missing here
    # ("全部", "其他") list every voice.
    _LANGUAGE_CODES = {"中文": "zh", "英文": "en", "日文": "ja", "韩文": "ko"}
    # Lower-case substrings that make the voice table label a voice as female.
    _FEMALE_VOICE_HINTS = ("xiaoxiao", "xiaoyi", "nanami", "sarah", "jenny", "aria")

    def __init__(self):
        self.app = self.create_interface()

    # ------------------------------------------------------------------
    # Small pure helpers
    # ------------------------------------------------------------------
    @classmethod
    def _voice_row(cls, voice):
        """Return the ``[name, locale, gender]`` table row for one voice name.

        The locale is the first two dash-separated parts of the name (or the
        whole name when it has fewer parts). The gender is a heuristic: any
        name not containing one of ``_FEMALE_VOICE_HINTS`` is labelled male.
        """
        locale = "-".join(voice.split("-")[:2])
        lowered = voice.lower()
        is_female = any(hint in lowered for hint in cls._FEMALE_VOICE_HINTS)
        return [voice, locale, "女声" if is_female else "男声"]

    @staticmethod
    def _safe_file_stem(name):
        """Reduce a user-supplied project name to a bare file stem.

        Directory components are dropped so the name cannot point outside the
        directory it is joined to; an empty result falls back to a fixed stem.
        """
        return os.path.basename(name.strip()) or "audio_project"

    @staticmethod
    def _remove_quietly(path):
        """Delete ``path`` if possible; cleanup is best-effort by design."""
        try:
            os.remove(path)
        except OSError:
            # A leftover temporary file must never mask the real result.
            pass

    # ------------------------------------------------------------------
    # Widget factories (called inside the active layout context)
    # ------------------------------------------------------------------
    @classmethod
    def _voice_dropdown(cls, voices, **extra):
        """Create a voice dropdown preselecting ``DEFAULT_VOICE``."""
        return gr.Dropdown(
            choices=voices,
            value=cls.DEFAULT_VOICE,
            label="🗣️ 选择语音",
            **extra,
        )

    @staticmethod
    def _rate_slider():
        """Create the speaking-rate slider (-50..50, default 0)."""
        return gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="⏩ 语速调整 (%)")

    @staticmethod
    def _pitch_slider():
        """Create the pitch slider (-50..50, default 0)."""
        return gr.Slider(minimum=-50, maximum=50, value=0, step=1, label="🎵 音调调整 (Hz)")

    @classmethod
    def _api_radio(cls):
        """Create the backend selector, defaulting to Edge TTS."""
        return gr.Radio(
            choices=[cls.API_EDGE, cls.API_HF],
            value=cls.API_EDGE,
            label="🌐 API选择",
        )

    # ------------------------------------------------------------------
    # Event handlers
    # ------------------------------------------------------------------
    @classmethod
    def _update_voice_list(cls, language):
        """Return a dropdown update listing the voices for ``language``."""
        code = cls._LANGUAGE_CODES.get(language)
        if code:
            voices = tts_api.get_available_voices(code)
        else:
            voices = tts_api.get_available_voices()
        return gr.Dropdown(
            choices=voices,
            value=voices[0] if voices else cls.DEFAULT_VOICE,
        )

    @classmethod
    async def _generate_speech(cls, text, voice, rate, pitch, format_type, api_type):
        """Synthesize ``text`` into a temporary audio file.

        Returns:
            ``(audio_path, status_message)``; ``audio_path`` is ``None`` when
            nothing was generated.
        """
        if not text or not text.strip():
            return None, "请输入要转换的文本"

        suffix = ".mp3" if format_type == "MP3" else ".wav"
        # Only the path is needed: close the handle right away and let the
        # backend write to it.
        with tempfile.NamedTemporaryFile(delete=False, suffix=suffix) as tmp:
            output_path = tmp.name

        try:
            if api_type == cls.API_HF:
                synthesize = tts_api.text_to_speech_hf
            else:
                synthesize = tts_api.text_to_speech
            result = await synthesize(
                text, voice, rate, pitch, output_path, format_type.lower()
            )
        except Exception as e:
            cls._remove_quietly(output_path)
            return None, f"生成语音时出错: {str(e)}"

        if result:
            return result, "语音生成成功"
        # Nothing usable was produced; do not leave the placeholder behind.
        cls._remove_quietly(output_path)
        return None, "语音生成失败"

    @staticmethod
    def _play_audio(audio_path):
        """Play ``audio_path`` on the local machine and report the outcome.

        NOTE(review): no UI event calls this helper; it is kept because the
        original file defined it.
        """
        if audio_path and os.path.exists(audio_path):
            try:
                play(AudioSegment.from_file(audio_path))
                return "音频播放成功"
            except Exception as e:
                return f"播放失败: {str(e)}"
        return "没有可播放的音频文件"

    @staticmethod
    def _get_voice_info(voice):
        """Look up details for ``voice``; errors are returned as a dict."""
        try:
            info = asyncio.run(tts_api.get_voice_info(voice))
            return info or {"错误": "未找到语音信息"}
        except Exception as e:
            return {"错误": str(e)}

    @classmethod
    async def _batch_generate(cls, texts, voice, rate, pitch, api_type):
        """Synthesize every non-empty line of ``texts`` and zip the results.

        Returns:
            ``(zip_path, status_message)``; ``zip_path`` is ``None`` on failure
            or when no line produced audio.
        """
        text_list = [line.strip() for line in (texts or "").split("\n") if line.strip()]
        if not text_list:
            return None, "请输入要转换的文本"

        zip_path = None
        try:
            if api_type == cls.API_HF:
                audio_files = []
                for text in text_list:
                    audio_files.append(
                        await tts_api.text_to_speech_hf(
                            text, voice, rate, pitch, output_format="mp3"
                        )
                    )
            else:
                audio_files = await tts_api.batch_text_to_speech(
                    text_list, voice, rate, pitch
                )

            # Keep the 1-based position of each line so archive names still
            # match the input order when some lines failed.
            generated = [
                (index, path)
                for index, path in enumerate(audio_files or [], start=1)
                if path
            ]
            if not generated:
                return None, "批量生成失败: 未生成任何音频文件"

            with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
                zip_path = tmp.name
            with zipfile.ZipFile(zip_path, "w") as archive:
                for index, path in generated:
                    archive.write(path, f"audio_{index}.mp3")

            return zip_path, f"成功生成 {len(generated)} 个音频文件"
        except Exception as e:
            if zip_path:
                cls._remove_quietly(zip_path)
            return None, f"批量生成失败: {str(e)}"

    @classmethod
    async def _create_audio_project(cls, name, segments, voice, rate, pitch, api_type):
        """Render ``segments`` into a single project audio file.

        Each segment is a mapping with a ``"text"`` entry and an optional
        ``"delay"`` (milliseconds of silence inserted before the segment).

        Returns:
            ``(audio_path, status_message)``; ``audio_path`` is ``None`` on
            failure.
        """
        if not name or not name.strip():
            return None, "请输入项目名称"

        try:
            if api_type == cls.API_HF:
                return await cls._create_project_hf(name, segments, voice, rate, pitch)

            project_file = await tts_api.create_audio_project(
                name, segments, voice, rate, pitch
            )
            if project_file:
                return project_file, f"项目 '{name}' 创建成功"
            return None, "项目创建失败"
        except Exception as e:
            return None, f"创建项目时出错: {str(e)}"

    @classmethod
    async def _create_project_hf(cls, name, segments, voice, rate, pitch):
        """Hugging Face variant: synthesize each segment, then concatenate."""
        temp_dir = tempfile.mkdtemp()
        try:
            segment_files = []
            for index, segment in enumerate(segments or []):
                text = segment.get("text") or ""
                if not text.strip():
                    continue
                delay = segment.get("delay", 0)
                segment_file = os.path.join(temp_dir, f"segment_{index}.mp3")
                result = await tts_api.text_to_speech_hf(
                    text, voice, rate, pitch, segment_file, "mp3"
                )
                if result:
                    segment_files.append((result, delay))

            if not segment_files:
                shutil.rmtree(temp_dir, ignore_errors=True)
                return None, "项目创建失败"

            combined_audio = AudioSegment.empty()
            for audio_file, delay in segment_files:
                if delay > 0:
                    combined_audio += AudioSegment.silent(duration=delay)
                combined_audio += AudioSegment.from_file(audio_file, format="mp3")

            output_path = os.path.join(temp_dir, f"{cls._safe_file_stem(name)}.mp3")
            combined_audio.export(output_path, format="mp3")
        except Exception:
            # Do not leave a half-filled working directory behind.
            shutil.rmtree(temp_dir, ignore_errors=True)
            raise

        # The per-segment files are no longer needed once merged.
        for audio_file, _ in segment_files:
            cls._remove_quietly(audio_file)

        return output_path, f"项目 '{name}' 创建成功"

    # ------------------------------------------------------------------
    # UI construction
    # ------------------------------------------------------------------
    def create_interface(self):
        """Build the Gradio Blocks UI and wire up its event handlers.

        Returns:
            The configured ``gr.Blocks`` instance (not yet launched).
        """
        # Query the voice catalogue once and reuse it for every widget.
        voices = list(tts_api.get_available_voices())

        with gr.Blocks(title="本地配音软件") as app:
            gr.Markdown("# <center> 🎙️ 本地配音软件 </center>")
            gr.Markdown("基于Edge TTS和Hugging Face Spaces的文本转语音工具,支持多语言和多种语音")

            with gr.Tab("文本配音"):
                with gr.Row():
                    with gr.Column(scale=2):
                        text_input = gr.TextArea(
                            label="📝 输入文本",
                            placeholder="在此输入您要转换为语音的文本...",
                            lines=12,
                        )

                        with gr.Row():
                            voice_selection = self._voice_dropdown(voices, multiselect=False)
                            language_filter = gr.Dropdown(
                                choices=["全部", "中文", "英文", "日文", "韩文", "其他"],
                                value="全部",
                                label="🌐 语言筛选",
                            )

                        with gr.Row():
                            rate_slider = self._rate_slider()
                            pitch_slider = self._pitch_slider()

                        with gr.Row():
                            api_selection = self._api_radio()

                        with gr.Row():
                            generate_btn = gr.Button("🔊 生成语音", variant="primary", scale=1)
                            # NOTE(review): this button has no click handler.
                            batch_generate_btn = gr.Button("📦 批量生成", variant="secondary", scale=1)

                    with gr.Column(scale=1):
                        audio_output = gr.Audio(label="🎧 生成的语音", type="filepath")
                        # The batch and project handlers below also report
                        # into this textbox, which lives on the first tab.
                        status_output = gr.Textbox(label="📊 状态信息", interactive=False)

                        with gr.Group():
                            gr.Markdown("### 📁 输出选项")
                            output_format = gr.Radio(
                                choices=["MP3", "WAV"],
                                value="MP3",
                                label="输出格式",
                            )

                        with gr.Group():
                            gr.Markdown("### 📚 语音预览")
                            voice_info_btn = gr.Button("🔍 查看语音信息")
                            voice_info_output = gr.JSON(label="语音详情")

            with gr.Tab("批量处理"):
                with gr.Row():
                    batch_text_input = gr.TextArea(
                        label="📝 批量文本输入(每行一段)",
                        placeholder="每行输入一段文本,将为每段文本生成对应的语音",
                        lines=10,
                    )

                with gr.Row():
                    batch_voice_selection = self._voice_dropdown(voices)
                    batch_rate_slider = self._rate_slider()
                    batch_pitch_slider = self._pitch_slider()

                with gr.Row():
                    batch_api_selection = self._api_radio()

                batch_generate_btn2 = gr.Button("📦 生成批量语音", variant="primary")
                batch_output = gr.File(label="📥 下载批量生成的音频", interactive=False)

            with gr.Tab("音频项目"):
                with gr.Row():
                    with gr.Column():
                        project_name = gr.Textbox(
                            label="📋 项目名称",
                            placeholder="输入项目名称",
                            value="my_audio_project",
                        )

                        segments_input = gr.JSON(
                            label="📝 音频片段",
                            value=[
                                {"text": "第一段文本", "delay": 0},
                                {"text": "第二段文本", "delay": 1000},
                            ],
                        )

                        with gr.Row():
                            # NOTE(review): neither segment button has a
                            # click handler.
                            add_segment_btn = gr.Button("➕ 添加片段")
                            remove_segment_btn = gr.Button("➖ 删除片段")

                        project_voice_selection = self._voice_dropdown(voices)

                        with gr.Row():
                            project_rate_slider = self._rate_slider()
                            project_pitch_slider = self._pitch_slider()

                        with gr.Row():
                            project_api_selection = self._api_radio()

                        create_project_btn = gr.Button("🎬 创建音频项目", variant="primary")
                        project_output = gr.Audio(label="🎧 项目音频输出", type="filepath")

            with gr.Tab("语音库"):
                with gr.Row():
                    voice_table = gr.Dataframe(
                        headers=["语音名称", "语言", "性别"],
                        datatype=["str", "str", "str"],
                        value=[self._voice_row(voice) for voice in voices],
                        label="可用语音列表",
                        interactive=False,
                    )

            # ---- event wiring ------------------------------------------
            language_filter.change(
                fn=self._update_voice_list,
                inputs=language_filter,
                outputs=voice_selection,
            )

            # The handlers are coroutines; each UI callback drives one to
            # completion with asyncio.run.
            generate_btn.click(
                fn=lambda text, voice, rate, pitch, fmt, api: asyncio.run(
                    self._generate_speech(text, voice, rate, pitch, fmt, api)
                ),
                inputs=[
                    text_input,
                    voice_selection,
                    rate_slider,
                    pitch_slider,
                    output_format,
                    api_selection,
                ],
                outputs=[audio_output, status_output],
            )

            # The lookup needs the selected voice, so the dropdown (not the
            # button itself) is the input.
            voice_info_btn.click(
                fn=self._get_voice_info,
                inputs=voice_selection,
                outputs=voice_info_output,
            )

            voice_selection.change(
                fn=self._get_voice_info,
                inputs=voice_selection,
                outputs=voice_info_output,
            )

            batch_generate_btn2.click(
                fn=lambda texts, voice, rate, pitch, api: asyncio.run(
                    self._batch_generate(texts, voice, rate, pitch, api)
                ),
                inputs=[
                    batch_text_input,
                    batch_voice_selection,
                    batch_rate_slider,
                    batch_pitch_slider,
                    batch_api_selection,
                ],
                outputs=[batch_output, status_output],
            )

            create_project_btn.click(
                fn=lambda name, segments, voice, rate, pitch, api: asyncio.run(
                    self._create_audio_project(name, segments, voice, rate, pitch, api)
                ),
                inputs=[
                    project_name,
                    segments_input,
                    project_voice_selection,
                    project_rate_slider,
                    project_pitch_slider,
                    project_api_selection,
                ],
                outputs=[project_output, status_output],
            )

        return app

    def run(self, share=False, server_name="127.0.0.1", server_port=7860):
        """Launch the app.

        Args:
            share: Forwarded to ``launch`` as ``share``.
            server_name: Address to bind; defaults to the local loopback.
            server_port: Port to listen on.
        """
        self.app.launch(server_name=server_name, server_port=server_port, share=share)


if __name__ == "__main__":
    # Build the interface and serve it with the default launch settings.
    app = TTSApp()
    app.run()