|
|
import gradio as gr |
|
|
import subprocess |
|
|
import os |
|
|
|
|
|
|
|
|
|
|
|
def synthesize_espeak(text: str, lang: str = "en-us") -> str | None:
    """Synthesize speech from ``text`` with the espeak-ng command-line tool.

    The espeak-ng binary must be installed in the runtime environment (for a
    Space, through its Dockerfile).

    Args:
        text: Text to speak.
        lang: espeak-ng voice name, appended to the ``-v`` option.

    Returns:
        Path of the WAV file that was written, or ``None`` when the text is
        blank, espeak-ng produced no audio, or the command timed out.

    Raises:
        gr.Error: If espeak-ng is not installed, exits with a non-zero
            status, fails unexpectedly, or the audio file cannot be written.
    """
    # NOTE: this fixed path is shared by every call, so overlapping calls
    # would overwrite each other's audio.
    output_file = "espeak_output.wav"

    # Drop audio left over from a previous call so a stale file never
    # outlives a failed synthesis.
    try:
        os.remove(output_file)
    except FileNotFoundError:
        pass

    if not text or not text.strip():
        gr.Warning("请输入要合成的文本。")
        return None

    # "--" ends option parsing: user text that starts with "-" must be spoken,
    # not interpreted as an espeak-ng command-line option.
    command = ["espeak-ng", f"-v{lang}", "--stdout", "--", text]

    try:
        process = subprocess.run(command, capture_output=True, check=True, timeout=10)
    except FileNotFoundError as e:
        error_msg = "错误:未找到 espeak-ng。请确保它已安装在您 Space 的 Dockerfile 中,并且 Space 已重建。"
        print(error_msg)
        # gr.Error is an exception type; it only reaches the user when raised.
        raise gr.Error(error_msg) from e
    except subprocess.CalledProcessError as e:
        # stderr is raw bytes from an external tool; never let decoding fail.
        stderr_text = (e.stderr or b"").decode(errors="replace")
        error_msg = f"eSpeak-ng 合成过程中出现错误。命令以代码 {e.returncode} 退出。错误输出:{stderr_text}"
        print(error_msg)
        raise gr.Error(error_msg) from e
    except subprocess.TimeoutExpired:
        error_msg = "eSpeak-ng 命令超时。文本可能过长或过于复杂。"
        print(error_msg)
        gr.Warning(error_msg)
        return None
    except Exception as e:
        # UI boundary: report anything unforeseen instead of crashing silently.
        error_msg = f"eSpeak-ng 合成过程中发生意外错误:{e}"
        print(error_msg)
        raise gr.Error(error_msg) from e

    if not process.stdout:
        gr.Warning("eSpeak-ng 没有为给定文本生成任何音频输出。请尝试不同的文本。")
        print(f"eSpeak-ng 为文本 '{text}' 未产生输出。")
        return None

    try:
        with open(output_file, "wb") as f:
            f.write(process.stdout)
    except OSError as e:
        error_msg = f"eSpeak-ng 合成过程中发生意外错误:{e}"
        print(error_msg)
        raise gr.Error(error_msg) from e

    print(f"eSpeak-ng 合成成功: {output_file}")
    return output_file
|
|
|
|
|
def synthesize_api_tts(text: str) -> str | None: |
|
|
""" |
|
|
基于 API 的文本转语音服务(例如 Azure TTS, Google TTS)的占位符。 |
|
|
在真实应用中,您将在这里发出 HTTP 请求到 API。 |
|
|
对于此演示,它返回一个占位符音频文件。 |
|
|
""" |
|
|
print(f"正在模拟 API TTS:'{text}'") |
|
|
return "https://www.soundhelix.com/examples/mp3/SoundHelix-Song-2.mp3" |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
# UI layout: one shared text box, one button per synthesizer, and one audio
# player per synthesizer for side-by-side comparison.
with gr.Blocks() as demo:
    gr.Markdown(value="# 🎙️ 文本转音频模型对比实验")
    gr.Markdown(value="这是一个最简化的版本,用于排查启动问题。")

    text_input = gr.Textbox(
        lines=3,
        label="输入您想要合成的文本",
        placeholder="例如:您好,这是一个文本转音频的测试。",
    )

    with gr.Row():
        espeak_button = gr.Button(value="🎤 合成 (eSpeak-ng)")
        api_tts_button = gr.Button(value="🎧 合成 (API TTS 示例)")

    with gr.Row():
        espeak_output = gr.Audio(type="filepath", label="eSpeak-ng 输出")
        api_tts_output = gr.Audio(type="filepath", label="API TTS 示例输出")

    # Each button sends the shared text to its synthesizer and routes the
    # result to the matching audio player.
    espeak_button.click(fn=synthesize_espeak, inputs=[text_input], outputs=[espeak_output])
    api_tts_button.click(fn=synthesize_api_tts, inputs=[text_input], outputs=[api_tts_output])

# Enable the request queue, then start serving the app.
demo.queue()
demo.launch()