# voice / app.py  (Hugging Face Space file view: raw / history / blame)
# Uploaded by johnwang2026 -- commit d88cd51 (verified): "Update app.py" -- 2.09 kB
import gradio as gr
from transformers import AutoModel, AutoTokenizer # 彻底不用AutoModelForTextToSpeech
import soundfile as sf
import torch
import os
# Ultra-light Chinese TTS checkpoint (the original author notes it is about
# 1.2 GB, small enough for a free Space).
model_name = "yeyupiaoling/PP-TTS-v2"

# Pick the device explicitly: CUDA when torch reports it as available,
# otherwise fall back to CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Load the tokenizer and the model weights, then place the model on the
# selected device.
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModel.from_pretrained(model_name).to(device)
def generate_speech(text):
    """Synthesize speech for ``text`` and write it to a WAV file.

    Args:
        text: The text to synthesize. ``None``, non-string values, and empty
            or whitespace-only strings are rejected before the tokenizer or
            model is touched.

    Returns:
        A ``(path, status)`` tuple. On success ``path`` is ``"output.wav"``
        and ``status`` is a success message. On invalid input ``path`` is
        ``None`` and ``status`` is an error message.
    """
    # Validate first: a caller may pass None (which has no .strip()), and
    # that should produce the same error status as an empty string rather
    # than an AttributeError.
    if not isinstance(text, str) or not text.strip():
        return None, "错误:请输入有效文本!"

    # Encode the text; truncation=True with max_length=512 caps the input length.
    inputs = tokenizer(
        text,
        return_tensors="pt",
        padding=True,
        truncation=True,
        max_length=512,
    ).to(device)

    # Inference only, so disable gradient tracking to save memory.
    with torch.no_grad():
        output = model.generate(**inputs)

    # NOTE(review): assumes output["wav"] is a batched tensor and [0] selects
    # the single utterance -- confirm against the model's actual output.
    waveform = output["wav"].cpu().numpy()[0]

    # NOTE(review): the file name is fixed, so every call overwrites the
    # previous result; the 24000 Hz rate is taken from the original code.
    output_path = "output.wav"
    sf.write(output_path, waveform, samplerate=24000)
    return output_path, "语音生成成功!(超轻量模型,适配免费Space)"
# Minimal UI: a heading, one text input, an audio player, a status line,
# and a button that triggers synthesis.
with gr.Blocks(title="轻量中文TTS") as demo:
    # Page heading and a one-line description.
    gr.Markdown("# 🎤 免费中文文本转语音")
    gr.Markdown("基于PP-TTS-v2模型(体积1.2GB),适配免费Space,生成快速稳定")

    # Input and output widgets, created in the same order as before.
    text_input = gr.Textbox(lines=4, label="输入中文文本", placeholder="请输入中文文本(建议≤500字)...")
    audio_output = gr.Audio(type="filepath", label="生成的语音")
    status_text = gr.Textbox(interactive=False, label="状态")
    generate_btn = gr.Button("🚀 开始生成", variant="primary")

    # On click, run generate_speech on the textbox value and route its two
    # return values to the audio player and the status box.
    generate_btn.click(fn=generate_speech, inputs=text_input, outputs=[audio_output, status_text])

# Launch the app only when this file is run as a script.
if __name__ == "__main__":
    demo.launch()