Spaces:
Running
Running
import os
import tempfile

import gradio as gr
import soundfile as sf
import torch
from transformers import AutoModelForTextToSpeech, AutoTokenizer
# Load the tokenizer and the model. The SoulX checkpoint is downloaded
# automatically, so the first build is noticeably slower.
# NOTE(review): confirm that the installed transformers release actually
# exports AutoModelForTextToSpeech and that this checkpoint loads through it.
model_name = "Soul-AILab/SoulX-Podcast-1.7B"
tokenizer = AutoTokenizer.from_pretrained(model_name)
model = AutoModelForTextToSpeech.from_pretrained(
    model_name,
    # device_map="auto" only decides where the weights are placed; it never
    # changes their dtype. Half precision is therefore requested only when a
    # CUDA device is available, and float32 is used on CPU-only hosts.
    torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32,
    device_map="auto",  # place the weights on the available device(s)
)
# Speech generation callback (wired to the Gradio interface).
def generate_speech(text):
    """Synthesize speech for *text* and return ``(wav_path, status_message)``.

    Args:
        text: Text entered by the user. ``None`` or a whitespace-only string
            is rejected without running the model.

    Returns:
        A 2-tuple ``(path, message)``. ``path`` is the WAV file written for
        this request, or ``None`` when the input was rejected. ``message`` is
        the status string to display.
    """
    # Reject None as well as blank input: calling .strip() on None would
    # raise AttributeError instead of returning the error message.
    if text is None or not text.strip():
        return None, "错误:请输入有效文本!"

    # Tokenize the text and move the tensors to the model's device.
    inputs = tokenizer(text, return_tensors="pt").to(model.device)

    # Inference only: disable gradient tracking to save memory.
    with torch.no_grad():
        generated = model.generate(**inputs)

    # soundfile cannot write float16 arrays, and the model may run in half
    # precision, so the first item of the batch is cast to float32 first.
    # NOTE(review): assumes generate() returns waveform tensors — confirm.
    waveform = generated[0].float().cpu().numpy()

    # Write to a unique temporary file per request. A fixed "output.wav"
    # would be overwritten whenever two requests overlap.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        output_path = tmp.name
    sf.write(output_path, waveform, samplerate=24000)
    return output_path, "语音生成成功!"
# Build the Gradio interface (the visual control panel).
# NOTE(review): indentation was lost in the pasted source; the row is assumed
# to hold the text box and the audio player, with the status box and button
# placed below it — confirm against the deployed layout.
with gr.Blocks(title="SoulX-Podcast-1.7B 中英双语TTS") as demo:
    gr.Markdown("# 🎤 SoulX-Podcast-1.7B 文本转语音")
    gr.Markdown("支持中英双语输入,生成自然流畅的语音(采样率24000Hz)")

    with gr.Row():
        # Multi-line text input.
        text_input = gr.Textbox(
            lines=5,
            label="输入文本",
            placeholder="请输入要转换的文本(建议≤500字),支持中英双语...",
        )
        # Audio player fed with the path of the generated file.
        audio_output = gr.Audio(type="filepath", label="生成的语音")

    # Read-only status line.
    status_text = gr.Textbox(interactive=False, label="状态")

    # Button that starts generation.
    generate_btn = gr.Button("🚀 开始生成", variant="primary")

    # Clicking the button runs generate_speech on the text box content and
    # routes its two return values to the audio player and the status line.
    generate_btn.click(
        generate_speech,
        inputs=[text_input],
        outputs=[audio_output, status_text],
    )

# Start the app when the file is executed as a script (a Hugging Face Space
# runs it automatically).
if __name__ == "__main__":
    demo.launch()