# cv / app.py
# leesenx's picture
# Update app.py
# f0438bc verified
import os, sys, subprocess, numpy as np, gradio as gr
from huggingface_hub import snapshot_download
# --- One-time environment setup, executed at import time -------------------

# Fetch the CosyVoice sources together with their submodules. git refuses to
# clone into an existing non-empty directory, and check=True would turn that
# into a CalledProcessError, so the clone is skipped when a checkout from a
# previous run is already present.
if not os.path.isdir(os.path.join("CosyVoice", ".git")):
    subprocess.run(
        ["git", "clone", "--recursive", "https://github.com/FunAudioLLM/CosyVoice.git", "CosyVoice"],
        check=True,
    )

# Make the cloned code importable. Both entries are pushed to the front of
# sys.path, so "CosyVoice" ends up ahead of the Matcha-TTS submodule.
sys.path.insert(0, "CosyVoice/third_party/Matcha-TTS")
sys.path.insert(0, "CosyVoice")

# Download the pretrained SFT checkpoint; the returned value is handed to
# CosyVoice below as the model directory.
model_dir = snapshot_download("FunAudioLLM/CosyVoice-300M-SFT", local_dir="pretrained_models/CosyVoice-300M-SFT")

# This import has to stay below the sys.path changes: the package lives in
# the checkout that was just added to the import path.
from cosyvoice.cli.cosyvoice import CosyVoice

# Shared model instance and the speaker names it offers; both are used by the
# request handler and the UI definition further down.
cosyvoice = CosyVoice(model_dir)
spk_list = cosyvoice.list_available_spks()
def tts(text, spk):
    """Synthesize ``text`` with the speaker ``spk`` and return it for playback.

    Args:
        text: The text to speak.
        spk: Speaker name, one of the entries offered in the speaker dropdown.

    Returns:
        A ``(sample_rate, samples)`` tuple, where ``samples`` is a flat NumPy
        array holding every synthesized segment back to back.

    Raises:
        gr.Error: If the model produced no audio at all for the given input.
    """
    # inference_sft is consumed as an iterator and can produce more than one
    # result for a single request (upstream appears to split longer text into
    # segments -- confirm against the installed CosyVoice version). Keep all
    # of them instead of only one, otherwise part of the speech is dropped.
    segments = [
        result["tts_speech"].numpy().flatten()
        for result in cosyvoice.inference_sft(text, spk, stream=False)
    ]

    # With nothing yielded there is no audio to return; report that to the UI
    # rather than failing on an unbound local variable.
    if not segments:
        raise gr.Error("No audio was generated. Please enter some text to synthesize.")

    return (cosyvoice.sample_rate, np.concatenate(segments))
# UI components, created in the order the interface consumes them: the two
# inputs map positionally onto tts(text, spk), the output plays its result.
_text_box = gr.Textbox(label="Text", value="你好,我是通义生成式语音大模型。")
_speaker_menu = gr.Dropdown(choices=spk_list, value=spk_list[0], label="Speaker")
_audio_player = gr.Audio(label="Audio")

# Wire the components to the synthesis function.
demo = gr.Interface(fn=tts, inputs=[_text_box, _speaker_menu], outputs=_audio_player)

# Serve on all network interfaces at port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)