File size: 1,048 Bytes
f0438bc 2e78d7c 29b60da 232e893 29b60da 2841146 29b60da 232e893 2841146 232e893 2841146 29b60da 232e893 2841146 232e893 2841146 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 | import os, sys, subprocess, numpy as np, gradio as gr
from huggingface_hub import snapshot_download
# Fetch the CosyVoice sources only when the checkout is missing. `git clone`
# exits non-zero when the destination already exists and is not empty, and
# check=True turns that into an exception, so an unconditional clone would
# crash every start after the first one in the same working directory.
if not os.path.isdir("CosyVoice"):
    subprocess.run(
        ["git", "clone", "--recursive", "https://github.com/FunAudioLLM/CosyVoice.git", "CosyVoice"],
        check=True,
    )

# Make the cloned sources importable. Both entries are pushed to the front of
# sys.path, so "CosyVoice" ends up ahead of the bundled Matcha-TTS directory.
sys.path.insert(0, "CosyVoice/third_party/Matcha-TTS")
sys.path.insert(0, "CosyVoice")

# Download the pretrained SFT model snapshot into a local directory.
model_dir = snapshot_download("FunAudioLLM/CosyVoice-300M-SFT", local_dir="pretrained_models/CosyVoice-300M-SFT")

# This import only resolves after the sys.path changes above, so it cannot be
# moved to the top of the file.
from cosyvoice.cli.cosyvoice import CosyVoice

# Module-level model instance and speaker list shared by the UI code below.
cosyvoice = CosyVoice(model_dir)
spk_list = cosyvoice.list_available_spks()
def tts(text, spk):
    """Synthesize ``text`` with speaker ``spk`` and return the full audio.

    Args:
        text: Text to synthesize.
        spk: Speaker identifier passed through to ``cosyvoice.inference_sft``.

    Returns:
        A ``(sample_rate, samples)`` tuple where ``samples`` is a 1-D NumPy
        array holding every generated segment, in the order produced.

    Raises:
        gr.Error: If the model produced no audio at all for ``text``.
    """
    # inference_sft is iterated as a generator and may yield several results
    # for one request; keep all of them instead of only the last one.
    # Assumes each result["tts_speech"] is a CPU tensor exposing .numpy().
    segments = [
        result["tts_speech"].numpy().flatten()
        for result in cosyvoice.inference_sft(text, spk, stream=False)
    ]
    if not segments:
        # Previously this path failed with an UnboundLocalError on `audio`.
        raise gr.Error("No audio was generated for the given text.")
    return (cosyvoice.sample_rate, np.concatenate(segments))
# Input widgets: free-form text plus a speaker picker that defaults to the
# first speaker reported by the model.
_text_input = gr.Textbox(label="Text", value="你好,我是通义生成式语音大模型。")
_speaker_input = gr.Dropdown(choices=spk_list, value=spk_list[0], label="Speaker")

# Wire the widgets to the synthesis function; the result is played back
# through a single audio component.
demo = gr.Interface(
    fn=tts,
    inputs=[_text_input, _speaker_input],
    outputs=gr.Audio(label="Audio"),
)

# Listen on all interfaces on port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)