Spaces:

leesenx
/

cv

Build error

leesenx committed 6 days ago

Commit

2841146

verified ·

1 Parent(s): 478cf8b

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,33 +1,34 @@
-import os, sys, subprocess, numpy as np, gradio as gr
 from huggingface_hub import snapshot_download
 subprocess.run(["git", "clone", "--recursive", "https://github.com/FunAudioLLM/CosyVoice.git", "CosyVoice"], check=True)
 sys.path.insert(0, "CosyVoice/third_party/Matcha-TTS")
 sys.path.insert(0, "CosyVoice")
-cosyvoice = None
-def load_model():
-    global cosyvoice
-    if cosyvoice is not None:
-        return cosyvoice
-    model_dir = snapshot_download("FunAudioLLM/CosyVoice-300M-SFT", local_dir="pretrained_models/CosyVoice-300M-SFT")
-    from cosyvoice.cli.cosyvoice import CosyVoice
-    cosyvoice = CosyVoice(model_dir)
-    return cosyvoice
 def tts(text, spk):
-    model = load_model()
-    for result in model.inference_sft(text, spk, stream=False):
         audio = result["tts_speech"].numpy().flatten()
-        return (model.sample_rate, audio)
 demo = gr.Interface(
     fn=tts,
     inputs=[
         gr.Textbox(label="Text", value="你好，我是通义生成式语音大模型。"),
-        gr.Textbox(label="Speaker", value="中文女"),
     ],
     outputs=gr.Audio(label="Audio"),
 )
-demo.launch()

+import os, sys, subprocess, types, numpy as np, gradio as gr
+import torch
 from huggingface_hub import snapshot_download
 subprocess.run(["git", "clone", "--recursive", "https://github.com/FunAudioLLM/CosyVoice.git", "CosyVoice"], check=True)
 sys.path.insert(0, "CosyVoice/third_party/Matcha-TTS")
 sys.path.insert(0, "CosyVoice")
+# stub whisper: only used in _extract_speech_token which SFT mode never calls
+w = types.ModuleType("whisper")
+def _log_mel_spectrogram(*a, **kw):
+    return torch.zeros(1, 128, 100)
+w.log_mel_spectrogram = _log_mel_spectrogram
+sys.modules["whisper"] = w
+model_dir = snapshot_download("FunAudioLLM/CosyVoice-300M-SFT", local_dir="pretrained_models/CosyVoice-300M-SFT")
+from cosyvoice.cli.cosyvoice import CosyVoice
+cosyvoice = CosyVoice(model_dir)
+spk_list = cosyvoice.list_available_spks()
 def tts(text, spk):
+    for result in cosyvoice.inference_sft(text, spk, stream=False):
         audio = result["tts_speech"].numpy().flatten()
+        return (cosyvoice.sample_rate, audio)
 demo = gr.Interface(
     fn=tts,
     inputs=[
         gr.Textbox(label="Text", value="你好，我是通义生成式语音大模型。"),
+        gr.Dropdown(choices=spk_list, value=spk_list[0], label="Speaker"),
     ],
     outputs=gr.Audio(label="Audio"),
 )
+demo.launch(server_name="0.0.0.0", server_port=7860)