leesenx committed on
Commit
2841146
·
verified ·
1 Parent(s): 478cf8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +16 -15
app.py CHANGED
@@ -1,33 +1,34 @@
1
- import os, sys, subprocess, numpy as np, gradio as gr
 
2
  from huggingface_hub import snapshot_download
3
 
4
  subprocess.run(["git", "clone", "--recursive", "https://github.com/FunAudioLLM/CosyVoice.git", "CosyVoice"], check=True)
5
  sys.path.insert(0, "CosyVoice/third_party/Matcha-TTS")
6
  sys.path.insert(0, "CosyVoice")
7
 
8
- cosyvoice = None
 
 
 
 
 
9
 
10
- def load_model():
11
- global cosyvoice
12
- if cosyvoice is not None:
13
- return cosyvoice
14
- model_dir = snapshot_download("FunAudioLLM/CosyVoice-300M-SFT", local_dir="pretrained_models/CosyVoice-300M-SFT")
15
- from cosyvoice.cli.cosyvoice import CosyVoice
16
- cosyvoice = CosyVoice(model_dir)
17
- return cosyvoice
18
 
19
  def tts(text, spk):
20
- model = load_model()
21
- for result in model.inference_sft(text, spk, stream=False):
22
  audio = result["tts_speech"].numpy().flatten()
23
- return (model.sample_rate, audio)
24
 
25
  demo = gr.Interface(
26
  fn=tts,
27
  inputs=[
28
  gr.Textbox(label="Text", value="你好,我是通义生成式语音大模型。"),
29
- gr.Textbox(label="Speaker", value="中文女"),
30
  ],
31
  outputs=gr.Audio(label="Audio"),
32
  )
33
- demo.launch()
 
1
+ import os, sys, subprocess, types, numpy as np, gradio as gr
2
+ import torch
3
  from huggingface_hub import snapshot_download
4
 
5
  subprocess.run(["git", "clone", "--recursive", "https://github.com/FunAudioLLM/CosyVoice.git", "CosyVoice"], check=True)
6
  sys.path.insert(0, "CosyVoice/third_party/Matcha-TTS")
7
  sys.path.insert(0, "CosyVoice")
8
 
9
+ # stub whisper: only used in _extract_speech_token which SFT mode never calls
10
+ w = types.ModuleType("whisper")
11
+ def _log_mel_spectrogram(*a, **kw):
12
+ return torch.zeros(1, 128, 100)
13
+ w.log_mel_spectrogram = _log_mel_spectrogram
14
+ sys.modules["whisper"] = w
15
 
16
+ model_dir = snapshot_download("FunAudioLLM/CosyVoice-300M-SFT", local_dir="pretrained_models/CosyVoice-300M-SFT")
17
+ from cosyvoice.cli.cosyvoice import CosyVoice
18
+ cosyvoice = CosyVoice(model_dir)
19
+ spk_list = cosyvoice.list_available_spks()
 
 
 
 
20
 
21
  def tts(text, spk):
22
+ for result in cosyvoice.inference_sft(text, spk, stream=False):
 
23
  audio = result["tts_speech"].numpy().flatten()
24
+ return (cosyvoice.sample_rate, audio)
25
 
26
  demo = gr.Interface(
27
  fn=tts,
28
  inputs=[
29
  gr.Textbox(label="Text", value="你好,我是通义生成式语音大模型。"),
30
+ gr.Dropdown(choices=spk_list, value=spk_list[0], label="Speaker"),
31
  ],
32
  outputs=gr.Audio(label="Audio"),
33
  )
34
+ demo.launch(server_name="0.0.0.0", server_port=7860)