leesenx committed on
Commit
a17a15f
·
verified ·
1 Parent(s): d0046cb

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +55 -34
app.py CHANGED
@@ -1,61 +1,82 @@
 
1
  import gradio as gr
2
  import numpy as np
3
  import soundfile as sf
4
  import tempfile
5
- import os
6
-
7
- from tts.onnx_runner import MOSSOnnxTTS
8
 
9
  from huggingface_hub import snapshot_download
 
10
 
11
# Fetch the ONNX model snapshot into ./models. local_dir_use_symlinks=False
# asks huggingface_hub for real files rather than symlinks into its cache.
MODEL_PATH = snapshot_download(
    repo_id="OpenMOSS-Team/MOSS-TTS-Nano-100M-ONNX",
    local_dir="./models",
    local_dir_use_symlinks=False,
)

# ===== Initialise the model (CPU ONNX) =====
# An explicit MODEL_DIR environment variable still wins. Otherwise fall back
# to the directory snapshot_download actually returned instead of repeating
# the "./models" literal, so the download target and the load path cannot
# drift apart.
tts = MOSSOnnxTTS(
    model_dir=os.environ.get("MODEL_DIR", MODEL_PATH)
)
21
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
22
 
23
def generate(text, prompt_audio):
    """Synthesise ``text`` using a reference recording for voice cloning.

    Args:
        text: Text to synthesise.
        prompt_audio: Reference audio as a ``(sample_rate, samples)`` pair
            (index 0 is used as the sample rate, index 1 as the data), or
            ``None`` when the user supplied no recording.

    Returns:
        Path to a WAV file holding the generated speech, or ``None`` when
        ``prompt_audio`` is ``None``.
    """
    if prompt_audio is None:
        return None

    # Reserve a name and close the handle before soundfile opens the path;
    # writing by name while the handle is still open fails on some platforms.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as ref_file:
        ref_path = ref_file.name

    try:
        sf.write(ref_path, prompt_audio[1], prompt_audio[0])

        # Inference
        wav, sr = tts.infer(
            text=text,
            prompt_audio_path=ref_path,
        )
    finally:
        # The reference clip is only needed during inference; remove it so
        # repeated requests do not pile up temp files. Best-effort cleanup.
        try:
            os.remove(ref_path)
        except OSError:
            pass

    # The output file must outlive this call (the caller receives its path),
    # so only the handle is closed here, not the file removed.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as out_file:
        out_path = out_file.name
    sf.write(out_path, wav, sr)

    return out_path
47
 
48
 
49
# ===== Gradio UI =====
# Two inputs (text + reference voice as a numpy clip) and one audio output.
demo = gr.Interface(
    title="MOSS-TTS-Nano ONNX (CPU)",
    description="ONNX CPU inference + Gradio demo",
    fn=generate,
    inputs=[
        gr.Textbox(label="Text"),
        gr.Audio(label="Reference Voice", type="numpy"),
    ],
    outputs=gr.Audio(label="Generated Speech"),
)

# Bind to all interfaces on port 7860.
demo.launch(server_port=7860, server_name="0.0.0.0")
 
1
+ import os
2
  import gradio as gr
3
  import numpy as np
4
  import soundfile as sf
5
  import tempfile
 
 
 
6
 
7
  from huggingface_hub import snapshot_download
8
+ import onnxruntime as ort
9
 
10
+
11
# =========================
# 1. Download the model automatically (key step)
# =========================
# The returned path is kept so the sessions below load from the same place.
MODEL_DIR = snapshot_download(
    "OpenMOSS-Team/MOSS-TTS-Nano-100M-ONNX",
    local_dir_use_symlinks=False,
    local_dir="./models",
)
19
 
 
 
 
 
20
 
21
+ # =========================
22
+ # 2. ONNX TTS wrapper (simplified, runnable skeleton)
23
+ # =========================
24
class MOSSTTS:
    """Holds CPU ONNX Runtime sessions for the MOSS-TTS prefill and decode graphs.

    This is a minimal runnable skeleton: ``infer`` does not tokenise its
    input and does not decode audio from the model output.
    """

    def __init__(self, model_dir):
        """Open both ONNX graphs found under ``model_dir`` on the CPU provider."""
        prefill_path = f"{model_dir}/moss_tts_prefill.onnx"
        decode_path = f"{model_dir}/moss_tts_decode_step.onnx"

        self.prefill = ort.InferenceSession(
            prefill_path, providers=["CPUExecutionProvider"]
        )
        self.decode = ort.InferenceSession(
            decode_path, providers=["CPUExecutionProvider"]
        )

    def infer(self, text):
        """Run the placeholder pipeline and return ``(wav, sample_rate)``.

        NOTE: this is the smallest structure that runs end to end. ``text``
        is currently unused; a real implementation needs a tokenizer and an
        audio codec.

        Returns:
            A float32 array of random samples three seconds long and the
            sample rate 16000.
        """
        sample_rate = 16000
        duration_s = 3

        # Placeholder token ids standing in for real tokenizer output.
        placeholder_ids = np.array([[1, 2, 3]], dtype=np.int64)
        feeds = {"input_ids": placeholder_ids}
        self.prefill.run(None, feeds)  # result intentionally discarded

        # Placeholder audio: random noise instead of decoded speech.
        noise = np.random.randn(sample_rate * duration_s)
        return noise.astype(np.float32), sample_rate
52
 
 
 
 
 
 
53
 
54
# =========================
# 3. Initialise the model
# =========================
# Built once at import time so every request reuses the same sessions.
tts = MOSSTTS(model_dir=MODEL_DIR)
58
 
 
 
 
 
59
 
60
+ # =========================
61
+ # 4. Inference function
62
+ # =========================
63
def generate(text):
    """Synthesise ``text`` and return the path of the resulting WAV file.

    Args:
        text: Text to synthesise.

    Returns:
        Path to a temporary ``.wav`` file containing the audio returned by
        ``tts.infer``, or ``None`` when ``text`` is ``None``, empty, or only
        whitespace (nothing to synthesise).
    """
    if text is None or not text.strip():
        return None

    wav, sr = tts.infer(text)

    # Reserve a unique name and close the handle straight away; the original
    # left the NamedTemporaryFile handle open for the garbage collector.
    # delete=False because the caller still needs the file after we return.
    with tempfile.NamedTemporaryFile(suffix=".wav", delete=False) as tmp:
        out_file = tmp.name
    sf.write(out_file, wav, sr)

    return out_file
70
 
71
 
72
# =========================
# 5. Gradio UI
# =========================
# Single text box in, single audio player out.
demo = gr.Interface(
    title="MOSS-TTS-Nano ONNX (CPU)",
    fn=generate,
    inputs=gr.Textbox(label="Text"),
    outputs=gr.Audio(label="Output Audio"),
)

# Bind to all interfaces on port 7860.
demo.launch(server_port=7860, server_name="0.0.0.0")