simler committed on
Commit
aa9a7de
·
1 Parent(s): f7dfda2

Add multi-character support and god model

Browse files
app.py CHANGED
@@ -1,63 +1,97 @@
1
  import builtins
2
  import os
 
 
 
 
 
 
 
3
 
4
- # 🔴 核心:在所有 import 之前,必须先劫持 input,否则导入 genie_tts 时会因为 input 阻塞导致崩溃
5
  builtins.input = lambda prompt="": "y"
 
6
  os.environ["GENIE_DATA_DIR"] = "/app/GenieData"
7
 
8
- # 🔴 核心:在导入之前,必须先确保数据已下载
9
- from huggingface_hub import snapshot_download
10
  if not os.path.exists("/app/GenieData/G2P"):
11
  print("📦 Downloading GenieData Assets...")
12
  snapshot_download(repo_id="High-Logic/Genie", allow_patterns=["GenieData/*"], local_dir="/app", local_dir_use_symlinks=False)
13
 
14
- # 现在可以安全地 import 了
15
- import shutil
16
- import io
17
- import time
18
- import uvicorn
19
- from fastapi import FastAPI, UploadFile, File, Form, HTTPException
20
- from fastapi.responses import StreamingResponse
21
  import genie_tts
22
 
23
- # ---------------------------------------------------------
24
- # 服务器设置
25
- # ---------------------------------------------------------
26
  app = FastAPI()
27
 
28
- DEFAULT_REF_PATH = "/app/ref.wav"
29
- DEFAULT_REF_TEXT = "琴是个称职的好团长。看到她认真工作的样子,就连我也忍不住想要多帮她一把。"
 
 
 
 
 
30
 
31
- print("⚡ Starting Genie Engine...")
32
- genie_tts.load_character("Default", "/app", "zh")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
33
 
34
  @app.post("/upload_and_tts")
35
  async def upload_and_tts(
 
36
  prompt_text: str = Form(...),
37
  text: str = Form(...),
38
  language: str = Form("zh"),
39
  file: UploadFile = File(...)
40
  ):
 
 
 
41
  try:
42
- # 使用时间戳文件名强行刷新缓存
43
  ts = int(time.time() * 1000)
44
- save_path = f"/app/ref_{ts}.wav"
 
45
 
46
  with open(save_path, "wb") as buffer:
47
  shutil.copyfileobj(file.file, buffer)
48
 
49
- print(f"🔥 [Custom] Loaded new unique audio: {save_path}")
50
- genie_tts.set_reference_audio("Default", save_path, prompt_text, language)
51
 
52
- out_path = f"/app/out_{ts}.wav"
53
- genie_tts.tts("Default", text, save_path=out_path, play=False)
54
 
55
- # 定义生成器,在发送完后尝试清理临时文件(可选)
56
  def iterfile():
57
  with open(out_path, "rb") as f:
58
  yield from f
59
- # 这里的清理可以防止占用过多存储空间
60
- try: os.remove(save_path); os.remove(out_path)
 
61
  except: pass
62
 
63
  return StreamingResponse(iterfile(), media_type="audio/wav")
@@ -66,16 +100,33 @@ async def upload_and_tts(
66
  raise HTTPException(status_code=500, detail=str(e))
67
 
68
  @app.post("/tts")
69
- async def safe_tts(data: dict):
70
- print(f"🛡️ [Reset] Back to standard Qin voice.")
 
 
 
 
 
 
 
 
71
  try:
72
- genie_tts.set_reference_audio("Default", DEFAULT_REF_PATH, DEFAULT_REF_TEXT, "zh")
73
- out_path = "/app/out_std.wav"
74
- genie_tts.tts("Default", data.get("text", ""), save_path=out_path, play=False)
 
 
 
 
 
 
 
 
 
75
  return StreamingResponse(open(out_path, "rb"), media_type="audio/wav")
76
  except Exception as e:
77
- print(f"❌ Error in std tts: {e}")
78
- raise HTTPException(status_code=404, detail=str(e))
79
 
80
  if __name__ == "__main__":
81
- uvicorn.run(app, host="0.0.0.0", port=7860)
 
1
  import builtins
2
  import os
3
+ import shutil
4
+ import io
5
+ import time
6
+ import uvicorn
7
+ from fastapi import FastAPI, UploadFile, File, Form, HTTPException
8
+ from fastapi.responses import StreamingResponse
9
+ from huggingface_hub import snapshot_download
10
 
11
+ # 🔴 Critical: hijack input() before importing genie_tts (below); otherwise the import blocks on an interactive prompt and crashes
12
  builtins.input = lambda prompt="": "y"
13
+ # 适配 Space 路径,本地运行时请确保此目录存在
14
  os.environ["GENIE_DATA_DIR"] = "/app/GenieData"
15
 
16
+ # 下载环境
 
17
  if not os.path.exists("/app/GenieData/G2P"):
18
  print("📦 Downloading GenieData Assets...")
19
  snapshot_download(repo_id="High-Logic/Genie", allow_patterns=["GenieData/*"], local_dir="/app", local_dir_use_symlinks=False)
20
 
 
 
 
 
 
 
 
21
  import genie_tts
22
 
 
 
 
23
  app = FastAPI()
24
 
25
+ # 角色模型存放根目录
26
+ MODELS_ROOT = "/app/models"
27
+ os.makedirs(MODELS_ROOT, exist_ok=True)
28
+
29
+ # 默认设置(加载 models/base 和 models/god)
30
+ genie_tts.load_character("Base", "/app/models/base", "zh")
31
+ genie_tts.load_character("god", "/app/models/god", "zh")
32
 
33
+ # 记录每个角色的默认参考音频
34
+ REF_CACHE = {
35
+ "Base": {
36
+ "path": "/app/models/base/ref.wav",
37
+ "text": "琴是个称职的好团长。看到她认真工作的样子,就连我也忍不住想要多帮她一把。",
38
+ "lang": "zh"
39
+ },
40
+ "god": {
41
+ "path": "/app/models/god/ref.wav",
42
+ "text": "很多人的一生,写于纸上也不过几行,大多都是些无聊的故事啊。",
43
+ "lang": "zh"
44
+ }
45
+ }
46
+
47
+ @app.post("/load_model")
48
+ async def load_model(character_name: str = Form(...), model_path: str = Form(...), language: str = Form("zh")):
49
+ """
50
+ 动态加载新模型 API
51
+ model_path: 相对于 /app 的路径,例如 "models/my_character"
52
+ """
53
+ full_path = os.path.join("/app", model_path)
54
+ if not os.path.exists(full_path):
55
+ raise HTTPException(status_code=404, detail=f"Model path not found: {full_path}")
56
+
57
+ try:
58
+ print(f"📦 Loading character: {character_name} from {full_path}")
59
+ genie_tts.load_character(character_name, full_path, language)
60
+ return {"status": "success", "message": f"Character '{character_name}' loaded."}
61
+ except Exception as e:
62
+ raise HTTPException(status_code=500, detail=str(e))
63
 
64
  @app.post("/upload_and_tts")
65
  async def upload_and_tts(
66
+ character_name: str = Form("Default"),
67
  prompt_text: str = Form(...),
68
  text: str = Form(...),
69
  language: str = Form("zh"),
70
  file: UploadFile = File(...)
71
  ):
72
+ """
73
+ 上传临时参考音频并生成语音
74
+ """
75
  try:
 
76
  ts = int(time.time() * 1000)
77
+ save_path = f"/tmp/ref_{ts}.wav"
78
+ os.makedirs("/tmp", exist_ok=True)
79
 
80
  with open(save_path, "wb") as buffer:
81
  shutil.copyfileobj(file.file, buffer)
82
 
83
+ print(f"🔥 [Custom] Using temp audio for {character_name}: {save_path}")
84
+ genie_tts.set_reference_audio(character_name, save_path, prompt_text, language)
85
 
86
+ out_path = f"/tmp/out_{ts}.wav"
87
+ genie_tts.tts(character_name, text, save_path=out_path, play=False)
88
 
 
89
  def iterfile():
90
  with open(out_path, "rb") as f:
91
  yield from f
92
+ try:
93
+ os.remove(save_path)
94
+ os.remove(out_path)
95
  except: pass
96
 
97
  return StreamingResponse(iterfile(), media_type="audio/wav")
 
100
  raise HTTPException(status_code=500, detail=str(e))
101
 
102
  @app.post("/tts")
103
+ async def dynamic_tts(
104
+ text: str = Form(...),
105
+ character_name: str = Form("Default"),
106
+ prompt_text: str = Form(None),
107
+ prompt_lang: str = Form("zh"),
108
+ use_default_ref: bool = Form(True)
109
+ ):
110
+ """
111
+ 通用 TTS 接口,支持切换已加载的角色
112
+ """
113
  try:
114
+ # 如果提供了 prompt_text 且不是用默认参考,则尝试更新该角色的参考(假设已经有 ref.wav 在该角色目录下)
115
+ # 这里为了简化,如果没传特定音频,就用 REF_CACHE 里的
116
+ ref_info = REF_CACHE.get(character_name, REF_CACHE["Default"])
117
+
118
+ # 允许通过 API 动态覆盖当前参考文本(不换音频文件)
119
+ final_text = prompt_text if prompt_text else ref_info["text"]
120
+
121
+ genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
122
+
123
+ out_path = f"/tmp/out_dyn_{int(time.time())}.wav"
124
+ genie_tts.tts(character_name, text, save_path=out_path, play=False)
125
+
126
  return StreamingResponse(open(out_path, "rb"), media_type="audio/wav")
127
  except Exception as e:
128
+ print(f"❌ Error: {e}")
129
+ raise HTTPException(status_code=500, detail=str(e))
130
 
131
  if __name__ == "__main__":
132
+ uvicorn.run(app, host="0.0.0.0", port=7860)
config.json → models/base/config.json RENAMED
@@ -1,6 +1,6 @@
1
  {
2
  "version": "2.0",
3
- "model_type": "GPT-SoVITS-V2",
4
  "gpt_path": "/app/t2s_stage_decoder_fp32.onnx",
5
  "sovits_path": "/app/vits_fp32.onnx",
6
  "first_stage_path": "/app/t2s_first_stage_decoder_fp32.onnx",
 
1
  {
2
  "version": "2.0",
3
+ "model_type": "GPT-SoVITS-V2ProPlus",
4
  "gpt_path": "/app/t2s_stage_decoder_fp32.onnx",
5
  "sovits_path": "/app/vits_fp32.onnx",
6
  "first_stage_path": "/app/t2s_first_stage_decoder_fp32.onnx",
prompt_encoder_fp16.bin → models/base/prompt_encoder_fp16.bin RENAMED
File without changes
prompt_encoder_fp32.onnx → models/base/prompt_encoder_fp32.onnx RENAMED
File without changes
models/base/prompt_wav.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "default": {
3
+ "wav_path": "ref.wav",
4
+ "prompt_text": "琴是个称职的好团长。看到她认真工作的样子,就连我也忍不住想要多帮她一把。",
5
+ "prompt_lang": "zh"
6
+ }
7
+ }
ref.wav → models/base/ref.wav RENAMED
File without changes
t2s_encoder_fp32.bin → models/base/t2s_encoder_fp32.bin RENAMED
File without changes
t2s_encoder_fp32.onnx → models/base/t2s_encoder_fp32.onnx RENAMED
File without changes
t2s_first_stage_decoder_fp32.onnx → models/base/t2s_first_stage_decoder_fp32.onnx RENAMED
File without changes
t2s_shared_fp16.bin → models/base/t2s_shared_fp16.bin RENAMED
File without changes
t2s_stage_decoder_fp32.onnx → models/base/t2s_stage_decoder_fp32.onnx RENAMED
File without changes
vits_fp16.bin → models/base/vits_fp16.bin RENAMED
File without changes
vits_fp32.onnx → models/base/vits_fp32.onnx RENAMED
File without changes
models/god/config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "2.0",
3
+ "model_type": "GPT-SoVITS-V2ProPlus",
4
+ "gpt_path": "./t2s_stage_decoder_fp32.onnx",
5
+ "sovits_path": "./vits_fp32.onnx",
6
+ "first_stage_path": "./t2s_first_stage_decoder_fp32.onnx",
7
+ "cnhubert_base": "/app/GenieData/chinese-hubert-base/chinese-hubert-base.onnx",
8
+ "bert": "/app/GenieData/chinese-roberta-wwm-ext-large/chinese-roberta-wwm-ext-large.onnx"
9
+ }
models/god/prompt_encoder_fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:121a5877a97347e5969a175329fc62a06d53a73fb639f760c48b82bb32c40168
3
+ size 44262912
models/god/prompt_encoder_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c4a3d8c1e385a17aecb7bd9c5ede5707ba390f85c3eb49b388deeaaaf53d2748
3
+ size 44464
prompt_wav.json → models/god/prompt_wav.json RENAMED
@@ -1,7 +1,7 @@
1
  {
2
  "default": {
3
  "wav_path": "ref.wav",
4
- "prompt_text": "我今天就不写了!开干!烦死了哎呀干",
5
  "prompt_lang": "zh"
6
  }
7
  }
 
1
  {
2
  "default": {
3
  "wav_path": "ref.wav",
4
+ "prompt_text": "很多人的一生,写于纸上也不过几行,大多都是些无聊的故事啊。",
5
  "prompt_lang": "zh"
6
  }
7
  }
models/god/ref.wav ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:87fd8dccc7109220edee84f1748d55c6d3ea2e6429dc041159367895be6d47c7
3
+ size 84716
models/god/t2s_encoder_fp32.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74af327b9cbd6f4cbc1f1137586b0cebcf360ffa141f75ad59b63b25db7c5eab
3
+ size 11465732
models/god/t2s_encoder_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6eb1acd47c8e6d36b777886981a49122e8e070a5eb9888d458fb188dc139f75
3
+ size 14568
models/god/t2s_first_stage_decoder_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:868f395999508905128c5325c5db4f4b37b2e70e04d6e2719fec64cbb60ee7f9
3
+ size 416803
models/god/t2s_shared_fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a01da3c9cbd46c82fcc7bbb1a07d3c7a2d4fcb0a234fdd7055397ad07682752a
3
+ size 153413634
models/god/t2s_stage_decoder_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f02881c517423deb610f86d5441bd9825937c5069f3887cacefa1e9dc403b0d
3
+ size 417625
models/god/vits_fp16.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f21017aa6c0076a2bcd379fe591b70f3f8aec4f1e8c920f1ed40965d73a03da0
3
+ size 124345856
models/god/vits_fp32.onnx ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2f918e08a1bfecc568de4cc5dc96135cb8baf37a07f4eb4ec9258a4854fcd3f3
3
+ size 1611210