Add multi-character support and god model
Browse files- app.py +85 -34
- config.json → models/base/config.json +1 -1
- prompt_encoder_fp16.bin → models/base/prompt_encoder_fp16.bin +0 -0
- prompt_encoder_fp32.onnx → models/base/prompt_encoder_fp32.onnx +0 -0
- models/base/prompt_wav.json +7 -0
- ref.wav → models/base/ref.wav +0 -0
- t2s_encoder_fp32.bin → models/base/t2s_encoder_fp32.bin +0 -0
- t2s_encoder_fp32.onnx → models/base/t2s_encoder_fp32.onnx +0 -0
- t2s_first_stage_decoder_fp32.onnx → models/base/t2s_first_stage_decoder_fp32.onnx +0 -0
- t2s_shared_fp16.bin → models/base/t2s_shared_fp16.bin +0 -0
- t2s_stage_decoder_fp32.onnx → models/base/t2s_stage_decoder_fp32.onnx +0 -0
- vits_fp16.bin → models/base/vits_fp16.bin +0 -0
- vits_fp32.onnx → models/base/vits_fp32.onnx +0 -0
- models/god/config.json +9 -0
- models/god/prompt_encoder_fp16.bin +3 -0
- models/god/prompt_encoder_fp32.onnx +3 -0
- prompt_wav.json → models/god/prompt_wav.json +1 -1
- models/god/ref.wav +3 -0
- models/god/t2s_encoder_fp32.bin +3 -0
- models/god/t2s_encoder_fp32.onnx +3 -0
- models/god/t2s_first_stage_decoder_fp32.onnx +3 -0
- models/god/t2s_shared_fp16.bin +3 -0
- models/god/t2s_stage_decoder_fp32.onnx +3 -0
- models/god/vits_fp16.bin +3 -0
- models/god/vits_fp32.onnx +3 -0
app.py
CHANGED
|
@@ -1,63 +1,97 @@
|
|
| 1 |
import builtins
|
| 2 |
import os
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 3 |
|
| 4 |
-
# 🔴 核心:在所有 import 之前,必须先劫持 input
|
| 5 |
builtins.input = lambda prompt="": "y"
|
|
|
|
| 6 |
os.environ["GENIE_DATA_DIR"] = "/app/GenieData"
|
| 7 |
|
| 8 |
-
#
|
| 9 |
-
from huggingface_hub import snapshot_download
|
| 10 |
if not os.path.exists("/app/GenieData/G2P"):
|
| 11 |
print("📦 Downloading GenieData Assets...")
|
| 12 |
snapshot_download(repo_id="High-Logic/Genie", allow_patterns=["GenieData/*"], local_dir="/app", local_dir_use_symlinks=False)
|
| 13 |
|
| 14 |
-
# 现在可以安全地 import 了
|
| 15 |
-
import shutil
|
| 16 |
-
import io
|
| 17 |
-
import time
|
| 18 |
-
import uvicorn
|
| 19 |
-
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
| 20 |
-
from fastapi.responses import StreamingResponse
|
| 21 |
import genie_tts
|
| 22 |
|
| 23 |
-
# ---------------------------------------------------------
|
| 24 |
-
# 服务器设置
|
| 25 |
-
# ---------------------------------------------------------
|
| 26 |
app = FastAPI()
|
| 27 |
|
| 28 |
-
|
| 29 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 30 |
|
| 31 |
-
|
| 32 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 33 |
|
| 34 |
@app.post("/upload_and_tts")
|
| 35 |
async def upload_and_tts(
|
|
|
|
| 36 |
prompt_text: str = Form(...),
|
| 37 |
text: str = Form(...),
|
| 38 |
language: str = Form("zh"),
|
| 39 |
file: UploadFile = File(...)
|
| 40 |
):
|
|
|
|
|
|
|
|
|
|
| 41 |
try:
|
| 42 |
-
# 使用时间戳文件名强行刷新缓存
|
| 43 |
ts = int(time.time() * 1000)
|
| 44 |
-
save_path = f"/
|
|
|
|
| 45 |
|
| 46 |
with open(save_path, "wb") as buffer:
|
| 47 |
shutil.copyfileobj(file.file, buffer)
|
| 48 |
|
| 49 |
-
print(f"🔥 [Custom]
|
| 50 |
-
genie_tts.set_reference_audio(
|
| 51 |
|
| 52 |
-
out_path = f"/
|
| 53 |
-
genie_tts.tts(
|
| 54 |
|
| 55 |
-
# 定义生成器,在发送完后尝试清理临时文件(可选)
|
| 56 |
def iterfile():
|
| 57 |
with open(out_path, "rb") as f:
|
| 58 |
yield from f
|
| 59 |
-
|
| 60 |
-
|
|
|
|
| 61 |
except: pass
|
| 62 |
|
| 63 |
return StreamingResponse(iterfile(), media_type="audio/wav")
|
|
@@ -66,16 +100,33 @@ async def upload_and_tts(
|
|
| 66 |
raise HTTPException(status_code=500, detail=str(e))
|
| 67 |
|
| 68 |
@app.post("/tts")
|
| 69 |
-
async def
|
| 70 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 71 |
try:
|
| 72 |
-
|
| 73 |
-
|
| 74 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 75 |
return StreamingResponse(open(out_path, "rb"), media_type="audio/wav")
|
| 76 |
except Exception as e:
|
| 77 |
-
print(f"❌ Error
|
| 78 |
-
raise HTTPException(status_code=
|
| 79 |
|
| 80 |
if __name__ == "__main__":
|
| 81 |
-
uvicorn.run(app, host="0.0.0.0", port=7860)
|
|
|
|
| 1 |
import builtins
|
| 2 |
import os
|
| 3 |
+
import shutil
|
| 4 |
+
import io
|
| 5 |
+
import time
|
| 6 |
+
import uvicorn
|
| 7 |
+
from fastapi import FastAPI, UploadFile, File, Form, HTTPException
|
| 8 |
+
from fastapi.responses import StreamingResponse
|
| 9 |
+
from huggingface_hub import snapshot_download
|
| 10 |
|
| 11 |
+
# 🔴 Core: input() must be hijacked before genie_tts is imported below, so any interactive prompt is auto-answered with "y".
|
| 12 |
builtins.input = lambda prompt="": "y"
|
| 13 |
+
# 适配 Space 路径,本地运行时请确保此目录存在
|
| 14 |
os.environ["GENIE_DATA_DIR"] = "/app/GenieData"
|
| 15 |
|
| 16 |
+
# 下载环境
|
|
|
|
| 17 |
if not os.path.exists("/app/GenieData/G2P"):
|
| 18 |
print("📦 Downloading GenieData Assets...")
|
| 19 |
snapshot_download(repo_id="High-Logic/Genie", allow_patterns=["GenieData/*"], local_dir="/app", local_dir_use_symlinks=False)
|
| 20 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 21 |
import genie_tts
|
| 22 |
|
|
|
|
|
|
|
|
|
|
| 23 |
app = FastAPI()
|
| 24 |
|
| 25 |
+
# Root directory that holds one sub-directory per character model.
MODELS_ROOT = "/app/models"
os.makedirs(MODELS_ROOT, exist_ok=True)

# Characters loaded at startup (models/base and models/god). Paths are derived
# from MODELS_ROOT so the root is defined in exactly one place.
genie_tts.load_character("Base", f"{MODELS_ROOT}/base", "zh")
genie_tts.load_character("god", f"{MODELS_ROOT}/god", "zh")

# Default reference audio per character: wav path, prompt text and prompt
# language, keyed by the character name used in load_character above.
REF_CACHE = {
    "Base": {
        "path": f"{MODELS_ROOT}/base/ref.wav",
        "text": "琴是个称职的好团长。看到她认真工作的样子,就连我也忍不住想要多帮她一把。",
        "lang": "zh",
    },
    "god": {
        "path": f"{MODELS_ROOT}/god/ref.wav",
        "text": "很多人的一生,写于纸上也不过几行,大多都是些无聊的故事啊。",
        "lang": "zh",
    },
}
|
| 46 |
+
|
| 47 |
+
@app.post("/load_model")
async def load_model(character_name: str = Form(...), model_path: str = Form(...), language: str = Form("zh")):
    """Load a new character model at runtime.

    Args:
        character_name: Name under which the character is registered with
            genie_tts.
        model_path: Path relative to /app, e.g. "models/my_character".
        language: Language code forwarded to genie_tts.load_character.

    Returns:
        A dict with "status" and "message" keys on success.

    Raises:
        HTTPException: 400 if model_path resolves outside /app, 404 if the
            resolved path does not exist, 500 if loading the character fails.
    """
    app_root = "/app"
    full_path = os.path.normpath(os.path.join(app_root, model_path))

    # model_path comes straight from the request. os.path.join discards the
    # root when given an absolute path, and ".." segments climb out of it, so
    # verify the normalized result is still inside /app before touching disk.
    if os.path.commonpath([app_root, full_path]) != app_root:
        raise HTTPException(status_code=400, detail="model_path must be a path inside /app")

    if not os.path.exists(full_path):
        raise HTTPException(status_code=404, detail=f"Model path not found: {full_path}")

    try:
        print(f"📦 Loading character: {character_name} from {full_path}")
        genie_tts.load_character(character_name, full_path, language)
        return {"status": "success", "message": f"Character '{character_name}' loaded."}
    except Exception as e:
        # Top-level API boundary: surface any loader failure as a 500.
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 63 |
|
| 64 |
@app.post("/upload_and_tts")
|
| 65 |
async def upload_and_tts(
|
| 66 |
+
character_name: str = Form("Default"),
|
| 67 |
prompt_text: str = Form(...),
|
| 68 |
text: str = Form(...),
|
| 69 |
language: str = Form("zh"),
|
| 70 |
file: UploadFile = File(...)
|
| 71 |
):
|
| 72 |
+
"""
|
| 73 |
+
上传临时参考音频并生成语音
|
| 74 |
+
"""
|
| 75 |
try:
|
|
|
|
| 76 |
ts = int(time.time() * 1000)
|
| 77 |
+
save_path = f"/tmp/ref_{ts}.wav"
|
| 78 |
+
os.makedirs("/tmp", exist_ok=True)
|
| 79 |
|
| 80 |
with open(save_path, "wb") as buffer:
|
| 81 |
shutil.copyfileobj(file.file, buffer)
|
| 82 |
|
| 83 |
+
print(f"🔥 [Custom] Using temp audio for {character_name}: {save_path}")
|
| 84 |
+
genie_tts.set_reference_audio(character_name, save_path, prompt_text, language)
|
| 85 |
|
| 86 |
+
out_path = f"/tmp/out_{ts}.wav"
|
| 87 |
+
genie_tts.tts(character_name, text, save_path=out_path, play=False)
|
| 88 |
|
|
|
|
| 89 |
def iterfile():
    """Stream the generated wav, then delete both temporary files.

    Reads ``out_path`` from the enclosing scope and yields its bytes in
    fixed-size chunks. ``save_path`` (the uploaded reference) and ``out_path``
    are removed once streaming ends, including when the consumer stops early
    and the generator is closed.
    """
    try:
        with open(out_path, "rb") as f:
            # Fixed-size reads: iterating a binary file "by line" yields
            # arbitrarily sized pieces split on newline bytes.
            yield from iter(lambda: f.read(64 * 1024), b"")
    finally:
        # Best-effort cleanup. Each file is removed independently so that a
        # failure on one does not leave the other behind.
        for tmp_path in (save_path, out_path):
            try:
                os.remove(tmp_path)
            except OSError:
                pass
|
| 96 |
|
| 97 |
return StreamingResponse(iterfile(), media_type="audio/wav")
|
|
|
|
| 100 |
raise HTTPException(status_code=500, detail=str(e))
|
| 101 |
|
| 102 |
@app.post("/tts")
async def dynamic_tts(
    text: str = Form(...),
    character_name: str = Form("Default"),
    prompt_text: str = Form(None),
    prompt_lang: str = Form("zh"),
    use_default_ref: bool = Form(True)
):
    """General TTS endpoint that can switch between already-loaded characters.

    Args:
        text: Text to synthesize.
        character_name: Character to speak with. Its entry in REF_CACHE
            supplies the reference audio; characters without an entry fall
            back to the "Base" reference.
        prompt_text: Optional override for the reference prompt text (the
            reference audio file itself is not changed).
        prompt_lang: Language passed to genie_tts.set_reference_audio.
        use_default_ref: Accepted for API compatibility; not consulted here.

    Returns:
        A StreamingResponse with media type "audio/wav".

    Raises:
        HTTPException: 500 if setting the reference or synthesis fails.
    """
    import uuid  # local import keeps this block self-contained

    # NOTE(review): the form default "Default" matches neither character
    # loaded at startup ("Base", "god") — confirm whether callers are expected
    # to always pass character_name.
    out_path = f"/tmp/out_dyn_{uuid.uuid4().hex}.wav"  # unique per request

    def remove_output():
        # Best-effort removal of this request's temporary wav.
        try:
            os.remove(out_path)
        except OSError:
            pass

    def iterfile():
        # Stream in fixed-size chunks; close and delete the file afterwards,
        # including when the client disconnects mid-stream.
        try:
            with open(out_path, "rb") as fh:
                yield from iter(lambda: fh.read(64 * 1024), b"")
        finally:
            remove_output()

    try:
        # dict.get evaluates its default eagerly, so the fallback must be
        # resolved only when the character has no entry of its own.
        ref_info = REF_CACHE.get(character_name)
        if ref_info is None:
            ref_info = REF_CACHE["Base"]

        # Allow the reference text to be overridden without swapping the audio.
        final_text = prompt_text if prompt_text else ref_info["text"]

        genie_tts.set_reference_audio(character_name, ref_info["path"], final_text, prompt_lang)
        genie_tts.tts(character_name, text, save_path=out_path, play=False)

        return StreamingResponse(iterfile(), media_type="audio/wav")
    except Exception as e:
        remove_output()
        print(f"❌ Error: {e}")
        raise HTTPException(status_code=500, detail=str(e)) from e
|
| 130 |
|
| 131 |
if __name__ == "__main__":
|
| 132 |
+
uvicorn.run(app, host="0.0.0.0", port=7860)
|
config.json → models/base/config.json
RENAMED
|
@@ -1,6 +1,6 @@
|
|
| 1 |
{
|
| 2 |
"version": "2.0",
|
| 3 |
-
"model_type": "GPT-SoVITS-
|
| 4 |
"gpt_path": "/app/t2s_stage_decoder_fp32.onnx",
|
| 5 |
"sovits_path": "/app/vits_fp32.onnx",
|
| 6 |
"first_stage_path": "/app/t2s_first_stage_decoder_fp32.onnx",
|
|
|
|
| 1 |
{
|
| 2 |
"version": "2.0",
|
| 3 |
+
"model_type": "GPT-SoVITS-V2ProPlus",
|
| 4 |
"gpt_path": "/app/t2s_stage_decoder_fp32.onnx",
|
| 5 |
"sovits_path": "/app/vits_fp32.onnx",
|
| 6 |
"first_stage_path": "/app/t2s_first_stage_decoder_fp32.onnx",
|
prompt_encoder_fp16.bin → models/base/prompt_encoder_fp16.bin
RENAMED
|
File without changes
|
prompt_encoder_fp32.onnx → models/base/prompt_encoder_fp32.onnx
RENAMED
|
File without changes
|
models/base/prompt_wav.json
ADDED
|
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"default": {
|
| 3 |
+
"wav_path": "ref.wav",
|
| 4 |
+
"prompt_text": "琴是个称职的好团长。看到她认真工作的样子,就连我也忍不住想要多帮她一把。",
|
| 5 |
+
"prompt_lang": "zh"
|
| 6 |
+
}
|
| 7 |
+
}
|
ref.wav → models/base/ref.wav
RENAMED
|
File without changes
|
t2s_encoder_fp32.bin → models/base/t2s_encoder_fp32.bin
RENAMED
|
File without changes
|
t2s_encoder_fp32.onnx → models/base/t2s_encoder_fp32.onnx
RENAMED
|
File without changes
|
t2s_first_stage_decoder_fp32.onnx → models/base/t2s_first_stage_decoder_fp32.onnx
RENAMED
|
File without changes
|
t2s_shared_fp16.bin → models/base/t2s_shared_fp16.bin
RENAMED
|
File without changes
|
t2s_stage_decoder_fp32.onnx → models/base/t2s_stage_decoder_fp32.onnx
RENAMED
|
File without changes
|
vits_fp16.bin → models/base/vits_fp16.bin
RENAMED
|
File without changes
|
vits_fp32.onnx → models/base/vits_fp32.onnx
RENAMED
|
File without changes
|
models/god/config.json
ADDED
|
@@ -0,0 +1,9 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
{
|
| 2 |
+
"version": "2.0",
|
| 3 |
+
"model_type": "GPT-SoVITS-V2ProPlus",
|
| 4 |
+
"gpt_path": "./t2s_stage_decoder_fp32.onnx",
|
| 5 |
+
"sovits_path": "./vits_fp32.onnx",
|
| 6 |
+
"first_stage_path": "./t2s_first_stage_decoder_fp32.onnx",
|
| 7 |
+
"cnhubert_base": "/app/GenieData/chinese-hubert-base/chinese-hubert-base.onnx",
|
| 8 |
+
"bert": "/app/GenieData/chinese-roberta-wwm-ext-large/chinese-roberta-wwm-ext-large.onnx"
|
| 9 |
+
}
|
models/god/prompt_encoder_fp16.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:121a5877a97347e5969a175329fc62a06d53a73fb639f760c48b82bb32c40168
|
| 3 |
+
size 44262912
|
models/god/prompt_encoder_fp32.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:c4a3d8c1e385a17aecb7bd9c5ede5707ba390f85c3eb49b388deeaaaf53d2748
|
| 3 |
+
size 44464
|
prompt_wav.json → models/god/prompt_wav.json
RENAMED
|
@@ -1,7 +1,7 @@
|
|
| 1 |
{
|
| 2 |
"default": {
|
| 3 |
"wav_path": "ref.wav",
|
| 4 |
-
"prompt_text": "
|
| 5 |
"prompt_lang": "zh"
|
| 6 |
}
|
| 7 |
}
|
|
|
|
| 1 |
{
|
| 2 |
"default": {
|
| 3 |
"wav_path": "ref.wav",
|
| 4 |
+
"prompt_text": "很多人的一生,写于纸上也不过几行,大多都是些无聊的故事啊。",
|
| 5 |
"prompt_lang": "zh"
|
| 6 |
}
|
| 7 |
}
|
models/god/ref.wav
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:87fd8dccc7109220edee84f1748d55c6d3ea2e6429dc041159367895be6d47c7
|
| 3 |
+
size 84716
|
models/god/t2s_encoder_fp32.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:74af327b9cbd6f4cbc1f1137586b0cebcf360ffa141f75ad59b63b25db7c5eab
|
| 3 |
+
size 11465732
|
models/god/t2s_encoder_fp32.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f6eb1acd47c8e6d36b777886981a49122e8e070a5eb9888d458fb188dc139f75
|
| 3 |
+
size 14568
|
models/god/t2s_first_stage_decoder_fp32.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:868f395999508905128c5325c5db4f4b37b2e70e04d6e2719fec64cbb60ee7f9
|
| 3 |
+
size 416803
|
models/god/t2s_shared_fp16.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:a01da3c9cbd46c82fcc7bbb1a07d3c7a2d4fcb0a234fdd7055397ad07682752a
|
| 3 |
+
size 153413634
|
models/god/t2s_stage_decoder_fp32.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:3f02881c517423deb610f86d5441bd9825937c5069f3887cacefa1e9dc403b0d
|
| 3 |
+
size 417625
|
models/god/vits_fp16.bin
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:f21017aa6c0076a2bcd379fe591b70f3f8aec4f1e8c920f1ed40965d73a03da0
|
| 3 |
+
size 124345856
|
models/god/vits_fp32.onnx
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:2f918e08a1bfecc568de4cc5dc96135cb8baf37a07f4eb4ec9258a4854fcd3f3
|
| 3 |
+
size 1611210
|