jerrybwang committed on
Commit ·
2bc8444
1
Parent(s): bd60378
33
Browse files
app.py
CHANGED
|
@@ -33,147 +33,122 @@ def load_cosyvoice_model():
|
|
| 33 |
print("="*60)
|
| 34 |
|
| 35 |
try:
|
| 36 |
-
# 方法1:
|
| 37 |
-
print("\n
|
| 38 |
-
|
| 39 |
-
cosyvoice_repo_path = Path("./CosyVoice")
|
| 40 |
-
if not cosyvoice_repo_path.exists():
|
| 41 |
-
print("正在克隆CosyVoice源码仓库...")
|
| 42 |
-
import subprocess
|
| 43 |
-
result = subprocess.run(
|
| 44 |
-
["git", "clone", "--depth", "1", "https://github.com/FunAudioLLM/CosyVoice.git"],
|
| 45 |
-
capture_output=True,
|
| 46 |
-
text=True
|
| 47 |
-
)
|
| 48 |
-
if result.returncode != 0:
|
| 49 |
-
print(f"⚠ 克隆失败: {result.stderr}")
|
| 50 |
-
raise Exception("无法克隆CosyVoice仓库")
|
| 51 |
-
print("✓ CosyVoice源码克隆成功")
|
| 52 |
-
else:
|
| 53 |
-
print("✓ CosyVoice源码已存在")
|
| 54 |
-
|
| 55 |
-
# 添加路径到sys.path
|
| 56 |
-
cosyvoice_path = str(cosyvoice_repo_path.absolute())
|
| 57 |
-
matcha_tts_path = str((cosyvoice_repo_path / "third_party" / "Matcha-TTS").absolute())
|
| 58 |
-
|
| 59 |
-
if cosyvoice_path not in sys.path:
|
| 60 |
-
sys.path.insert(0, cosyvoice_path)
|
| 61 |
-
if matcha_tts_path not in sys.path:
|
| 62 |
-
sys.path.insert(0, matcha_tts_path)
|
| 63 |
-
|
| 64 |
-
print(f"✓ 已添加路径: {cosyvoice_path}")
|
| 65 |
-
print(f"✓ 已添加路径: {matcha_tts_path}")
|
| 66 |
-
|
| 67 |
-
# 方法1.1: 使用官方AutoModel加载
|
| 68 |
try:
|
| 69 |
-
|
| 70 |
-
|
|
|
|
|
|
|
| 71 |
|
| 72 |
-
|
| 73 |
-
print("正在下载预训练模型...")
|
| 74 |
-
from huggingface_hub import snapshot_download
|
| 75 |
|
|
|
|
| 76 |
model_name = "FunAudioLLM/CosyVoice-300M"
|
| 77 |
-
|
| 78 |
-
repo_id=model_name,
|
| 79 |
-
allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt", "*.safetensors"],
|
| 80 |
-
ignore_patterns=["*.md", "*.gitattributes"]
|
| 81 |
-
)
|
| 82 |
-
print(f"✓ 模型文件已下载到: {model_dir}")
|
| 83 |
|
| 84 |
-
#
|
| 85 |
-
|
| 86 |
-
|
| 87 |
-
|
|
|
|
|
|
|
| 88 |
|
| 89 |
cosyvoice_model = {
|
| 90 |
-
'model':
|
| 91 |
-
'type': '
|
| 92 |
'has_inference': True,
|
| 93 |
-
'sample_rate': getattr(
|
| 94 |
}
|
| 95 |
model_loaded = True
|
| 96 |
-
print("✓ 成功加载
|
| 97 |
print("="*60 + "\n")
|
| 98 |
return cosyvoice_model
|
| 99 |
|
| 100 |
-
except
|
| 101 |
-
print(f"⚠
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 102 |
import traceback
|
| 103 |
traceback.print_exc()
|
| 104 |
|
| 105 |
-
|
| 106 |
-
|
| 107 |
-
print("\n备用方案: 尝试直接加载PyTorch模型文件...")
|
| 108 |
from huggingface_hub import snapshot_download
|
| 109 |
-
import glob
|
| 110 |
|
| 111 |
model_name = "FunAudioLLM/CosyVoice-300M"
|
| 112 |
model_dir = snapshot_download(
|
| 113 |
repo_id=model_name,
|
| 114 |
-
allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt"],
|
| 115 |
-
|
| 116 |
)
|
| 117 |
|
| 118 |
-
|
| 119 |
-
|
| 120 |
-
|
| 121 |
-
|
|
|
|
|
|
|
| 122 |
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
# 检查模型是否可用
|
| 128 |
-
if isinstance(model, dict):
|
| 129 |
-
print("✓ PyTorch模型字典加载成功")
|
| 130 |
-
cosyvoice_model = {
|
| 131 |
-
'model': model,
|
| 132 |
-
'type': 'pytorch_dict',
|
| 133 |
-
'model_dir': model_dir,
|
| 134 |
-
'has_inference': False
|
| 135 |
-
}
|
| 136 |
-
else:
|
| 137 |
-
if hasattr(model, 'eval'):
|
| 138 |
-
model.eval()
|
| 139 |
-
print(f"✓ PyTorch模型加载成功: {type(model).__name__}")
|
| 140 |
-
cosyvoice_model = {
|
| 141 |
-
'model': model,
|
| 142 |
-
'type': 'pytorch',
|
| 143 |
-
'model_dir': model_dir,
|
| 144 |
-
'has_inference': False
|
| 145 |
-
}
|
| 146 |
-
|
| 147 |
-
model_loaded = True
|
| 148 |
-
print("✓ 成功加载CosyVoice模型 (PyTorch)")
|
| 149 |
-
print("="*60 + "\n")
|
| 150 |
-
return cosyvoice_model
|
| 151 |
-
else:
|
| 152 |
-
print("⚠ 未找到模型权重文件")
|
| 153 |
-
# 模型文件已下载,但没有找到权重文件
|
| 154 |
-
cosyvoice_model = {
|
| 155 |
-
'model': None,
|
| 156 |
-
'type': 'downloaded',
|
| 157 |
-
'model_dir': model_dir,
|
| 158 |
-
'has_inference': False
|
| 159 |
-
}
|
| 160 |
-
model_loaded = True
|
| 161 |
-
print("✓ 模型文件已下载(但未找到权重文件)")
|
| 162 |
-
print("="*60 + "\n")
|
| 163 |
-
return cosyvoice_model
|
| 164 |
|
| 165 |
except Exception as e:
|
| 166 |
print(f"✗ 模型加载失败: {e}")
|
| 167 |
import traceback
|
| 168 |
print(f"详细错误:\n{traceback.format_exc()}")
|
| 169 |
|
| 170 |
-
# 演示模式(加载失败)
|
| 171 |
print("\n⚠ 使用演示模式")
|
| 172 |
-
print("提示: 要使用完整功能,请
|
| 173 |
-
print(" 1. 网络连接正常
|
| 174 |
-
print(" 2. 有足够的磁盘空间(约2GB)")
|
| 175 |
-
print(" 3.
|
| 176 |
-
print(" 4. (可选) 安装CosyVoice官方包: pip install cosyvoice")
|
| 177 |
print("="*60 + "\n")
|
| 178 |
|
| 179 |
cosyvoice_model = None
|
|
@@ -258,7 +233,7 @@ def text_to_speech(text, speaker="中文女", prompt_audio=None, prompt_text=Non
|
|
| 258 |
model_type = model.get('type', 'unknown')
|
| 259 |
|
| 260 |
# 官方 CosyVoice AutoModel
|
| 261 |
-
if model_type == '
|
| 262 |
cosyvoice = model['model']
|
| 263 |
sample_rate = model.get('sample_rate', 22050)
|
| 264 |
|
|
|
|
| 33 |
print("="*60)
|
| 34 |
|
| 35 |
try:
|
| 36 |
+
# 方法1: 尝试使用官方 CosyVoice 包
|
| 37 |
+
print("\n尝试使用官方 CosyVoice 包...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 38 |
try:
|
| 39 |
+
# 添加 third_party 路径(如果存在)
|
| 40 |
+
third_party_path = os.path.join(os.path.dirname(__file__), 'third_party', 'Matcha-TTS')
|
| 41 |
+
if os.path.exists(third_party_path):
|
| 42 |
+
sys.path.insert(0, third_party_path)
|
| 43 |
|
| 44 |
+
from cosyvoice.cli.cosyvoice import CosyVoice
|
|
|
|
|
|
|
| 45 |
|
| 46 |
+
# 尝试从 Hugging Face Hub 加载
|
| 47 |
model_name = "FunAudioLLM/CosyVoice-300M"
|
| 48 |
+
print(f"从 {model_name} 加载...")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# 下载模型到本地
|
| 51 |
+
from huggingface_hub import snapshot_download
|
| 52 |
+
model_dir = snapshot_download(repo_id=model_name, cache_dir="./models")
|
| 53 |
+
|
| 54 |
+
# 使用 CosyVoice 加载
|
| 55 |
+
cosyvoice = CosyVoice(model_dir=model_dir)
|
| 56 |
|
| 57 |
cosyvoice_model = {
|
| 58 |
+
'model': cosyvoice,
|
| 59 |
+
'type': 'cosyvoice_official',
|
| 60 |
'has_inference': True,
|
| 61 |
+
'sample_rate': getattr(cosyvoice, 'sample_rate', 22050)
|
| 62 |
}
|
| 63 |
model_loaded = True
|
| 64 |
+
print("✓ 成功使用官方 CosyVoice 包加载模型")
|
| 65 |
print("="*60 + "\n")
|
| 66 |
return cosyvoice_model
|
| 67 |
|
| 68 |
+
except ImportError as ie:
|
| 69 |
+
print(f"⚠ 官方 CosyVoice 包不可用: {ie}")
|
| 70 |
+
print(" 尝试其他加载方式...")
|
| 71 |
+
|
| 72 |
+
# 方法2: 尝试使用 transformers AutoModel(需要 trust_remote_code)
|
| 73 |
+
print("\n尝试使用 transformers AutoModel...")
|
| 74 |
+
try:
|
| 75 |
+
from transformers import AutoModel
|
| 76 |
+
|
| 77 |
+
model_name = "FunAudioLLM/CosyVoice-300M"
|
| 78 |
+
print(f"从 {model_name} 加载...")
|
| 79 |
+
|
| 80 |
+
# 使用 trust_remote_code=True 加载自定义模型
|
| 81 |
+
model = AutoModel.from_pretrained(
|
| 82 |
+
model_name,
|
| 83 |
+
trust_remote_code=True,
|
| 84 |
+
torch_dtype=torch.float32,
|
| 85 |
+
low_cpu_mem_usage=True
|
| 86 |
+
)
|
| 87 |
+
model.eval()
|
| 88 |
+
|
| 89 |
+
# 检查模型方法
|
| 90 |
+
has_inference_sft = hasattr(model, 'inference_sft')
|
| 91 |
+
has_inference_zero_shot = hasattr(model, 'inference_zero_shot')
|
| 92 |
+
has_inference_cross_lingual = hasattr(model, 'inference_cross_lingual')
|
| 93 |
+
|
| 94 |
+
print(f"模型类型: {type(model).__name__}")
|
| 95 |
+
print(f"推理方法:")
|
| 96 |
+
print(f" - inference_sft: {has_inference_sft}")
|
| 97 |
+
print(f" - inference_zero_shot: {has_inference_zero_shot}")
|
| 98 |
+
print(f" - inference_cross_lingual: {has_inference_cross_lingual}")
|
| 99 |
+
|
| 100 |
+
if has_inference_sft or has_inference_zero_shot:
|
| 101 |
+
cosyvoice_model = {
|
| 102 |
+
'model': model,
|
| 103 |
+
'type': 'transformers',
|
| 104 |
+
'has_inference': True,
|
| 105 |
+
'sample_rate': getattr(model, 'sample_rate', 22050)
|
| 106 |
+
}
|
| 107 |
+
model_loaded = True
|
| 108 |
+
print("✓ 成功使用 transformers 加载模型")
|
| 109 |
+
print("="*60 + "\n")
|
| 110 |
+
return cosyvoice_model
|
| 111 |
+
else:
|
| 112 |
+
print("⚠ 模型缺少必要的推理方法")
|
| 113 |
+
raise ValueError("Model missing inference methods")
|
| 114 |
+
|
| 115 |
+
except Exception as te:
|
| 116 |
+
print(f"⚠ transformers 加载失败: {te}")
|
| 117 |
import traceback
|
| 118 |
traceback.print_exc()
|
| 119 |
|
| 120 |
+
# 方法3: 下载模型文件(演示模式)
|
| 121 |
+
print("\n尝试下载模型文件...")
|
|
|
|
| 122 |
from huggingface_hub import snapshot_download
|
|
|
|
| 123 |
|
| 124 |
model_name = "FunAudioLLM/CosyVoice-300M"
|
| 125 |
model_dir = snapshot_download(
|
| 126 |
repo_id=model_name,
|
| 127 |
+
allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt", "*.safetensors"],
|
| 128 |
+
cache_dir="./models"
|
| 129 |
)
|
| 130 |
|
| 131 |
+
print(f"✓ 模型文件已下载到: {model_dir}")
|
| 132 |
+
print("\n⚠ 注意: 模型文件已下载,但无法加载推理引擎")
|
| 133 |
+
print(" 建议:")
|
| 134 |
+
print(" 1. 安装完整的 CosyVoice 包: pip install cosyvoice")
|
| 135 |
+
print(" 2. 或在 Hugging Face Space 中使用演示模式")
|
| 136 |
+
print("="*60 + "\n")
|
| 137 |
|
| 138 |
+
cosyvoice_model = None
|
| 139 |
+
model_loaded = True
|
| 140 |
+
return None
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 141 |
|
| 142 |
except Exception as e:
|
| 143 |
print(f"✗ 模型加载失败: {e}")
|
| 144 |
import traceback
|
| 145 |
print(f"详细错误:\n{traceback.format_exc()}")
|
| 146 |
|
|
|
|
| 147 |
print("\n⚠ 使用演示模式")
|
| 148 |
+
print("提示: 要使用完整功能,请:")
|
| 149 |
+
print(" 1. 确保网络连接正常")
|
| 150 |
+
print(" 2. 确保有足够的磁盘空间(约2GB)")
|
| 151 |
+
print(" 3. 安装 CosyVoice: pip install cosyvoice")
|
|
|
|
| 152 |
print("="*60 + "\n")
|
| 153 |
|
| 154 |
cosyvoice_model = None
|
|
|
|
| 233 |
model_type = model.get('type', 'unknown')
|
| 234 |
|
| 235 |
# 官方 CosyVoice AutoModel
|
| 236 |
+
if model_type == 'cosyvoice_official':
|
| 237 |
cosyvoice = model['model']
|
| 238 |
sample_rate = model.get('sample_rate', 22050)
|
| 239 |
|