jerrybwang committed on
Commit
2bc8444
·
1 Parent(s): bd60378
Files changed (1) hide show
  1. app.py +86 -111
app.py CHANGED
@@ -33,147 +33,122 @@ def load_cosyvoice_model():
33
  print("="*60)
34
 
35
  try:
36
- # 方法1: 克隆CosyVoice源码仓库并使用官方加载方式
37
- print("\n步骤1: 检查CosyVoice源码...")
38
-
39
- cosyvoice_repo_path = Path("./CosyVoice")
40
- if not cosyvoice_repo_path.exists():
41
- print("正在克隆CosyVoice源码仓库...")
42
- import subprocess
43
- result = subprocess.run(
44
- ["git", "clone", "--depth", "1", "https://github.com/FunAudioLLM/CosyVoice.git"],
45
- capture_output=True,
46
- text=True
47
- )
48
- if result.returncode != 0:
49
- print(f"⚠ 克隆失败: {result.stderr}")
50
- raise Exception("无法克隆CosyVoice仓库")
51
- print("✓ CosyVoice源码克隆成功")
52
- else:
53
- print("✓ CosyVoice源码已存在")
54
-
55
- # 添加路径到sys.path
56
- cosyvoice_path = str(cosyvoice_repo_path.absolute())
57
- matcha_tts_path = str((cosyvoice_repo_path / "third_party" / "Matcha-TTS").absolute())
58
-
59
- if cosyvoice_path not in sys.path:
60
- sys.path.insert(0, cosyvoice_path)
61
- if matcha_tts_path not in sys.path:
62
- sys.path.insert(0, matcha_tts_path)
63
-
64
- print(f"✓ 已添加路径: {cosyvoice_path}")
65
- print(f"✓ 已添加路径: {matcha_tts_path}")
66
-
67
- # 方法1.1: 使用官方AutoModel加载
68
  try:
69
- print("\n步骤2: 使用CosyVoice官方AutoModel载...")
70
- from cosyvoice.cli.cosyvoice import AutoModel
 
 
71
 
72
- # 下载预训练模型
73
- print("正在下载预训练模型...")
74
- from huggingface_hub import snapshot_download
75
 
 
76
  model_name = "FunAudioLLM/CosyVoice-300M"
77
- model_dir = snapshot_download(
78
- repo_id=model_name,
79
- allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt", "*.safetensors"],
80
- ignore_patterns=["*.md", "*.gitattributes"]
81
- )
82
- print(f"✓ 模型文件已下载到: {model_dir}")
83
 
84
- # 使用AutoModel加
85
- print(f"正在加载模型: {model_dir}")
86
- model = AutoModel(model_dir=model_dir)
87
- print("✓ 使用CosyVoice AutoModel加载成功")
 
 
88
 
89
  cosyvoice_model = {
90
- 'model': model,
91
- 'type': 'official',
92
  'has_inference': True,
93
- 'sample_rate': getattr(model, 'sample_rate', 22050)
94
  }
95
  model_loaded = True
96
- print("✓ 成功加载CosyVoice模型 (官方AutoModel)")
97
  print("="*60 + "\n")
98
  return cosyvoice_model
99
 
100
- except Exception as e:
101
- print(f"⚠ AutoModel加载失败: {str(e)}")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
102
  import traceback
103
  traceback.print_exc()
104
 
105
-
106
- # 方法1.2: 备用方案 - 尝试直接加PyTorch模型
107
- print("\n备用方案: 尝试直接加载PyTorch模型文件...")
108
  from huggingface_hub import snapshot_download
109
- import glob
110
 
111
  model_name = "FunAudioLLM/CosyVoice-300M"
112
  model_dir = snapshot_download(
113
  repo_id=model_name,
114
- allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt"],
115
- ignore_patterns=["*.md", "*.gitattributes"]
116
  )
117
 
118
- # 查找模型文件
119
- model_files = glob.glob(os.path.join(model_dir, "**/*.pt"), recursive=True)
120
- model_files += glob.glob(os.path.join(model_dir, "**/*.pth"), recursive=True)
121
- model_files += glob.glob(os.path.join(model_dir, "**/*.bin"), recursive=True)
 
 
122
 
123
- if model_files:
124
- print(f"找到模型文件: {model_files[0]}")
125
- model = torch.load(model_files[0], map_location='cpu')
126
-
127
- # 检查模型是否可用
128
- if isinstance(model, dict):
129
- print("✓ PyTorch模型字典加载成功")
130
- cosyvoice_model = {
131
- 'model': model,
132
- 'type': 'pytorch_dict',
133
- 'model_dir': model_dir,
134
- 'has_inference': False
135
- }
136
- else:
137
- if hasattr(model, 'eval'):
138
- model.eval()
139
- print(f"✓ PyTorch模型加载成功: {type(model).__name__}")
140
- cosyvoice_model = {
141
- 'model': model,
142
- 'type': 'pytorch',
143
- 'model_dir': model_dir,
144
- 'has_inference': False
145
- }
146
-
147
- model_loaded = True
148
- print("✓ 成功加载CosyVoice模型 (PyTorch)")
149
- print("="*60 + "\n")
150
- return cosyvoice_model
151
- else:
152
- print("⚠ 未找到模型权重文件")
153
- # 模型文件已下载,但没有找到权重文件
154
- cosyvoice_model = {
155
- 'model': None,
156
- 'type': 'downloaded',
157
- 'model_dir': model_dir,
158
- 'has_inference': False
159
- }
160
- model_loaded = True
161
- print("✓ 模型文件已下载(但未找到权重文件)")
162
- print("="*60 + "\n")
163
- return cosyvoice_model
164
 
165
  except Exception as e:
166
  print(f"✗ 模型加载失败: {e}")
167
  import traceback
168
  print(f"详细错误:\n{traceback.format_exc()}")
169
 
170
- # 演示模式(加载失败)
171
  print("\n⚠ 使用演示模式")
172
- print("提示: 要使用完整功能,请确保:")
173
- print(" 1. 网络连接正常,可以访问Hugging Face Hub")
174
- print(" 2. 有足够的磁盘空间(约2GB)")
175
- print(" 3. 安装 huggingface_hub: pip install huggingface_hub")
176
- print(" 4. (可选) 安装CosyVoice官方包: pip install cosyvoice")
177
  print("="*60 + "\n")
178
 
179
  cosyvoice_model = None
@@ -258,7 +233,7 @@ def text_to_speech(text, speaker="中文女", prompt_audio=None, prompt_text=Non
258
  model_type = model.get('type', 'unknown')
259
 
260
  # 官方 CosyVoice AutoModel
261
- if model_type == 'official':
262
  cosyvoice = model['model']
263
  sample_rate = model.get('sample_rate', 22050)
264
 
 
33
  print("="*60)
34
 
35
  try:
36
+ # 方法1: 尝试使用官方 CosyVoice 包
37
+ print("\n尝试使用官方 CosyVoice...")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
38
  try:
39
+ # third_party 路径(如果存在)
40
+ third_party_path = os.path.join(os.path.dirname(__file__), 'third_party', 'Matcha-TTS')
41
+ if os.path.exists(third_party_path):
42
+ sys.path.insert(0, third_party_path)
43
 
44
+ from cosyvoice.cli.cosyvoice import CosyVoice
 
 
45
 
46
+ # 尝试从 Hugging Face Hub 加载
47
  model_name = "FunAudioLLM/CosyVoice-300M"
48
+ print(f"从 {model_name} 加载...")
 
 
 
 
 
49
 
50
+ # 下载模型到本地
51
+ from huggingface_hub import snapshot_download
52
+ model_dir = snapshot_download(repo_id=model_name, cache_dir="./models")
53
+
54
+ # 使用 CosyVoice 加载
55
+ cosyvoice = CosyVoice(model_dir=model_dir)
56
 
57
  cosyvoice_model = {
58
+ 'model': cosyvoice,
59
+ 'type': 'cosyvoice_official',
60
  'has_inference': True,
61
+ 'sample_rate': getattr(cosyvoice, 'sample_rate', 22050)
62
  }
63
  model_loaded = True
64
+ print("✓ 成功使用官方 CosyVoice 包加载模型")
65
  print("="*60 + "\n")
66
  return cosyvoice_model
67
 
68
+ except ImportError as ie:
69
+ print(f"⚠ 官方 CosyVoice 包不可用: {ie}")
70
+ print(" 尝试其他加载方式...")
71
+
72
+ # 方法2: 尝试使用 transformers AutoModel(需要 trust_remote_code)
73
+ print("\n尝试使用 transformers AutoModel...")
74
+ try:
75
+ from transformers import AutoModel
76
+
77
+ model_name = "FunAudioLLM/CosyVoice-300M"
78
+ print(f"从 {model_name} 加载...")
79
+
80
+ # 使用 trust_remote_code=True 加载自定义模型
81
+ model = AutoModel.from_pretrained(
82
+ model_name,
83
+ trust_remote_code=True,
84
+ torch_dtype=torch.float32,
85
+ low_cpu_mem_usage=True
86
+ )
87
+ model.eval()
88
+
89
+ # 检查模型方法
90
+ has_inference_sft = hasattr(model, 'inference_sft')
91
+ has_inference_zero_shot = hasattr(model, 'inference_zero_shot')
92
+ has_inference_cross_lingual = hasattr(model, 'inference_cross_lingual')
93
+
94
+ print(f"模型类型: {type(model).__name__}")
95
+ print(f"推理方法:")
96
+ print(f" - inference_sft: {has_inference_sft}")
97
+ print(f" - inference_zero_shot: {has_inference_zero_shot}")
98
+ print(f" - inference_cross_lingual: {has_inference_cross_lingual}")
99
+
100
+ if has_inference_sft or has_inference_zero_shot:
101
+ cosyvoice_model = {
102
+ 'model': model,
103
+ 'type': 'transformers',
104
+ 'has_inference': True,
105
+ 'sample_rate': getattr(model, 'sample_rate', 22050)
106
+ }
107
+ model_loaded = True
108
+ print("✓ 成功使用 transformers 加载模型")
109
+ print("="*60 + "\n")
110
+ return cosyvoice_model
111
+ else:
112
+ print("⚠ 模型缺少必要的推理方法")
113
+ raise ValueError("Model missing inference methods")
114
+
115
+ except Exception as te:
116
+ print(f"⚠ transformers 加载失败: {te}")
117
  import traceback
118
  traceback.print_exc()
119
 
120
+ # 方法3: 下载模型文件(演示模式)
121
+ print("\n尝试下载模型文件...")
 
122
  from huggingface_hub import snapshot_download
 
123
 
124
  model_name = "FunAudioLLM/CosyVoice-300M"
125
  model_dir = snapshot_download(
126
  repo_id=model_name,
127
+ allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt", "*.safetensors"],
128
+ cache_dir="./models"
129
  )
130
 
131
+ print(f"✓ 模型文件已下载到: {model_dir}")
132
+ print("\n⚠ 注意: 模型文件已下载,但无法加载推理引擎")
133
+ print(" 建议:")
134
+ print(" 1. 安装完整的 CosyVoice 包: pip install cosyvoice")
135
+ print(" 2. 或在 Hugging Face Space 中使用演示模式")
136
+ print("="*60 + "\n")
137
 
138
+ cosyvoice_model = None
139
+ model_loaded = True
140
+ return None
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
141
 
142
  except Exception as e:
143
  print(f"✗ 模型加载失败: {e}")
144
  import traceback
145
  print(f"详细错误:\n{traceback.format_exc()}")
146
 
 
147
  print("\n⚠ 使用演示模式")
148
+ print("提示: 要使用完整功能,请:")
149
+ print(" 1. 确保网络连接正常")
150
+ print(" 2. 确保有足够的磁盘空间(约2GB)")
151
+ print(" 3. 安装 CosyVoice: pip install cosyvoice")
 
152
  print("="*60 + "\n")
153
 
154
  cosyvoice_model = None
 
233
  model_type = model.get('type', 'unknown')
234
 
235
  # 官方 CosyVoice AutoModel
236
+ if model_type == 'cosyvoice_official':
237
  cosyvoice = model['model']
238
  sample_rate = model.get('sample_rate', 22050)
239