Spaces:

WJBSCUT
/

CosyVoice

Running

App Files Files Community

jerrybwang committed on Jan 26

Commit

2bc8444

1 Parent(s): bd60378

33

Browse files

Files changed (1) hide show

app.py +86 -111

app.py CHANGED Viewed

@@ -33,147 +33,122 @@ def load_cosyvoice_model():
     print("="*60)
     try:
-        # 方法1: 克隆CosyVoice源码仓库并使用官方加载方式
-        print("\n步骤1: 检查CosyVoice源码...")
-        cosyvoice_repo_path = Path("./CosyVoice")
-        if not cosyvoice_repo_path.exists():
-            print("正在克隆CosyVoice源码仓库...")
-            import subprocess
-            result = subprocess.run(
-                ["git", "clone", "--depth", "1", "https://github.com/FunAudioLLM/CosyVoice.git"],
-                capture_output=True,
-                text=True
-            )
-            if result.returncode != 0:
-                print(f"⚠ 克隆失败: {result.stderr}")
-                raise Exception("无法克隆CosyVoice仓库")
-            print("✓ CosyVoice源码克隆成功")
-        else:
-            print("✓ CosyVoice源码已存在")
-        # 添加路径到sys.path
-        cosyvoice_path = str(cosyvoice_repo_path.absolute())
-        matcha_tts_path = str((cosyvoice_repo_path / "third_party" / "Matcha-TTS").absolute())
-        if cosyvoice_path not in sys.path:
-            sys.path.insert(0, cosyvoice_path)
-        if matcha_tts_path not in sys.path:
-            sys.path.insert(0, matcha_tts_path)
-        print(f"✓ 已添加路径: {cosyvoice_path}")
-        print(f"✓ 已添加路径: {matcha_tts_path}")
-        # 方法1.1: 使用官方AutoModel加载
         try:
-            print("\n步骤2: 使用CosyVoice官方AutoModel加载...")
-            from cosyvoice.cli.cosyvoice import AutoModel
-            # 下载预训练模型
-            print("正在下载预训练模型...")
-            from huggingface_hub import snapshot_download
             model_name = "FunAudioLLM/CosyVoice-300M"
-            model_dir = snapshot_download(
-                repo_id=model_name,
-                allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt", "*.safetensors"],
-                ignore_patterns=["*.md", "*.gitattributes"]
-            )
-            print(f"✓ 模型文件已下载到: {model_dir}")
-            # 使用AutoModel加载
-            print(f"正在加载模型: {model_dir}")
-            model = AutoModel(model_dir=model_dir)
-            print("✓ 使用CosyVoice AutoModel加载成功")
             cosyvoice_model = {
-                'model': model,
-                'type': 'official',
                 'has_inference': True,
-                'sample_rate': getattr(model, 'sample_rate', 22050)
             }
             model_loaded = True
-            print("✓ 成功加载CosyVoice模型 (官方AutoModel)")
             print("="*60 + "\n")
             return cosyvoice_model
-        except Exception as e:
-            print(f"⚠ AutoModel加载失败: {str(e)}")
             import traceback
             traceback.print_exc()
-        # 方法1.2: 备用方案 - 尝试直接加载PyTorch模型
-        print("\n备用方案: 尝试直接加载PyTorch模型文件...")
         from huggingface_hub import snapshot_download
-        import glob
         model_name = "FunAudioLLM/CosyVoice-300M"
         model_dir = snapshot_download(
             repo_id=model_name,
-            allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt"],
-            ignore_patterns=["*.md", "*.gitattributes"]
         )
-        # 查找模型文件
-        model_files = glob.glob(os.path.join(model_dir, "**/*.pt"), recursive=True)
-        model_files += glob.glob(os.path.join(model_dir, "**/*.pth"), recursive=True)
-        model_files += glob.glob(os.path.join(model_dir, "**/*.bin"), recursive=True)
-        if model_files:
-            print(f"找到模型文件: {model_files[0]}")
-            model = torch.load(model_files[0], map_location='cpu')
-            # 检查模型是否可用
-            if isinstance(model, dict):
-                print("✓ PyTorch模型字典加载成功")
-                cosyvoice_model = {
-                    'model': model,
-                    'type': 'pytorch_dict',
-                    'model_dir': model_dir,
-                    'has_inference': False
-                }
-            else:
-                if hasattr(model, 'eval'):
-                    model.eval()
-                print(f"✓ PyTorch模型加载成功: {type(model).__name__}")
-                cosyvoice_model = {
-                    'model': model,
-                    'type': 'pytorch',
-                    'model_dir': model_dir,
-                    'has_inference': False
-                }
-            model_loaded = True
-            print("✓ 成功加载CosyVoice模型 (PyTorch)")
-            print("="*60 + "\n")
-            return cosyvoice_model
-        else:
-            print("⚠ 未找到模型权重文件")
-            # 模型文件已下载，但没有找到权重文件
-            cosyvoice_model = {
-                'model': None,
-                'type': 'downloaded',
-                'model_dir': model_dir,
-                'has_inference': False
-            }
-            model_loaded = True
-            print("✓ 模型文件已下载（但未找到权重文件）")
-            print("="*60 + "\n")
-            return cosyvoice_model
     except Exception as e:
         print(f"✗ 模型加载失败: {e}")
         import traceback
         print(f"详细错误:\n{traceback.format_exc()}")
-        # 演示模式（加载失败）
         print("\n⚠ 使用演示模式")
-        print("提示: 要使用完整功能，请确保:")
-        print("  1. 网络连接正常，可以访问Hugging Face Hub")
-        print("  2. 有足够的磁盘空间（约2GB）")
-        print("  3. 已安装 huggingface_hub: pip install huggingface_hub")
-        print("  4. (可选) 安装CosyVoice官方包: pip install cosyvoice")
         print("="*60 + "\n")
         cosyvoice_model = None
@@ -258,7 +233,7 @@ def text_to_speech(text, speaker="中文女", prompt_audio=None, prompt_text=Non
                     model_type = model.get('type', 'unknown')
                     # 官方 CosyVoice AutoModel
-                    if model_type == 'official':
                         cosyvoice = model['model']
                         sample_rate = model.get('sample_rate', 22050)

     print("="*60)
     try:
+        # 方法1: 尝试使用官方 CosyVoice 包
+        print("\n尝试使用官方 CosyVoice 包...")
         try:
+            # 添加 third_party 路径（如果存在）
+            third_party_path = os.path.join(os.path.dirname(__file__), 'third_party', 'Matcha-TTS')
+            if os.path.exists(third_party_path):
+                sys.path.insert(0, third_party_path)
+            from cosyvoice.cli.cosyvoice import CosyVoice
+            # 尝试从 Hugging Face Hub 加载
             model_name = "FunAudioLLM/CosyVoice-300M"
+            print(f"从 {model_name} 加载...")
+            # 下载模型到本地
+            from huggingface_hub import snapshot_download
+            model_dir = snapshot_download(repo_id=model_name, cache_dir="./models")
+            # 使用 CosyVoice 加载
+            cosyvoice = CosyVoice(model_dir=model_dir)
             cosyvoice_model = {
+                'model': cosyvoice,
+                'type': 'cosyvoice_official',
                 'has_inference': True,
+                'sample_rate': getattr(cosyvoice, 'sample_rate', 22050)
             }
             model_loaded = True
+            print("✓ 成功使用官方 CosyVoice 包加载模型")
             print("="*60 + "\n")
             return cosyvoice_model
+        except ImportError as ie:
+            print(f"⚠ 官方 CosyVoice 包不可用: {ie}")
+            print("  尝试其他加载方式...")
+        # 方法2: 尝试使用 transformers AutoModel（需要 trust_remote_code）
+        print("\n尝试使用 transformers AutoModel...")
+        try:
+            from transformers import AutoModel
+            model_name = "FunAudioLLM/CosyVoice-300M"
+            print(f"从 {model_name} 加载...")
+            # 使用 trust_remote_code=True 加载自定义模型
+            model = AutoModel.from_pretrained(
+                model_name,
+                trust_remote_code=True,
+                torch_dtype=torch.float32,
+                low_cpu_mem_usage=True
+            )
+            model.eval()
+            # 检查模型方法
+            has_inference_sft = hasattr(model, 'inference_sft')
+            has_inference_zero_shot = hasattr(model, 'inference_zero_shot')
+            has_inference_cross_lingual = hasattr(model, 'inference_cross_lingual')
+            print(f"模型类型: {type(model).__name__}")
+            print(f"推理方法:")
+            print(f"  - inference_sft: {has_inference_sft}")
+            print(f"  - inference_zero_shot: {has_inference_zero_shot}")
+            print(f"  - inference_cross_lingual: {has_inference_cross_lingual}")
+            if has_inference_sft or has_inference_zero_shot:
+                cosyvoice_model = {
+                    'model': model,
+                    'type': 'transformers',
+                    'has_inference': True,
+                    'sample_rate': getattr(model, 'sample_rate', 22050)
+                }
+                model_loaded = True
+                print("✓ 成功使用 transformers 加载模型")
+                print("="*60 + "\n")
+                return cosyvoice_model
+            else:
+                print("⚠ 模型缺少必要的推理方法")
+                raise ValueError("Model missing inference methods")
+        except Exception as te:
+            print(f"⚠ transformers 加载失败: {te}")
             import traceback
             traceback.print_exc()
+        # 方法3: 下载模型文件（演示模式）
+        print("\n尝试下载模型文件...")
         from huggingface_hub import snapshot_download
         model_name = "FunAudioLLM/CosyVoice-300M"
         model_dir = snapshot_download(
             repo_id=model_name,
+            allow_patterns=["*.pt", "*.pth", "*.bin", "*.json", "*.yaml", "*.txt", "*.safetensors"],
+            cache_dir="./models"
         )
+        print(f"✓ 模型文件已下载到: {model_dir}")
+        print("\n⚠ 注意: 模型文件已下载，但无法加载推理引擎")
+        print("  建议:")
+        print("  1. 安装完整的 CosyVoice 包: pip install cosyvoice")
+        print("  2. 或在 Hugging Face Space 中使用演示模式")
+        print("="*60 + "\n")
+        cosyvoice_model = None
+        model_loaded = True
+        return None
     except Exception as e:
         print(f"✗ 模型加载失败: {e}")
         import traceback
         print(f"详细错误:\n{traceback.format_exc()}")
         print("\n⚠ 使用演示模式")
+        print("提示: 要使用完整功能，请:")
+        print("  1. 确保网络连接正常")
+        print("  2. 确保有足够的磁盘空间（约2GB）")
+        print("  3. 安装 CosyVoice: pip install cosyvoice")
         print("="*60 + "\n")
         cosyvoice_model = None
                     model_type = model.get('type', 'unknown')
                     # 官方 CosyVoice AutoModel
+                    if model_type == 'cosyvoice_official':
                         cosyvoice = model['model']
                         sample_rate = model.get('sample_rate', 22050)