Spaces:

TNOT
/

JinrikiHelper

Sleeping

App Files Files Community

TNOT committed on Mar 15

Commit

8b09b87

1 Parent(s): ec4b9e1

sync: align master with local snapshot without force

Browse files

Files changed (22) hide show

.gitattributes +2 -47
.github/ISSUE_TEMPLATE/bug_report.md +0 -32
.gitignore +24 -44
README_1.1.txt +3 -0
app.py +5 -96
docs/.gitkeep +0 -0
docs/FFmpeg环境变量问题排查.md +96 -0
docs/MFA引擎安装说明.md +0 -64
plans/cvvc_export_design.md +339 -0
plans/cvvc_implementation_summary.md +128 -0
requirements.txt +0 -0
run_portable.bat +10 -0
src/export_plugins/utau_oto_export.py +1080 -93
src/gui_cloud.py +31 -280
src/mfa_model_downloader.py +59 -96
src/mfa_runner.py +0 -12
src/pipeline.py +3 -20
tests/.gitkeep +0 -0
tests/test_mfa_model_downloader.py +0 -182
tests/test_mfa_runner.py +0 -243
tests/test_silero_vad_downloader.py +0 -65
目前便携版仅支持Windows！.txt +1 -0

.gitattributes CHANGED Viewed

@@ -1,47 +1,2 @@
-*.7z filter=lfs diff=lfs merge=lfs -text
-*.arrow filter=lfs diff=lfs merge=lfs -text
-*.bin filter=lfs diff=lfs merge=lfs -text
-*.bin.* filter=lfs diff=lfs merge=lfs -text
-*.bz2 filter=lfs diff=lfs merge=lfs -text
-*.ftz filter=lfs diff=lfs merge=lfs -text
-*.gz filter=lfs diff=lfs merge=lfs -text
-*.h5 filter=lfs diff=lfs merge=lfs -text
-*.joblib filter=lfs diff=lfs merge=lfs -text
-*.lfs.* filter=lfs diff=lfs merge=lfs -text
-*.model filter=lfs diff=lfs merge=lfs -text
-*.msgpack filter=lfs diff=lfs merge=lfs -text
-*.onnx filter=lfs diff=lfs merge=lfs -text
-*.ot filter=lfs diff=lfs merge=lfs -text
-*.parquet filter=lfs diff=lfs merge=lfs -text
-*.pb filter=lfs diff=lfs merge=lfs -text
-*.pt filter=lfs diff=lfs merge=lfs -text
-*.pth filter=lfs diff=lfs merge=lfs -text
-*.rar filter=lfs diff=lfs merge=lfs -text
-saved_model/**/* filter=lfs diff=lfs merge=lfs -text
-*.tar.* filter=lfs diff=lfs merge=lfs -text
-*.tflite filter=lfs diff=lfs merge=lfs -text
-*.tgz filter=lfs diff=lfs merge=lfs -text
-*.xz filter=lfs diff=lfs merge=lfs -text
-*.zip filter=lfs diff=lfs merge=lfs -text
-*.zstandard filter=lfs diff=lfs merge=lfs -text
-*.tfevents* filter=lfs diff=lfs merge=lfs -text
-*.db* filter=lfs diff=lfs merge=lfs -text
-*.ark* filter=lfs diff=lfs merge=lfs -text
-**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
-**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
-**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
-*.safetensors filter=lfs diff=lfs merge=lfs -text
-*.ckpt filter=lfs diff=lfs merge=lfs -text
-*.gguf* filter=lfs diff=lfs merge=lfs -text
-*.ggml filter=lfs diff=lfs merge=lfs -text
-*.llamafile* filter=lfs diff=lfs merge=lfs -text
-*.pt2 filter=lfs diff=lfs merge=lfs -text
-*.mlmodel filter=lfs diff=lfs merge=lfs -text
-*.npy filter=lfs diff=lfs merge=lfs -text
-*.npz filter=lfs diff=lfs merge=lfs -text
-*.pickle filter=lfs diff=lfs merge=lfs -text
-*.pkl filter=lfs diff=lfs merge=lfs -text
-*.tar filter=lfs diff=lfs merge=lfs -text
-*.wasm filter=lfs diff=lfs merge=lfs -text
-*.zst filter=lfs diff=lfs merge=lfs -text
-*tfevents* filter=lfs diff=lfs merge=lfs -text


1	+ tools/mfa_engine/Lib/site-packages/sudachidict_core/resources/system.dic filter=lfs diff=lfs merge=lfs -text
2	+ tools/mfa_engine/Library/lib/SPIRV-Tools-opt.lib filter=lfs diff=lfs merge=lfs -text

.github/ISSUE_TEMPLATE/bug_report.md DELETED Viewed

@@ -1,32 +0,0 @@
----
-name: Bug 报告
-about: 报告程序问题或错误
-title: '[BUG] '
-labels: bug
-assignees: ''
----
-## 问题描述
-简要描述遇到的问题
-## 复现步骤
-1. 打开程序
-2. 点击 ...
-3. 出现错误
-## 期望行为
-描述期望的正确行为
-## 环境信息
-- 操作系统: Windows 10 / Windows 11
-- Python 版本:
-- 显卡: 有 NVIDIA GPU / 无独立显卡
-- 内存:
-## 日志输出
-```
-粘贴相关日志或错误信息
-```
-## 截图
-如有必要，添加截图帮助说明问题

.gitignore CHANGED Viewed

@@ -1,49 +1,29 @@
-# 虚拟环境
-.venv/
-venv/
-# Python 缓存
-__pycache__/
-*.py[cod]
-*$py.class
-*.so
-*.egg
-*.egg-info/
-# IDE
-.idea/
-.kiro/
-.vscode/
-*.swp
-*.swo
-# 日志和临时文件
-*.log
-*.tmp
-temp/
 mfa_temp/
-# 用户配置 (包含本地路径)
 config.json
-# 本地启动脚本 (包含本地路径)
-run_local*.bat
-# 用户数据目录 (保留目录结构)
-bank/*/
-export/*/
-!bank/.gitkeep
-!export/.gitkeep
-# 测试临时文件
-tests/temp/*
-!tests/temp/.gitkeep
-# MFA 引擎 (体积大，需单独下载)
-tools/mfa_engine/
-# 模型文件 (体积大，由用户下载)
-models/whisper/
-models/silero_vad/*.onnx
-models/mfa/*.zip
-models/mfa/*.dict

+# Large local/portable data folders
+便携版打包/
+bank/
+export/
+mfa_data/
 mfa_temp/
+models/
+python/
+tools/mfa_engine/
+tools/ffmpeg/
+# Local config files
 config.json
+*.local.json
+.env
+.env.*
+# Python cache/log files
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+*.log
+# OS/editor
+.DS_Store
+Thumbs.db
+.vscode/
+.idea/

README_1.1.txt ADDED Viewed

	@@ -0,0 +1,3 @@


1	+ 此包为增量更新包！请用包内文件覆盖 1.0 版本中的对应文件以完成更新。
2	+
3	+ 更新内容大概为：更新了UTAU oto导出，现在支持中文，并且能导出日语的CVVC音源，内置了FFmpeg，修复了可能的路径问题

app.py CHANGED Viewed

@@ -13,14 +13,10 @@ from pathlib import Path
 logging.basicConfig(
     level=logging.INFO,
-    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s',
-    handlers=[logging.StreamHandler(sys.stdout)]
 )
 logger = logging.getLogger(__name__)
-# 确保日志立即输出（禁用缓冲）
-sys.stdout.reconfigure(line_buffering=True) if hasattr(sys.stdout, 'reconfigure') else None
 # 项目根目录
 BASE_DIR = Path(__file__).parent.absolute()
@@ -53,69 +49,6 @@ MODELS_DIR = None  # 延迟初始化
 MFA_DIR = None
-def cleanup_temp_files():
-    """
-    启动时清理临时文件，释放磁盘空间
-    清理目标：
-    - /tmp/gradio/* (Gradio 上传缓存)
-    - /tmp/jinriki_* (本应用的工作空间)
-    - /tmp/mfa_* (MFA 临时文件)
-    """
-    import shutil
-    import time
-    logger.info("清理临时文件...")
-    cleaned_size = 0
-    cleaned_count = 0
-    # 清理 Gradio 缓存
-    gradio_tmp = Path("/tmp/gradio")
-    if gradio_tmp.exists():
-        try:
-            for item in gradio_tmp.iterdir():
-                try:
-                    if item.is_dir():
-                        size = sum(f.stat().st_size for f in item.rglob('*') if f.is_file())
-                        shutil.rmtree(item)
-                    else:
-                        size = item.stat().st_size
-                        item.unlink()
-                    cleaned_size += size
-                    cleaned_count += 1
-                except Exception:
-                    pass
-        except Exception as e:
-            logger.warning(f"清理 Gradio 缓存失败: {e}")
-    # 清理 jinriki 工作空间
-    tmp_dir = Path("/tmp")
-    if tmp_dir.exists():
-        try:
-            for item in tmp_dir.iterdir():
-                if item.name.startswith("jinriki_") or item.name.startswith("mfa_"):
-                    try:
-                        if item.is_dir():
-                            size = sum(f.stat().st_size for f in item.rglob('*') if f.is_file())
-                            shutil.rmtree(item)
-                        else:
-                            size = item.stat().st_size
-                            item.unlink()
-                        cleaned_size += size
-                        cleaned_count += 1
-                    except Exception:
-                        pass
-        except Exception as e:
-            logger.warning(f"清理工作空间失败: {e}")
-    if cleaned_count > 0:
-        size_mb = cleaned_size / (1024 * 1024)
-        logger.info(f"已清理 {cleaned_count} 个临时文件/目录，释放 {size_mb:.1f} MB")
-    else:
-        logger.info("无需清理临时文件")
 def ensure_ffmpeg():
     """确保 ffmpeg 已安装（用于音频格式转换，支持 m4a 等格式）"""
     import shutil
@@ -168,17 +101,9 @@ def setup_environment():
         Path("/home/studio_service").exists(), # 魔搭创空间特征目录
     ])
-    # 云端环境启动时清理临时文件，释放磁盘空间
-    if is_cloud:
-        cleanup_temp_files()
-    logger.info("清理完成，继续初始化...")
     # 确保 ffmpeg 已安装（支持 m4a 等音频格式）
     if is_cloud or platform.system() != "Windows":
-        logger.info("检查 ffmpeg...")
         ensure_ffmpeg()
-        logger.info("ffmpeg 检查完成")
     # 魔搭创空间无法访问 HuggingFace，使用镜像
     if is_cloud and Path("/home/studio_service").exists():
@@ -196,7 +121,6 @@ def setup_environment():
     if platform.system() != "Windows":
         logger.info("Linux 环境，检查并安装 MFA...")
         setup_mfa_linux()
-        logger.info("MFA 设置完成")
     if is_cloud:
         logger.info("检测到云端环境，正在初始化...")
@@ -205,9 +129,7 @@ def setup_environment():
         os.environ.setdefault("TMPDIR", "/tmp")
         # 下载所有必需模型
-        logger.info("开始下载模型...")
         download_all_models()
-        logger.info("模型下载完成")
     else:
         logger.info("本地环境运行")
@@ -248,13 +170,11 @@ def setup_mfa_linux():
             mamba_root.mkdir(parents=True, exist_ok=True)
             # 下载并安装 micromamba
-            result = subprocess.run([
                 "bash", "-c",
                 f'curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C {mamba_root} bin/micromamba'
-            ], check=True, capture_output=True, text=True, timeout=120)
             logger.info("micromamba 下载完成")
-        else:
-            logger.info("micromamba 已存在，跳过下载")
         # 2. 使用 micromamba 创建环境并安装 MFA
         mfa_bin_path = mfa_env / "bin" / "mfa"
@@ -277,23 +197,20 @@ def setup_mfa_linux():
                 logger.info("MFA 环境验证通过，无需重新安装")
         if need_install:
-            logger.info("使用 micromamba 安装 MFA（这可能需要几分钟）...")
             env = os.environ.copy()
             env["MAMBA_ROOT_PREFIX"] = str(mamba_root)
             # 创建环境并安装 MFA（指定 Python 3.11）
-            logger.info("执行 micromamba create...")
-            result = subprocess.run([
                 str(mamba_bin), "create", "-n", "mfa",
                 "-c", "conda-forge",
                 "python=3.11",
                 "montreal-forced-aligner",
                 "-y"
             ], env=env, check=True, capture_output=True, text=True, timeout=600)
-            logger.info("MFA 环境创建完成")
             # 更新确保使用 CPU 版本的 kaldi
-            logger.info("安装 CPU 版本 kaldi...")
             subprocess.run([
                 str(mamba_bin), "install", "-n", "mfa",
                 "-c", "conda-forge",
@@ -653,21 +570,13 @@ def download_mfa_models_all() -> bool:
 def main():
     """主入口"""
-    logger.info("=" * 50)
-    logger.info("人力V助手 云端版启动")
-    logger.info("=" * 50)
     setup_environment()
-    logger.info("环境初始化完成，启动 Gradio UI...")
     # 导入并启动云端 GUI
     from src.gui_cloud import create_cloud_ui
     app = create_cloud_ui()
-    logger.info("Gradio UI 创建完成，启动服务...")
     # 云端配置
     # 启用队列，魔搭CPU按需分配，无需设置并发上限
     app.queue()

 logging.basicConfig(
     level=logging.INFO,
+    format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
 )
 logger = logging.getLogger(__name__)
 # 项目根目录
 BASE_DIR = Path(__file__).parent.absolute()
 MFA_DIR = None
 def ensure_ffmpeg():
     """确保 ffmpeg 已安装（用于音频格式转换，支持 m4a 等格式）"""
     import shutil
         Path("/home/studio_service").exists(), # 魔搭创空间特征目录
     ])
     # 确保 ffmpeg 已安装（支持 m4a 等音频格式）
     if is_cloud or platform.system() != "Windows":
         ensure_ffmpeg()
     # 魔搭创空间无法访问 HuggingFace，使用镜像
     if is_cloud and Path("/home/studio_service").exists():
     if platform.system() != "Windows":
         logger.info("Linux 环境，检查并安装 MFA...")
         setup_mfa_linux()
     if is_cloud:
         logger.info("检测到云端环境，正在初始化...")
         os.environ.setdefault("TMPDIR", "/tmp")
         # 下载所有必需模型
         download_all_models()
     else:
         logger.info("本地环境运行")
             mamba_root.mkdir(parents=True, exist_ok=True)
             # 下载并安装 micromamba
+            subprocess.run([
                 "bash", "-c",
                 f'curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C {mamba_root} bin/micromamba'
+            ], check=True, capture_output=True, timeout=120)
             logger.info("micromamba 下载完成")
         # 2. 使用 micromamba 创建环境并安装 MFA
         mfa_bin_path = mfa_env / "bin" / "mfa"
                 logger.info("MFA 环境验证通过，无需重新安装")
         if need_install:
+            logger.info("使用 micromamba 安装 MFA...")
             env = os.environ.copy()
             env["MAMBA_ROOT_PREFIX"] = str(mamba_root)
             # 创建环境并安装 MFA（指定 Python 3.11）
+            subprocess.run([
                 str(mamba_bin), "create", "-n", "mfa",
                 "-c", "conda-forge",
                 "python=3.11",
                 "montreal-forced-aligner",
                 "-y"
             ], env=env, check=True, capture_output=True, text=True, timeout=600)
             # 更新确保使用 CPU 版本的 kaldi
             subprocess.run([
                 str(mamba_bin), "install", "-n", "mfa",
                 "-c", "conda-forge",
 def main():
     """主入口"""
     setup_environment()
     # 导入并启动云端 GUI
     from src.gui_cloud import create_cloud_ui
     app = create_cloud_ui()
     # 云端配置
     # 启用队列，魔搭CPU按需分配，无需设置并发上限
     app.queue()

docs/.gitkeep DELETED Viewed

File without changes

docs/FFmpeg环境变量问题排查.md ADDED Viewed

	@@ -0,0 +1,96 @@

+# FFmpeg 环境变量问题排查指南
+## 问题现象
+用户已安装 FFmpeg 并设置了系统环境变量，但运行便携版时仍报错：
+```
+FileNotFoundError: [WinError 2] 系统找不到指定的档案。
+```
+错误发生在 `pipeline.py` 调用 FFmpeg 进行音频处理时。
+## 原因分析
+### 便携版环境隔离
+便携版通过 `run_portable.bat` 启动，使用内嵌的 `python\python.exe`。该 Python 环境可能存在以下问题：
+1. **PATH 环境变量未正确继承** - 便携版 Python 可能无法访问系统 PATH 中的 FFmpeg
+2. **CMD 窗口环境变量刷新问题** - 新设置的环境变量需要重启 CMD 窗口才能生效
+## 解决方案
+### 方案一：重启命令提示符（推荐先尝试）
+如果刚刚设置完 FFmpeg 环境变量，需要：
+1. **关闭所有 CMD 窗口**
+2. **重新打开 CMD 窗口**
+3. 再次运行 `run_portable.bat`
+> 环境变量修改后，已打开的 CMD 窗口不会自动刷新，必须重新打开。
+### 方案二：验证 FFmpeg 是否正确安装
+在 CMD 中执行以下命令验证：
+```cmd
+where ffmpeg
+ffmpeg -version
+```
+如果显示 "找不到文件" 或报错，说明环境变量设置有问题。
+### 方案三：检查环境变量设置
+1. 按 `Win + R`，输入 `sysdm.cpl`，回车
+2. 点击「高级」→「环境变量」
+3. 在「系统变量」或「用户变量」中找到 `Path`
+4. 确认 FFmpeg 的 `bin` 目录已添加，例如：
+   ```
+   C:\ffmpeg\bin
+   ```
+5. 点击确定保存，然后**重新打开 CMD 窗口**
+### 方案四：在便携版脚本中显式指定 FFmpeg 路径
+如果上述方案无效，可以修改 `run_portable.bat`，在启动前手动添加 FFmpeg 路径：
+```bat
+@echo off
+chcp 65001 >nul
+echo 启动人力V助手 (便携版)...
+REM 添加 FFmpeg 到 PATH（请修改为你的实际路径）
+set PATH=%PATH%;C:\ffmpeg\bin
+set PYTHONPATH=%~dp0
+"%~dp0python\python.exe" "%~dp0main.py"
+pause
+```
+将 `C:\ffmpeg\bin` 替换为你的 FFmpeg 实际安装路径。
+### 方案五：将 FFmpeg 放入便携版目录
+将 `ffmpeg.exe` 和 `ffprobe.exe` 直接复制到便携版根目录（与 `main.py` 同级），程序会优先使用当前目录下的可执行文件。
+## 快速诊断命令
+在 `run_portable.bat` 所在目录打开 CMD，执行：
+```cmd
+REM 检查系统 FFmpeg
+where ffmpeg
+REM 检查便携版 Python 能否找到 FFmpeg
+python\python.exe -c "import subprocess; subprocess.run(['ffmpeg', '-version'])"
+```
+如果第一条命令成功但第二条失败，说明便携版 Python 环境与系统环境隔离，请使用方案四或方案五。
+## 相关文件
+- `run_portable.bat` - 便携版启动脚本
+- `src/pipeline.py` - 音频处理流水线，调用 FFmpeg 的位置

docs/MFA引擎安装说明.md DELETED Viewed

@@ -1,64 +0,0 @@
-# MFA 引擎本地安装说明
-本文档介绍如何在本地部署 MFA (Montreal Forced Aligner) 引擎，适用于想从源码运行项目的用户。
-## 前提条件
-- Windows 系统
-- 已安装 conda 或 micromamba
-## 安装步骤
-### 1. 创建 MFA 环境
-使用 conda：
-```bash
-conda create -n mfa_engine -c conda-forge montreal-forced-aligner
-```
-或使用 micromamba （推荐）：
-```bash
-micromamba create -n mfa_engine -c conda-forge montreal-forced-aligner
-```
-### 2. 提取环境到项目目录
-安装完成后，将环境目录复制到项目的 `tools` 文件夹中：
-conda 默认路径：
-```
-%USERPROFILE%\anaconda3\envs\mfa_engine
-或
-%USERPROFILE%\miniconda3\envs\mfa_engine
-```
-micromamba 默认路径：
-```
-%USERPROFILE%\micromamba\envs\mfa_engine
-```
-将整个 `mfa_engine` 文件夹复制到项目的 `tools/` 目录下，最终结构：
-```
-项目根目录/
-└── tools/
-    └── mfa_engine/
-        ├── python.exe
-        ├── Scripts/
-        │   └── mfa.exe
-        └── ...
-```
-### 3. 验证安装
-运行以下命令验证 MFA 是否可用：
-```bash
-tools\mfa_engine\Scripts\mfa.exe version
-```
-如果正确输出版本号，说明安装成功。
-## 常见问题
-安装过程中如遇到问题或报错，建议将错误信息提供给 AI 助手寻求帮助。

plans/cvvc_export_design.md ADDED Viewed

	@@ -0,0 +1,339 @@

+# CVVC 音源导出功能设计方案
+## 1. 概述
+本方案为 [`utau_oto_export.py`](src/export_plugins/utau_oto_export.py) 插件添加 CVVC（Consonant-Vowel-Vowel-Consonant）音源导出功能。CVVC 相比传统 CV 音源，额外生成 **VC 部（元音到辅音过渡）** 条目，使音源在连续演唱时过渡更加自然流畅。
+## 2. CVVC 音源结构
+### 2.1 条目类型
+| 类型 | 别名格式 | 示例 | 说明 |
+|------|----------|------|------|
+| **CV** | `{辅音}{元音}` | `ba`, `ka`, `ni` | 辅音+元音（现有功能） |
+| **V** | `- {元音}` | `- a`, `- i` | 句首元音（现有功能，纯元音） |
+| **VC** | `{元音} {辅音}` | `a k`, `i n` | 元音到辅音过渡（**新增**） |
+| **VV** | `{元音} {元音}` | `a i`, `i u` | 元音到元音过渡（可选，暂不实现） |
+### 2.2 VC 部参数计算
+VC 部捕捉从元音到下一个辅音的过渡，参数计算逻辑如下：
+```
+音频时间线: |----元音(V)----|----辅音(C)----|
+                      ^              ^
+                   VC开始         VC结束
+                   (offset)       (cutoff位置)
+```
+**VC 部参数说明：**
+| 参数 | 计算方式 | 说明 |
+|------|----------|------|
+| offset | `vowel_end - vowel_duration × vc_offset_ratio` | VC 开始位置，在元音后半段 |
+| consonant | `min(30, (consonant_end - offset) × 0.3)` | 固定区域，较短 |
+| cutoff | `-(consonant_end - offset)` | 负值，到辅音结束 |
+| preutterance | `vowel_end - offset` | 从 offset 到辅音开始的距离 |
+| overlap | `preutterance × vc_overlap_ratio` | 与前一音符的交叉淡化 |
+## 3. 代码修改设计
+### 3.1 新增配置选项
+在 [`get_options()`](src/export_plugins/utau_oto_export.py:254) 方法中添加以下选项：
+```python
+PluginOption(
+    key="cvvc_mode",
+    label="CVVC 模式",
+    option_type=OptionType.SWITCH,
+    default=False,
+    description="启用 CVVC 模式，额外生成 VC 部（元音到辅音过渡）条目"
+),
+PluginOption(
+    key="vc_alias_separator",
+    label="VC 别名分隔符",
+    option_type=OptionType.COMBO,
+    default=" ",
+    choices=[" ", "_", "-"],
+    description="VC 部别名中元音和辅音之间的分隔符",
+    visible_when={"cvvc_mode": True}
+),
+PluginOption(
+    key="vc_offset_ratio",
+    label="VC 偏移比例",
+    option_type=OptionType.NUMBER,
+    default=0.5,
+    min_value=0.3,
+    max_value=0.8,
+    description="VC 部开始位置 = 元音结束位置 - 元音时长 × 此比例",
+    visible_when={"cvvc_mode": True}
+),
+PluginOption(
+    key="vc_overlap_ratio",
+    label="VC Overlap 比例",
+    option_type=OptionType.NUMBER,
+    default=0.5,
+    min_value=0.3,
+    max_value=0.8,
+    description="VC 部的 Overlap = Preutterance × 此比例",
+    visible_when={"cvvc_mode": True}
+),
+```
+### 3.2 新增方法
+#### 3.2.1 `_extract_vc_pairs()` - 提取 VC 对
+在 [`_extract_cv_pairs()`](src/export_plugins/utau_oto_export.py:534) 方法基础上，新增 VC 对提取逻辑：
+```python
+def _extract_vc_pairs(
+    self,
+    words_tier,
+    phones_tier,
+    wav_name: str,
+    wav_duration_ms: float,
+    language: str,
+    use_hiragana: bool,
+    vc_offset_ratio: float,
+    vc_overlap_ratio: float,
+    vc_separator: str
+) -> List[Dict]:
+    """
+    从 phones 层提取元音+辅音对（VC 部）
+    VC 部捕捉从当前元音到下一个辅音的过渡
+    """
+    entries = []
+    intervals = list(phones_tier)
+    for i, interval in enumerate(intervals):
+        phone = interval.mark.strip()
+        if phone in SKIP_MARKS:
+            continue
+        # 当前是元音，检查下一个是否是辅音
+        if is_vowel(phone, language):
+            vowel = phone
+            vowel_start_ms = interval.minTime * 1000
+            vowel_end_ms = interval.maxTime * 1000
+            vowel_duration = vowel_end_ms - vowel_start_ms
+            # 检查下一个音素
+            if i + 1 < len(intervals):
+                next_interval = intervals[i + 1]
+                next_phone = next_interval.mark.strip()
+                if next_phone not in SKIP_MARKS and is_consonant(next_phone, language):
+                    consonant = next_phone
+                    consonant_end_ms = next_interval.maxTime * 1000
+                    # 生成 VC 别名
+                    v_alias = ipa_to_alias(None, vowel, language, use_hiragana)
+                    c_alias = ipa_to_alias(consonant, None, language, use_hiragana)
+                    if v_alias and c_alias:
+                        vc_alias = f"{v_alias}{vc_separator}{c_alias}"
+                        # 计算 VC 参数
+                        entry = self._calculate_vc_params(
+                            wav_name=wav_name,
+                            alias=vc_alias,
+                            vowel_start_ms=vowel_start_ms,
+                            vowel_end_ms=vowel_end_ms,
+                            consonant_end_ms=consonant_end_ms,
+                            wav_duration_ms=wav_duration_ms,
+                            vc_offset_ratio=vc_offset_ratio,
+                            vc_overlap_ratio=vc_overlap_ratio
+                        )
+                        entries.append(entry)
+    return entries
+```
+#### 3.2.2 `_calculate_vc_params()` - 计算 VC 参数
+```python
+def _calculate_vc_params(
+    self,
+    wav_name: str,
+    alias: str,
+    vowel_start_ms: float,
+    vowel_end_ms: float,
+    consonant_end_ms: float,
+    wav_duration_ms: float,
+    vc_offset_ratio: float,
+    vc_overlap_ratio: float
+) -> Dict:
+    """
+    计算 VC 部的 oto.ini 参数
+    VC 部从元音后半段开始，到辅音结束
+    """
+    vowel_duration = vowel_end_ms - vowel_start_ms
+    # offset: 元音后半段位置
+    offset = vowel_end_ms - vowel_duration * vc_offset_ratio
+    # 总时长
+    segment_duration = consonant_end_ms - offset
+    # preutterance: 从 offset 到辅音开始（即元音结束）的距离
+    preutterance = vowel_end_ms - offset
+    # consonant: 固定区域，较短
+    consonant = min(30, segment_duration * 0.3)
+    # overlap: 较大，平滑过渡
+    overlap = preutterance * vc_overlap_ratio
+    # cutoff: 负值，表示总时长
+    cutoff = -segment_duration
+    return {
+        "wav_name": wav_name,
+        "alias": alias,
+        "offset": round(offset, 1),
+        "consonant": round(consonant, 1),
+        "cutoff": round(cutoff, 1),
+        "preutterance": round(preutterance, 1),
+        "overlap": round(overlap, 1),
+        "segment_duration": segment_duration,
+        "is_vc": True  # 标记为 VC 部
+    }
+```
+### 3.3 修改 `_parse_textgrids()` 方法
+在 [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:463) 中添加 CVVC 模式支持：
+```python
+def _parse_textgrids(
+    self,
+    slices_dir: str,
+    textgrid_dir: str,
+    language: str,
+    use_hiragana: bool,
+    overlap_ratio: float,
+    cvvc_mode: bool = False,           # 新增
+    vc_offset_ratio: float = 0.5,      # 新增
+    vc_overlap_ratio: float = 0.5,     # 新增
+    vc_separator: str = " "            # 新增
+) -> Tuple[List[Dict], set]:
+    # ... 现有代码 ...
+    # 提取 CV 对（现有逻辑）
+    entries = self._extract_cv_pairs(...)
+    oto_entries.extend(entries)
+    # 如果启用 CVVC 模式，额外提取 VC 对
+    if cvvc_mode:
+        vc_entries = self._extract_vc_pairs(
+            words_tier, phones_tier, wav_name, wav_duration_ms,
+            language, use_hiragana,
+            vc_offset_ratio, vc_overlap_ratio, vc_separator
+        )
+        oto_entries.extend(vc_entries)
+    # ... 现有代码 ...
+```
+### 3.4 修改 `export()` 方法
+在 [`export()`](src/export_plugins/utau_oto_export.py:353) 中读取 CVVC 相关选项：
+```python
+def export(self, source_name: str, bank_dir: str, options: Dict[str, Any]) -> Tuple[bool, str]:
+    # ... 现有选项读取 ...
+    # CVVC 模式选项
+    cvvc_mode = options.get("cvvc_mode", False)
+    vc_separator = options.get("vc_alias_separator", " ")
+    vc_offset_ratio = float(options.get("vc_offset_ratio", 0.5))
+    vc_overlap_ratio = float(options.get("vc_overlap_ratio", 0.5))
+    # 调用 _parse_textgrids 时传入新参数
+    oto_entries, wav_files = self._parse_textgrids(
+        paths["slices_dir"],
+        paths["textgrid_dir"],
+        language,
+        use_hiragana,
+        overlap_ratio,
+        cvvc_mode=cvvc_mode,
+        vc_offset_ratio=vc_offset_ratio,
+        vc_overlap_ratio=vc_overlap_ratio,
+        vc_separator=vc_separator
+    )
+    # ... 现有代码 ...
+```
+## 4. 流程图
+```mermaid
+flowchart TD
+    A[开始导出] --> B{CVVC 模式?}
+    B -->|否| C[仅提取 CV 对]
+    B -->|是| D[提取 CV 对]
+    D --> E[提取 VC 对]
+    E --> F[合并条目]
+    C --> G[筛选最佳样本]
+    F --> G
+    G --> H[复制音频文件]
+    H --> I[写入 oto.ini]
+    I --> J[写入 character.txt]
+    J --> K[导出完成]
+```
+## 5. VC 部提取流程
+```mermaid
+flowchart LR
+    subgraph TextGrid
+        V1[元音 V] --> C1[辅音 C]
+        C1 --> V2[元音 V]
+        V2 --> C2[辅音 C]
+    end
+    subgraph VC条目
+        VC1[V C - VC部]
+        VC2[V C - VC部]
+    end
+    V1 -.-> VC1
+    C1 -.-> VC1
+    V2 -.-> VC2
+    C2 -.-> VC2
+```
+## 6. 实现步骤
+1. **添加配置选项** - 在 [`get_options()`](src/export_plugins/utau_oto_export.py:254) 中添加 CVVC 相关选项
+2. **实现 VC 参数计算** - 新增 `_calculate_vc_params()` 方法
+3. **实现 VC 对提取** - 新增 `_extract_vc_pairs()` 方法
+4. **修改解析逻辑** - 更新 [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:463) 支持 CVVC 模式
+5. **修改导出入口** - 更新 [`export()`](src/export_plugins/utau_oto_export.py:353) 读取新选项
+6. **更新版本号** - 将版本从 1.1.0 更新为 1.2.0
+## 7. 预期输出示例
+启用 CVVC 模式后，oto.ini 将包含：
+```ini
+# CV 部（现有）
+test_0000.wav=ba,30,50,-110,50,15
+test_0000.wav=ka,140,60,-140,60,18
+# VC 部（新增）
+test_0000.wav=a k,70,20,-90,40,20
+test_0000.wav=a n,180,25,-100,45,22
+```
+## 8. 注意事项
+1. **跨字边界**：VC 部可能跨越 words 层的边界，需要决定是否限制在同一个字内
+2. **别名冲突**：VC 别名可能与 CV 别名冲突，需要确保分隔符正确
+3. **质量筛选**：VC 部也需要参与质量评分和筛选
+4. **编码兼容**：VC 别名中的分隔符需要兼容目标编码（如 Shift_JIS）

plans/cvvc_implementation_summary.md ADDED Viewed

	@@ -0,0 +1,128 @@

+# CVVC 音源导出功能实现总结
+## 实现完成时间
+2026-02-04
+## 版本更新
+- 插件版本从 1.1.0 更新至 **1.2.0**
+## 新增功能
+### 1. CVVC 模式支持
+为 UTAU oto.ini 导出插件添加了 CVVC（Consonant-Vowel-Vowel-Consonant）音源导出功能，可额外生成 **VC 部（元音到辅音过渡）** 条目。
+### 2. 新增配置选项
+在 [`get_options()`](src/export_plugins/utau_oto_export.py:254) 方法中添加了 4 个新选项：
+| 选项 | 类型 | 默认值 | 说明 |
+|------|------|--------|------|
+| `cvvc_mode` | 开关 | False | 启用/禁用 CVVC 模式 |
+| `vc_alias_separator` | 下拉 | " " (空格) | VC 别名分隔符（空格/下划线/连字符） |
+| `vc_offset_ratio` | 数字 | 0.5 | VC 偏移比例（0.3-0.8） |
+| `vc_overlap_ratio` | 数字 | 0.5 | VC Overlap 比例（0.3-0.8） |
+### 3. 新增方法
+#### [`_calculate_vc_params()`](src/export_plugins/utau_oto_export.py:688)
+计算 VC 部的 oto.ini 参数，包括：
+- **offset**: 元音后半段位置
+- **consonant**: 固定区域（较短）
+- **cutoff**: 负值，到辅音结束
+- **preutterance**: 从 offset 到辅音开始的距离
+- **overlap**: 较大（`preutterance × vc_overlap_ratio`），平滑过渡
+#### [`_extract_vc_pairs()`](src/export_plugins/utau_oto_export.py:649)
+从 TextGrid 的 phones 层提取元音+辅音对（VC 部），遍历音素序列，当检测到元音后跟辅音时生成 VC 条目。
+### 4. 修改的方法
+#### [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:539)
+- 添加了 4 个新参数支持 CVVC 模式
+- 在提取 CV 对后，如果启用 CVVC 模式，额外调用 [`_extract_vc_pairs()`](src/export_plugins/utau_oto_export.py:649) 提取 VC 对
+#### [`export()`](src/export_plugins/utau_oto_export.py:397)
+- 读取 CVVC 相关配置选项
+- 根据 CVVC 模式显示不同的日志信息
+- 将 CVVC 参数传递给 [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:539)
+## 工作原理
+### VC 部提取流程
+```
+TextGrid phones 层:
+[元音 V] → [辅音 C] → [元音 V] → [辅音 C]
+    ↓           ↓           ↓           ↓
+生成 VC 条目:  [V C]              [V C]
+```
+### VC 参数计算示例
+假设：
+- 元音时长：100ms (0-100ms)
+- 辅音时长：60ms (100-160ms)
+- `vc_offset_ratio` = 0.5
+- `vc_overlap_ratio` = 0.5
+计算结果：
+- **offset** = 100 - 100×0.5 = 50ms
+- **segment_duration** = 160 - 50 = 110ms
+- **preutterance** = 100 - 50 = 50ms
+- **consonant** = min(30, 110×0.3) = 30ms
+- **overlap** = 50×0.5 = 25ms
+- **cutoff** = -110ms
+## 输出示例
+启用 CVVC 模式后，oto.ini 将包含：
+```ini
+# CV 部（现有功能）
+test_0000.wav=ba,30,50,-110,50,15
+test_0000.wav=ka,140,60,-140,60,18
+# VC 部（新增功能）
+test_0000.wav=a k,70,20,-90,40,20
+test_0000.wav=a n,180,25,-100,45,22
+```
+## 代码验证
+✅ Python 语法检查通过
+```bash
+py -m py_compile src\export_plugins\utau_oto_export.py
+# Exit code: 0 (成功)
+```
+## 使用方法
+1. 在导出插件界面中找到 "UTAU oto.ini 导出" 插件
+2. 启用 **"CVVC 模式"** 开关
+3. 根据需要调整以下参数：
+   - **VC 别名分隔符**：选择空格、下划线或连字符
+   - **VC 偏移比例**：控制 VC 开始位置（推荐 0.5）
+   - **VC Overlap 比例**：控制过渡平滑度（推荐 0.5）
+4. 执行导出
+## 技术特点
+1. **无损兼容**：CVVC 模式为可选功能，不影响现有 CV 导出
+2. **参数可调**：提供多个参数供用户微调 VC 部效果
+3. **自动提取**：从 TextGrid 自动识别元音-辅音序列
+4. **质量筛选**：VC 部条目同样参与质量评分和筛选
+5. **编码兼容**：VC 别名支持多种分隔符，兼容不同编码
+## 注意事项
+1. VC 部的提取依赖于 TextGrid 中音素的正确标注
+2. VC 别名使用分隔符（默认空格）连接元音和辅音
+3. VC 参数的计算基于元音和辅音的时间边界
+4. 建议先用小数据集测试参数效果，再批量导出
+## 后续优化建议
+1. 支持 VV 部（元音到元音过渡）
+2. 支持跨字边界的 VC 提取控制
+3. 添加 VC 部专用的质量评估指标
+4. 支持自定义 VC 别名格式模板

requirements.txt CHANGED Viewed

Binary files a/requirements.txt and b/requirements.txt differ

run_portable.bat ADDED Viewed

	@@ -0,0 +1,10 @@

+@echo off
+chcp 65001 >nul
+echo 启动人力V助手 (便携版)...
+set PYTHONPATH=%~dp0
+set MFA_ROOT_DIR=%~dp0mfa_data
+set PATH=%PATH%;%~dp0tools\ffmpeg\bin
+"%~dp0python\python.exe" "%~dp0main.py"
+pause

src/export_plugins/utau_oto_export.py CHANGED Viewed

@@ -23,28 +23,49 @@ logger = logging.getLogger(__name__)
 # 中文辅音（MFA 输出的 IPA 符号）
 CHINESE_CONSONANTS = {
-    'p', 'pʰ', 'pʲ', 'b', 'm', 'f',
-    't', 'tʰ', 'd', 'n', 'l',
-    'k', 'kʰ', 'ɡ', 'g', 'ŋ', 'x', 'h',
     'tɕ', 'tɕʰ', 'dʑ', 'ɕ', 'ʑ',
     'ts', 'tsʰ', 'dz', 's', 'z',
     'ʈʂ', 'ʈʂʰ', 'ɖʐ', 'ʂ', 'ʐ',
     'ɲ', 'j', 'w', 'ɥ',
-    'ʔ',  # 喉塞音
 }
 # 中文元音（可能带声调标记）
 CHINESE_VOWELS = {
     'a', 'o', 'e', 'i', 'u', 'y', 'ü',
     'ə', 'ɛ', 'ɔ', 'ɤ', 'ɨ', 'ʅ', 'ʉ',
-    'ai', 'ei', 'ao', 'ou',
-    'ia', 'ie', 'iu', 'iao', 'iou',
-    'ua', 'uo', 'ui', 'uai', 'uei',
-    'üe', 'üan', 'ün',
-    'an', 'en', 'in', 'un', 'ün',
-    'ang', 'eng', 'ing', 'ong',
-    'aw', 'ej', 'ow',  # MFA 输出格式
-    'z̩',  # 舌尖元音
 }
 # 日语辅音
@@ -86,6 +107,20 @@ FUZZY_VOWEL_GROUPS = [
     ('in', 'ing'),       # 前鼻/后鼻
     ('ian', 'iang'),     # 前鼻/后鼻
     ('uan', 'uang'),     # 前鼻/后鼻
 ]
@@ -105,11 +140,24 @@ def is_vowel(phone: str, language: str) -> bool:
     base_phone = _strip_tone(phone)
     if language in ('chinese', 'zh', 'mandarin'):
         if base_phone in CHINESE_VOWELS:
             return True
-        for v in ['a', 'o', 'e', 'i', 'u', 'y', 'ə', 'ɛ', 'ɔ', 'ɤ', 'ɨ', 'ʅ', 'ʉ']:
             if base_phone.startswith(v):
                 return True
         return False
     elif language in ('japanese', 'ja', 'jp'):
         return base_phone in JAPANESE_VOWELS or base_phone.rstrip('ː') in {'a', 'i', 'ɯ', 'u', 'e', 'o'}
@@ -127,27 +175,150 @@ def _strip_tone(phone: str) -> str:
 # ==================== IPA 到别名转换 ====================
-# 中文 IPA 到拼音映射
-CHINESE_IPA_TO_PINYIN = {
-    # 辅音
-    'p': 'b', 'pʰ': 'p', 'pʲ': 'p',
     'm': 'm', 'f': 'f',
-    't': 'd', 'tʰ': 't',
     'n': 'n', 'l': 'l',
-    'k': 'g', 'kʰ': 'k',
     'x': 'h', 'h': 'h',
     'tɕ': 'j', 'tɕʰ': 'q', 'ɕ': 'x',
     'ts': 'z', 'tsʰ': 'c', 's': 's',
     'ʈʂ': 'zh', 'ʈʂʰ': 'ch', 'ʂ': 'sh', 'ʐ': 'r',
-    'ɲ': 'n', 'ŋ': 'ng',
-    'j': 'y', 'w': 'w', 'ɥ': 'yu',
     'ʔ': '',
-    # 元音
     'a': 'a', 'o': 'o', 'e': 'e', 'i': 'i', 'u': 'u', 'y': 'v', 'ü': 'v',
-    'ə': 'e', 'ɛ': 'e', 'ɔ': 'o', 'ɤ': 'e',
-    'ai': 'ai', 'ei': 'ei', 'ao': 'ao', 'ou': 'ou',
-    'aw': 'ao', 'ej': 'ei', 'ow': 'ou',
-    'z̩': 'i',
 }
 # 日语 IPA 到罗马音映射
@@ -214,17 +385,13 @@ ROMAJI_TO_HIRAGANA = {
 def ipa_to_alias(consonant: Optional[str], vowel: Optional[str], language: str, use_hiragana: bool = False) -> Optional[str]:
-    """将 IPA 音素转换为别名"""
     c_base = _strip_tone(consonant) if consonant else ''
     v_base = _strip_tone(vowel) if vowel else ''
     if language in ('chinese', 'zh', 'mandarin'):
-        c_alias = CHINESE_IPA_TO_PINYIN.get(c_base, c_base)
-        v_alias = CHINESE_IPA_TO_PINYIN.get(v_base, v_base)
-        alias = (c_alias or '') + (v_alias or '')
-        # 清理非 ASCII 字符
-        alias = ''.join(c for c in alias if c.isascii() and (c.isalnum() or c == '_'))
-        return alias.lower() if alias else None
     else:
         # 日语
         c_alias = JAPANESE_IPA_TO_ROMAJI.get(c_base, c_base)
@@ -243,12 +410,108 @@ def ipa_to_alias(consonant: Optional[str], vowel: Optional[str], language: str,
         return romaji
 class UTAUOtoExportPlugin(ExportPlugin):
     """UTAU oto.ini 导出插件"""
     name = "UTAU oto.ini 导出"
     description = "从 TextGrid 生成 UTAU 音源配置文件，一个 wav 可包含多条配置"
-    version = "1.1.0"
     author = "内置"
     def get_options(self) -> List[PluginOption]:
@@ -348,6 +611,42 @@ class UTAUOtoExportPlugin(ExportPlugin):
                 default="",
                 description="character.txt 中的角色名，留空则使用音源名称"
             ),
         ]
     def export(
@@ -375,6 +674,12 @@ class UTAUOtoExportPlugin(ExportPlugin):
             fuzzy_phoneme = options.get("fuzzy_phoneme", False)
             use_hiragana = (alias_style == "hiragana") and language in ('japanese', 'ja', 'jp')
             # 使用基类方法解析质量评估维度
             enabled_metrics = self.parse_quality_metrics(quality_metrics)
@@ -384,13 +689,20 @@ class UTAUOtoExportPlugin(ExportPlugin):
             os.makedirs(export_dir, exist_ok=True)
             # 步骤1: 解析 TextGrid 并生成 oto 条目
-            self._log("【解析 TextGrid 文件】")
             oto_entries, wav_files = self._parse_textgrids(
                 paths["slices_dir"],
                 paths["textgrid_dir"],
                 language,
                 use_hiragana,
-                overlap_ratio
             )
             if not oto_entries:
@@ -466,9 +778,25 @@ class UTAUOtoExportPlugin(ExportPlugin):
         textgrid_dir: str,
         language: str,
         use_hiragana: bool,
-        overlap_ratio: float
     ) -> Tuple[List[Dict], set]:
-        """解析 TextGrid 文件，提取音素边界"""
         import textgrid
         import soundfile as sf
@@ -522,12 +850,21 @@ class UTAUOtoExportPlugin(ExportPlugin):
             if phones_tier is None:
                 continue
-            # 提取音素对，使用 words 层限制配对范围
             entries = self._extract_cv_pairs(
                 words_tier, phones_tier, wav_name, wav_duration_ms,
                 language, use_hiragana, overlap_ratio
             )
             oto_entries.extend(entries)
         return oto_entries, wav_files
@@ -542,8 +879,8 @@ class UTAUOtoExportPlugin(ExportPlugin):
         overlap_ratio: float
     ) -> List[Dict]:
         """
-        从 phones 层提取辅音+元音对
-        使用 words 层限制配对范围，确保辅音和元音属于同一个字
         """
         entries = []
@@ -584,67 +921,648 @@ class UTAUOtoExportPlugin(ExportPlugin):
             start_ms = interval.minTime * 1000
             end_ms = interval.maxTime * 1000
-            if is_consonant(phone, language):
-                consonant = phone
-                consonant_start = start_ms
-                consonant_end = end_ms
-                consonant_time = interval.minTime  # 用于判断所属 word
-                vowel = None
-                vowel_end = end_ms
-                # 检查下一个音素是否是元音，且在同一个 word 内
-                if i + 1 < len(intervals):
-                    next_interval = intervals[i + 1]
-                    next_phone = next_interval.mark.strip()
-                    next_time = next_interval.minTime
-                    if (next_phone not in SKIP_MARKS and
-                        is_vowel(next_phone, language) and
-                        same_word(consonant_time, next_time)):
-                        vowel = next_phone
-                        vowel_end = next_interval.maxTime * 1000
-                        i += 1
-                alias = ipa_to_alias(consonant, vowel, language, use_hiragana)
-                if not alias:
                     i += 1
-                    continue
-                consonant_duration = consonant_end - consonant_start
-                entry = self._calculate_oto_params(
-                    wav_name=wav_name,
-                    alias=alias,
-                    offset=consonant_start,
-                    consonant_duration=consonant_duration,
-                    segment_end=vowel_end,
-                    wav_duration_ms=wav_duration_ms,
-                    overlap_ratio=overlap_ratio
-                )
-                entries.append(entry)
-            elif is_vowel(phone, language):
-                alias = ipa_to_alias(None, phone, language, use_hiragana)
-                if not alias:
-                    i += 1
                     continue
-                entry = self._calculate_oto_params(
-                    wav_name=wav_name,
-                    alias=alias,
-                    offset=start_ms,
-                    consonant_duration=min(30, (end_ms - start_ms) * 0.2),
-                    segment_end=end_ms,
-                    wav_duration_ms=wav_duration_ms,
-                    overlap_ratio=overlap_ratio
-                )
-                entries.append(entry)
-            i += 1
         return entries
     def _calculate_oto_params(
         self,
         wav_name: str,
@@ -684,6 +1602,67 @@ class UTAUOtoExportPlugin(ExportPlugin):
             "segment_duration": segment_duration,  # 用于排序
         }
     def _filter_by_alias(
         self,
         entries: List[Dict],
@@ -1331,7 +2310,12 @@ class UTAUOtoExportPlugin(ExportPlugin):
         # 获取有效的元音列表（用于验证组合）
         if language in ('chinese', 'zh', 'mandarin'):
-            valid_vowels = {'a', 'o', 'e', 'i', 'u', 'v', 'ai', 'ei', 'ao', 'ou', 'an', 'en', 'ang', 'eng', 'ong', 'er'}
         else:
             valid_vowels = {'a', 'i', 'u', 'e', 'o'}
@@ -1448,10 +2432,13 @@ class UTAUOtoExportPlugin(ExportPlugin):
         all_consonants = ['b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
                           'j', 'q', 'x', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's', 'y', 'w']
-        # 中文所有可能的韵母
-        all_vowels = ['a', 'o', 'e', 'i', 'u', 'v', 'ai', 'ei', 'ao', 'ou',
-                      'an', 'en', 'ang', 'eng', 'ong', 'in', 'ing', 'ian', 'iang',
-                      'uan', 'uang', 'un', 'ia', 'ie', 'iu', 'iao', 'ua', 'uo', 'ui', 'uai']
         fuzzy_count = 0

 # Mandarin consonants (IPA symbols as emitted by MFA)
 CHINESE_CONSONANTS = {
+    # Labials
+    'p', 'pʰ', 'pʲ', 'pʷ', 'b', 'm', 'f',
+    # Alveolars
+    't', 'tʰ', 'tʲ', 'd', 'n', 'l',
+    # Velars
+    'k', 'kʰ', 'kʷ', 'ɡ', 'g', 'ŋ', 'x', 'h',
+    # Alveolo-palatals (pinyin j, q, x)
     'tɕ', 'tɕʰ', 'dʑ', 'ɕ', 'ʑ',
+    # Alveolar affricates and fricatives (pinyin z, c, s)
     'ts', 'tsʰ', 'dz', 's', 'z',
+    # Retroflexes (pinyin zh, ch, sh, r)
     'ʈʂ', 'ʈʂʰ', 'ɖʐ', 'ʂ', 'ʐ',
+    # Palatal nasal and glides
     'ɲ', 'j', 'w', 'ɥ',
+    # Glottal stop
+    'ʔ',
 }
 # Mandarin vowels (may carry tone marks)
+# Note: MFA usually emits each vowel as a single phone, so a compound final
+# arrives split across several phones.
 CHINESE_VOWELS = {
+    # Plain monophthongs
     'a', 'o', 'e', 'i', 'u', 'y', 'ü',
     'ə', 'ɛ', 'ɔ', 'ɤ', 'ɨ', 'ʅ', 'ʉ',
+    # MFA-specific spellings
+    'aw', 'ej', 'ow',  # MFA diphthong notation (pinyin ao, ei, ou)
+    # Apical vowels (zi, ci, si, zhi, chi, shi, ri)
+    'z̩', 'ʐ̩',
+    # Retroflex approximant (er)
+    'ɻ',
+    # Rhotacized vowel (erhua)
+    'ɚ',
+}
+# Mandarin medials (glides between the initial and the main vowel)
+CHINESE_MEDIALS = {
+    'j', 'w', 'ɥ',  # glide forms of i, u, ü
+}
+# Mandarin codas (nasal and vocalic syllable endings)
+CHINESE_CODAS = {
+    'n', 'ŋ',  # nasal codas
+    'i', 'u',  # vocalic codas (second element of a compound final)
 }
 # 日语辅音
     ('in', 'ing'),       # 前鼻/后鼻
     ('ian', 'iang'),     # 前鼻/后鼻
     ('uan', 'uang'),     # 前鼻/后鼻
+    # i 行韵母近似组（带鼻音韵尾的可以用不带鼻音韵尾的替代）
+    ('ia', 'ian'),       # ia ←→ ian（如 xia ←→ xian）
+    ('ie', 'ian'),       # ie ←→ ian（如 jie ←→ jian）
+    ('iao', 'ian'),      # iao ←→ ian（如 qiao ←→ qian）
+    ('iu', 'in'),        # iu ←→ in（如 liu ←→ lin）
+    # u 行韵母近似组
+    ('ua', 'uan'),       # ua ←→ uan（如 kua ←→ kuan）
+    ('uo', 'un'),        # uo ←→ un（如 duo ←→ dun）
+    ('ui', 'un'),        # ui ←→ un（如 dui ←→ dun）
+    ('uai', 'uan'),      # uai ←→ uan（如 kuai ←→ kuan）
+    # 单元音与复韵母近似组
+    ('a', 'ai', 'ao', 'an'),  # a 系列
+    ('o', 'ou', 'ong'),       # o 系列
+    ('e', 'ei', 'en'),        # e 系列
 ]
     base_phone = _strip_tone(phone)
     if language in ('chinese', 'zh', 'mandarin'):
+        # 直接匹配
         if base_phone in CHINESE_VOWELS:
             return True
+        # 检查是否以元音字符开头（处理复合元音）
+        vowel_starts = ['a', 'o', 'e', 'i', 'u', 'y', 'ə', 'ɛ', 'ɔ', 'ɤ', 'ɨ', 'ʅ', 'ʉ', 'ɚ']
+        for v in vowel_starts:
             if base_phone.startswith(v):
                 return True
+        # 检查特殊的舌尖元音（带组合字符）
+        if 'z̩' in base_phone or 'ʐ̩' in base_phone:
+            return True
+        # 检查卷舌近音
+        if 'ɻ' in base_phone:
+            return True
         return False
     elif language in ('japanese', 'ja', 'jp'):
         return base_phone in JAPANESE_VOWELS or base_phone.rstrip('ː') in {'a', 'i', 'ɯ', 'u', 'e', 'o'}
 # ==================== IPA 到别名转换 ====================
+# Maps a Mandarin IPA consonant to its pinyin initial.
+# An empty string means the symbol contributes no initial letter.
+# NOTE(review): 'pʲ' maps to 'p' while 'pʷ' -> 'b' and 'tʲ' -> 'd'; an
+# unaspirated 'pʲ' would normally be pinyin 'b' - confirm against the MFA
+# phone set.
+# NOTE(review): 'b', 'd', 'dʑ', 'dz', 'ɖʐ', 'ʑ' and 'z' are listed in
+# CHINESE_CONSONANTS but have no entry here - confirm that is intended.
+CHINESE_CONSONANT_TO_PINYIN = {
+    'p': 'b', 'pʰ': 'p', 'pʲ': 'p', 'pʷ': 'b',
     'm': 'm', 'f': 'f',
+    't': 'd', 'tʰ': 't', 'tʲ': 'd',
     'n': 'n', 'l': 'l',
+    'k': 'g', 'kʰ': 'k', 'kʷ': 'g',
+    'ɡ': 'g', 'g': 'g',
     'x': 'h', 'h': 'h',
     'tɕ': 'j', 'tɕʰ': 'q', 'ɕ': 'x',
     'ts': 'z', 'tsʰ': 'c', 's': 's',
     'ʈʂ': 'zh', 'ʈʂʰ': 'ch', 'ʂ': 'sh', 'ʐ': 'r',
+    'ɲ': 'n', 'ŋ': '',  # ng is never an initial
+    'j': '', 'w': '', 'ɥ': '',  # medial glides are not initials
     'ʔ': '',
+}
+# Maps a Mandarin IPA vowel (or an already-joined final) to its pinyin final.
+# The letter 'v' stands for ü throughout.
+CHINESE_VOWEL_TO_PINYIN = {
+    # Simple finals
     'a': 'a', 'o': 'o', 'e': 'e', 'i': 'i', 'u': 'u', 'y': 'v', 'ü': 'v',
+    'ə': 'e', 'ɛ': 'e', 'ɔ': 'o', 'ɤ': 'e', 'ɨ': 'i',
+    # Compound finals (IPA spellings MFA may produce)
+    'aj': 'ai', 'aw': 'ao', 'ej': 'ei', 'ow': 'ou',
+    'ai': 'ai', 'ao': 'ao', 'ei': 'ei', 'ou': 'ou',  # already pinyin-shaped
+    # i-row finals (joined forms MFA may produce)
+    'ja': 'ia', 'je': 'ie', 'jɛ': 'ie', 'jao': 'iao', 'jow': 'iu', 'ju': 'iu',
+    'ia': 'ia', 'ie': 'ie', 'iao': 'iao', 'iu': 'iu',  # already pinyin-shaped
+    # u-row finals (joined forms MFA may produce)
+    'wa': 'ua', 'wo': 'uo', 'wɔ': 'uo', 'wej': 'ui', 'waj': 'uai',
+    'ua': 'ua', 'uo': 'uo', 'ui': 'ui', 'uai': 'uai',  # already pinyin-shaped
+    # ü-row finals (joined forms MFA may produce)
+    'ɥe': 've', 'ɥɛ': 've',
+    've': 've', 'yue': 've',  # already pinyin-shaped
+    # Nasal finals (joined forms MFA may produce)
+    'an': 'an', 'en': 'en', 'ang': 'ang', 'eng': 'eng', 'ong': 'ong',
+    'in': 'in', 'ing': 'ing', 'ian': 'ian', 'iang': 'iang', 'iong': 'iong',
+    'uan': 'uan', 'un': 'un', 'uang': 'uang', 'ueng': 'ueng',
+    'van': 'van', 'vn': 'vn',
+    # Apical vowels
+    'z̩': 'i', 'ʐ̩': 'i', 'ʅ': 'i',
+    # Retroflex / rhotacized vowels
+    'ɻ': 'er', 'ɚ': 'er',
+}
+# Maps a (medial, vowel) IPA pair to a pinyin final.
+# Some values are the uncontracted spellings ('iou', 'uei') rather than the
+# written forms 'iu' / 'ui'.
+MEDIAL_VOWEL_TO_FINAL = {
+    # j medial (i-row finals)
+    ('j', 'a'): 'ia', ('j', 'e'): 'ie', ('j', 'ɛ'): 'ie',
+    ('j', 'aw'): 'iao', ('j', 'o'): 'io',
+    ('j', 'u'): 'iu', ('j', 'ow'): 'iou',
+    # w medial (u-row finals)
+    ('w', 'a'): 'ua', ('w', 'o'): 'uo', ('w', 'ɔ'): 'uo',
+    ('w', 'ej'): 'uei', ('w', 'e'): 'ue',
+    ('w', 'aj'): 'uai', ('w', 'ai'): 'uai',
+    # ɥ medial (ü-row finals)
+    ('ɥ', 'e'): 've', ('ɥ', 'ɛ'): 've',
+}
+# Maps a (medial, vowel, coda) IPA triple to a pinyin final.
+# Some values are the uncontracted spelling ('uen') rather than the written
+# form 'un'.
+MEDIAL_VOWEL_CODA_TO_FINAL = {
+    # j medial + vowel + coda
+    ('j', 'a', 'n'): 'ian', ('j', 'e', 'n'): 'in',
+    ('j', 'a', 'ŋ'): 'iang', ('j', 'o', 'ŋ'): 'iong',
+    # w medial + vowel + coda
+    ('w', 'a', 'n'): 'uan', ('w', 'ə', 'n'): 'uen', ('w', 'e', 'n'): 'uen',
+    ('w', 'a', 'ŋ'): 'uang', ('w', 'ə', 'ŋ'): 'ueng', ('w', 'e', 'ŋ'): 'ueng',
+    # ɥ medial + vowel + coda
+    ('ɥ', 'a', 'n'): 'van', ('ɥ', 'e', 'n'): 'vn',
+}
+# Maps a (vowel, following phone) IPA pair to a pinyin final.
+# NOTE(review): ('y', 'n') maps to 'un' although 'y' is ü elsewhere in this
+# file ('y' -> 'v'); confirm this is only used after j/q/x.
+VOWEL_CODA_TO_PINYIN = {
+    # Front-nasal finals
+    ('a', 'n'): 'an', ('ə', 'n'): 'en', ('e', 'n'): 'en',
+    ('i', 'n'): 'in', ('y', 'n'): 'un', ('u', 'n'): 'un',
+    # Back-nasal finals
+    ('a', 'ŋ'): 'ang', ('ə', 'ŋ'): 'eng', ('e', 'ŋ'): 'eng',
+    ('i', 'ŋ'): 'ing', ('o', 'ŋ'): 'ong', ('u', 'ŋ'): 'ong',
+    # Compound finals (vowel + vowel)
+    ('a', 'i'): 'ai', ('e', 'i'): 'ei', ('ej', 'i'): 'ei',
+    ('a', 'u'): 'ao', ('aw', 'u'): 'ao', ('o', 'u'): 'ou', ('ow', 'u'): 'ou',
+    # i-row finals
+    ('i', 'a'): 'ia', ('i', 'e'): 'ie', ('i', 'ɛ'): 'ie',
+    ('i', 'u'): 'iu',
+    # u-row finals
+    ('u', 'a'): 'ua', ('u', 'o'): 'uo', ('u', 'ɔ'): 'uo',
+    ('u', 'i'): 'ui', ('u', 'e'): 'ue',
+    # ü-row finals
+    ('y', 'e'): 've', ('y', 'ɛ'): 've',
+}
+# Whole-syllable overrides: maps an (initial, final) pair straight to its
+# pinyin spelling for combinations with special spelling rules.
+# The initial is '' for a zero-initial syllable; final keys are a mix of IPA
+# strings and pinyin-shaped strings.
+# NOTE(review): ('', 'yn') is listed twice below with the same value.
+IPA_SYLLABLE_TO_PINYIN = {
+    # j/q/x + ü series (ü is written u)
+    ('tɕ', 'y'): 'ju', ('tɕʰ', 'y'): 'qu', ('ɕ', 'y'): 'xu',
+    ('tɕ', 'ɥ'): 'ju', ('tɕʰ', 'ɥ'): 'qu', ('ɕ', 'ɥ'): 'xu',
+    ('tɕ', 'yɛ'): 'jue', ('tɕʰ', 'yɛ'): 'que', ('ɕ', 'yɛ'): 'xue',
+    ('tɕ', 'yan'): 'juan', ('tɕʰ', 'yan'): 'quan', ('ɕ', 'yan'): 'xuan',
+    ('tɕ', 'yn'): 'jun', ('tɕʰ', 'yn'): 'qun', ('ɕ', 'yn'): 'xun',
+    # Zero initial + finals starting with i/u/ü (spelled with y/w)
+    ('', 'i'): 'yi', ('', 'in'): 'yin', ('', 'ing'): 'ying',
+    ('', 'u'): 'wu', ('', 'un'): 'wen', ('', 'ong'): 'weng',
+    ('', 'y'): 'yu', ('', 'yn'): 'yun',
+    # i-row finals (ia, ie, iao, ian, iang, iong, iu)
+    ('', 'ia'): 'ya', ('', 'iɛ'): 'ye', ('', 'ie'): 'ye',
+    ('', 'iao'): 'yao', ('', 'ian'): 'yan', ('', 'iang'): 'yang',
+    ('', 'iou'): 'you', ('', 'iu'): 'you',
+    ('', 'iong'): 'yong',
+    # u-row finals (ua, uo, uai, uei, uan, uen, uang, ueng)
+    ('', 'ua'): 'wa', ('', 'uɔ'): 'wo', ('', 'uo'): 'wo',
+    ('', 'uai'): 'wai', ('', 'uei'): 'wei', ('', 'ui'): 'wei',
+    ('', 'uan'): 'wan', ('', 'uen'): 'wen',
+    ('', 'uang'): 'wang', ('', 'ueng'): 'weng',
+    # ü-row finals (üe, üan, ün)
+    ('', 'yɛ'): 'yue', ('', 'üe'): 'yue',
+    ('', 'yan'): 'yuan', ('', 'üan'): 'yuan',
+    ('', 'yn'): 'yun', ('', 'ün'): 'yun',
+    # zh/ch/sh/r + i, where the vowel is really an apical vowel
+    ('ʈʂ', 'ʐ̩'): 'zhi', ('ʈʂʰ', 'ʐ̩'): 'chi', ('ʂ', 'ʐ̩'): 'shi', ('ʐ', 'ʐ̩'): 'ri',
+    ('ʈʂ', 'z̩'): 'zhi', ('ʈʂʰ', 'z̩'): 'chi', ('ʂ', 'z̩'): 'shi', ('ʐ', 'z̩'): 'ri',
+    ('ʈʂ', 'ʅ'): 'zhi', ('ʈʂʰ', 'ʅ'): 'chi', ('ʂ', 'ʅ'): 'shi', ('ʐ', 'ʅ'): 'ri',
+    # z/c/s + i, where the vowel is really an apical vowel
+    ('ts', 'z̩'): 'zi', ('tsʰ', 'z̩'): 'ci', ('s', 'z̩'): 'si',
+    ('ts', 'ʅ'): 'zi', ('tsʰ', 'ʅ'): 'ci', ('s', 'ʅ'): 'si',
+    # n/l + ü series (ü is kept, written v)
+    ('n', 'y'): 'nv', ('l', 'y'): 'lv',
+    ('n', 'yɛ'): 'nve', ('l', 'yɛ'): 'lve',
+    # Glottal-stop onset: spelled as the bare final
+    ('ʔ', 'a'): 'a', ('ʔ', 'o'): 'o', ('ʔ', 'e'): 'e',
+    ('ʔ', 'ai'): 'ai', ('ʔ', 'ei'): 'ei', ('ʔ', 'ao'): 'ao', ('ʔ', 'ou'): 'ou',
+    ('ʔ', 'an'): 'an', ('ʔ', 'en'): 'en', ('ʔ', 'ang'): 'ang', ('ʔ', 'eng'): 'eng',
+    ('ʔ', 'ej'): 'ei', ('ʔ', 'aw'): 'ao', ('ʔ', 'ow'): 'ou',
+    # Rhotacized syllable (er)
+    ('', 'ɻ'): 'er', ('', 'ɚ'): 'er',
 }
 # 日语 IPA 到罗马音映射
 def ipa_to_alias(consonant: Optional[str], vowel: Optional[str], language: str, use_hiragana: bool = False) -> Optional[str]:
+    """将 IPA 音素转换为别名（标准拼音或罗马音）"""
     c_base = _strip_tone(consonant) if consonant else ''
     v_base = _strip_tone(vowel) if vowel else ''
     if language in ('chinese', 'zh', 'mandarin'):
+        # 中文：使用完整的音节转换规则
+        return _ipa_to_pinyin(c_base, v_base)
     else:
         # 日语
         c_alias = JAPANESE_IPA_TO_ROMAJI.get(c_base, c_base)
         return romaji
+def _ipa_to_pinyin(consonant: str, vowel: str) -> Optional[str]:
+    """
+    将 IPA 辅音+韵母转换为标准汉语拼音
+    参数:
+        consonant: IPA 辅音（已去除声调），可以是空字符串表示零声母
+        vowel: IPA 韵母（已去除声调），可能是单个元音或元音+韵尾的组合
+    返回:
+        标准拼音，如果无法转换则返回 None
+    """
+    # 1. 先查找特殊组合映射
+    syllable_key = (consonant, vowel)
+    if syllable_key in IPA_SYLLABLE_TO_PINYIN:
+        return IPA_SYLLABLE_TO_PINYIN[syllable_key]
+    # 2. 获取声母的拼音
+    c_pinyin = ''
+    if consonant and consonant != 'ʔ':
+        if consonant in CHINESE_CONSONANT_TO_PINYIN:
+            c_pinyin = CHINESE_CONSONANT_TO_PINYIN[consonant]
+        else:
+            # 未知辅音，无法转换
+            return None
+    # 3. 获取韵母的拼音
+    # 韵母可能是单个元音，也可能是元音+韵尾的组合字符串
+    v_pinyin = ''
+    if vowel:
+        # 直接查找完整韵母
+        if vowel in CHINESE_VOWEL_TO_PINYIN:
+            v_pinyin = CHINESE_VOWEL_TO_PINYIN[vowel]
+        else:
+            # 韵母可能是组合形式，无法直接映射
+            # 这种情况应该在 _syllable_to_pinyin 中处理
+            return None
+    if not v_pinyin:
+        return None
+    # 4. 处理零声母（无声母或喉塞音）
+    if not c_pinyin:
+        # 零声母需要根据韵母添加 y/w/yu
+        if v_pinyin == 'i':
+            return 'yi'
+        elif v_pinyin in ('in', 'ing'):
+            return 'y' + v_pinyin
+        elif v_pinyin.startswith('i') and len(v_pinyin) > 1:
+            # ia->ya, ie->ye, iao->yao, ian->yan, iang->yang, iu->you, iong->yong
+            return 'y' + v_pinyin[1:]
+        elif v_pinyin == 'u':
+            return 'wu'
+        elif v_pinyin == 'un':
+            return 'wen'
+        elif v_pinyin == 'ong':
+            return 'weng'
+        elif v_pinyin.startswith('u') and len(v_pinyin) > 1:
+            # ua->wa, uo->wo, uai->wai, ui->wei, uan->wan, uang->wang
+            return 'w' + v_pinyin[1:]
+        elif v_pinyin == 'v':
+            # ü 单独出现写作 yu
+            return 'yu'
+        elif v_pinyin.startswith('v') and len(v_pinyin) > 1:
+            # ve->yue, van->yuan, vn->yun
+            return 'yu' + v_pinyin[1:]
+        else:
+            # a, o, e, ai, ei, ao, ou, an, en, ang, eng, er 等
+            return v_pinyin
+    # 5. 有声母的情况
+    # 5.1 j/q/x + ü 系列：ü 写作 u
+    if c_pinyin in ('j', 'q', 'x'):
+        if v_pinyin == 'v':
+            return c_pinyin + 'u'
+        elif v_pinyin.startswith('v'):
+            # jve->jue, jvan->juan, jvn->jun
+            return c_pinyin + 'u' + v_pinyin[1:]
+        else:
+            return c_pinyin + v_pinyin
+    # 5.2 n/l + ü 系列：保持 v（表示 ü）
+    elif c_pinyin in ('n', 'l'):
+        # 只有 n/l 才需要区分 u 和 ü
+        return c_pinyin + v_pinyin
+    # 5.3 其他声母 + v：v 改写为 u（因为不会产生歧义）
+    elif v_pinyin == 'v':
+        return c_pinyin + 'u'
+    elif v_pinyin.startswith('v'):
+        return c_pinyin + 'u' + v_pinyin[1:]
+    # 5.4 普通组合
+    else:
+        return c_pinyin + v_pinyin
 class UTAUOtoExportPlugin(ExportPlugin):
     """UTAU oto.ini 导出插件"""
     name = "UTAU oto.ini 导出"
     description = "从 TextGrid 生成 UTAU 音源配置文件，一个 wav 可包含多条配置"
+    version = "1.2.0"
     author = "内置"
     def get_options(self) -> List[PluginOption]:
                 default="",
                 description="character.txt 中的角色名，留空则使用音源名称"
             ),
+            PluginOption(
+                key="cvvc_mode",
+                label="CVVC 模式",
+                option_type=OptionType.SWITCH,
+                default=False,
+                description="启用 CVVC 模式，额外生成 VC 部（元音到辅音过渡）条目"
+            ),
+            PluginOption(
+                key="vc_alias_separator",
+                label="VC 别名分隔符",
+                option_type=OptionType.COMBO,
+                default=" ",
+                choices=[" ", "_", "-"],
+                description="VC 部别名中元音和辅音之间的分隔符",
+                visible_when={"cvvc_mode": True}
+            ),
+            PluginOption(
+                key="vc_offset_ratio",
+                label="VC 偏移比例",
+                option_type=OptionType.NUMBER,
+                default=0.5,
+                min_value=0.3,
+                max_value=0.8,
+                description="VC 部开始位置 = 元音结束位置 - 元音时长 × 此比例",
+                visible_when={"cvvc_mode": True}
+            ),
+            PluginOption(
+                key="vc_overlap_ratio",
+                label="VC Overlap 比例",
+                option_type=OptionType.NUMBER,
+                default=0.5,
+                min_value=0.3,
+                max_value=0.8,
+                description="VC 部的 Overlap = Preutterance × 此比例",
+                visible_when={"cvvc_mode": True}
+            ),
         ]
     def export(
             fuzzy_phoneme = options.get("fuzzy_phoneme", False)
             use_hiragana = (alias_style == "hiragana") and language in ('japanese', 'ja', 'jp')
+            # CVVC 模式选项
+            cvvc_mode = options.get("cvvc_mode", False)
+            vc_separator = options.get("vc_alias_separator", " ")
+            vc_offset_ratio = float(options.get("vc_offset_ratio", 0.5))
+            vc_overlap_ratio = float(options.get("vc_overlap_ratio", 0.5))
             # 使用基类方法解析质量评估维度
             enabled_metrics = self.parse_quality_metrics(quality_metrics)
             os.makedirs(export_dir, exist_ok=True)
             # 步骤1: 解析 TextGrid 并生成 oto 条目
+            if cvvc_mode:
+                self._log("【解析 TextGrid 文件】（CVVC 模式）")
+            else:
+                self._log("【解析 TextGrid 文件】")
             oto_entries, wav_files = self._parse_textgrids(
                 paths["slices_dir"],
                 paths["textgrid_dir"],
                 language,
                 use_hiragana,
+                overlap_ratio,
+                cvvc_mode=cvvc_mode,
+                vc_offset_ratio=vc_offset_ratio,
+                vc_overlap_ratio=vc_overlap_ratio,
+                vc_separator=vc_separator
             )
             if not oto_entries:
         textgrid_dir: str,
         language: str,
         use_hiragana: bool,
+        overlap_ratio: float,
+        cvvc_mode: bool = False,
+        vc_offset_ratio: float = 0.5,
+        vc_overlap_ratio: float = 0.5,
+        vc_separator: str = " "
     ) -> Tuple[List[Dict], set]:
+        """解析 TextGrid 文件，提取音素边界
+        参数:
+            slices_dir: 切片目录
+            textgrid_dir: TextGrid 目录
+            language: 语言
+            use_hiragana: 是否使用平假名
+            overlap_ratio: CV 部 overlap 比例
+            cvvc_mode: 是否启用 CVVC 模式
+            vc_offset_ratio: VC 偏移比例
+            vc_overlap_ratio: VC overlap 比例
+            vc_separator: VC 别名分隔符
+        """
         import textgrid
         import soundfile as sf
             if phones_tier is None:
                 continue
+            # 提取 CV 对，使用 words 层限制配对范围
             entries = self._extract_cv_pairs(
                 words_tier, phones_tier, wav_name, wav_duration_ms,
                 language, use_hiragana, overlap_ratio
             )
             oto_entries.extend(entries)
+            # 如果启用 CVVC 模式，额外提取 VC 对
+            if cvvc_mode:
+                vc_entries = self._extract_vc_pairs(
+                    words_tier, phones_tier, wav_name, wav_duration_ms,
+                    language, use_hiragana,
+                    vc_offset_ratio, vc_overlap_ratio, vc_separator
+                )
+                oto_entries.extend(vc_entries)
         return oto_entries, wav_files
         overlap_ratio: float
     ) -> List[Dict]:
         """
+        从 phones 层提取音节（可能包含辅音+元音+韵尾）
+        使用 words 层限制配对范围，确保音素属于同一个字
         """
         entries = []
             start_ms = interval.minTime * 1000
             end_ms = interval.maxTime * 1000
+            # 中文音节结构：(辅音) + (介音) + 元音 + (韵尾)
+            if language in ('chinese', 'zh', 'mandarin'):
+                syllable_phones = []
+                syllable_start = start_ms
+                syllable_end = end_ms
+                consonant_duration = 0
+                # 1. 检查是否有声母（辅音）
+                if is_consonant(phone, language):
+                    syllable_phones.append(phone)
+                    consonant_duration = end_ms - start_ms
+                    i += 1
+                    # 检查下一个音素
+                    if i < len(intervals):
+                        next_interval = intervals[i]
+                        next_phone = next_interval.mark.strip()
+                        if next_phone not in SKIP_MARKS and same_word(interval.minTime, next_interval.minTime):
+                            phone = next_phone
+                            end_ms = next_interval.maxTime * 1000
+                            syllable_end = end_ms
+                        else:
+                            # 只有辅音，没有元音，跳过
+                            continue
+                    else:
+                        # 只有辅音，没有元音，跳过
+                        continue
+                # 2. 检查是否有介音（j, w, ɥ）
+                phone_base = _strip_tone(phone)
+                if phone_base in CHINESE_MEDIALS:
+                    syllable_phones.append(phone)
+                    i += 1
+                    # 检查下一个音素（必须是元音）
+                    if i < len(intervals):
+                        next_interval = intervals[i]
+                        next_phone = next_interval.mark.strip()
+                        if next_phone not in SKIP_MARKS and same_word(interval.minTime, next_interval.minTime):
+                            phone = next_phone
+                            end_ms = next_interval.maxTime * 1000
+                            syllable_end = end_ms
+                        else:
+                            # 只有介音，没有元音，跳过
+                            continue
+                    else:
+                        # 只有介音，没有元音，跳过
+                        continue
+                # 3. 必须有韵母（元音）
+                if is_vowel(phone, language):
+                    syllable_phones.append(phone)
+                    if not consonant_duration:
+                        # 零声母，辅音时长设为元音前30ms
+                        consonant_duration = min(30, (end_ms - start_ms) * 0.2)
+                    syllable_end = end_ms
                     i += 1
+                    # 4. 检查是否有韵尾（n, ng, i, u）
+                    if i < len(intervals):
+                        next_interval = intervals[i]
+                        next_phone = next_interval.mark.strip()
+                        if (next_phone not in SKIP_MARKS and
+                            same_word(interval.minTime, next_interval.minTime)):
+                            # 检查是否是韵尾
+                            next_phone_base = _strip_tone(next_phone)
+                            if next_phone_base in CHINESE_CODAS:
+                                syllable_phones.append(next_phone)
+                                syllable_end = next_interval.maxTime * 1000
+                                i += 1
+                    # 5. 将音节转换为拼音
+                    alias = self._syllable_to_pinyin(syllable_phones, language, use_hiragana)
+                    if alias:
+                        entry = self._calculate_oto_params(
+                            wav_name=wav_name,
+                            alias=alias,
+                            offset=syllable_start,
+                            consonant_duration=consonant_duration,
+                            segment_end=syllable_end,
+                            wav_duration_ms=wav_duration_ms,
+                            overlap_ratio=overlap_ratio
+                        )
+                        entries.append(entry)
+                else:
+                    # 不是元音，跳过
+                    i += 1
+            else:
+                # 日语：简单的 CV 结构
+                if is_consonant(phone, language):
+                    consonant = phone
+                    consonant_start = start_ms
+                    consonant_end = end_ms
+                    consonant_time = interval.minTime
+                    vowel = None
+                    vowel_end = end_ms
+                    if i + 1 < len(intervals):
+                        next_interval = intervals[i + 1]
+                        next_phone = next_interval.mark.strip()
+                        next_time = next_interval.minTime
+                        if (next_phone not in SKIP_MARKS and
+                            is_vowel(next_phone, language) and
+                            same_word(consonant_time, next_time)):
+                            vowel = next_phone
+                            vowel_end = next_interval.maxTime * 1000
+                            i += 1
+                    alias = ipa_to_alias(consonant, vowel, language, use_hiragana)
+                    if alias:
+                        consonant_duration = consonant_end - consonant_start
+                        entry = self._calculate_oto_params(
+                            wav_name=wav_name,
+                            alias=alias,
+                            offset=consonant_start,
+                            consonant_duration=consonant_duration,
+                            segment_end=vowel_end,
+                            wav_duration_ms=wav_duration_ms,
+                            overlap_ratio=overlap_ratio
+                        )
+                        entries.append(entry)
+                elif is_vowel(phone, language):
+                    alias = ipa_to_alias(None, phone, language, use_hiragana)
+                    if alias:
+                        entry = self._calculate_oto_params(
+                            wav_name=wav_name,
+                            alias=alias,
+                            offset=start_ms,
+                            consonant_duration=min(30, (end_ms - start_ms) * 0.2),
+                            segment_end=end_ms,
+                            wav_duration_ms=wav_duration_ms,
+                            overlap_ratio=overlap_ratio
+                        )
+                        entries.append(entry)
+                i += 1
+        return entries
+    def _syllable_to_pinyin(
+        self,
+        phones: List[str],
+        language: str,
+        use_hiragana: bool
+    ) -> Optional[str]:
+        """
+        将音素列表转换为标准汉语拼音（通用方法）
+        采用新的通用转换算法，支持所有标准汉语拼音音节
+        参数:
+            phones: 音素列表（带声调的 IPA 符号）
+            language: 语言
+            use_hiragana: 是否使用平假名（中文忽略此参数）
+        返回:
+            拼音字符串
+        """
+        if not phones:
+            return None
+        # 去除声调
+        phones_base = [_strip_tone(p) for p in phones]
+        # 解析音节结构：(辅音) + (介音) + 元音 + (韵尾)
+        idx = 0
+        c = ''  # 声母
+        m = ''  # 介音
+        v = ''  # 元音
+        cd = ''  # 韵尾
+        # 1. 声母
+        if idx < len(phones_base) and is_consonant(phones_base[idx], language):
+            c = phones_base[idx]
+            idx += 1
+        # 2. 介音
+        if idx < len(phones_base) and phones_base[idx] in CHINESE_MEDIALS:
+            m = phones_base[idx]
+            idx += 1
+        # 3. 元音（必须）
+        if idx < len(phones_base) and is_vowel(phones_base[idx], language):
+            v = phones_base[idx]
+            idx += 1
+        else:
+            # 没有元音，无法形成音节
+            return None
+        # 4. 韵尾
+        if idx < len(phones_base) and phones_base[idx] in CHINESE_CODAS:
+            cd = phones_base[idx]
+            idx += 1
+        # 转换为拼音
+        c_py = CHINESE_CONSONANT_TO_PINYIN.get(c, '')
+        v_py = CHINESE_VOWEL_TO_PINYIN.get(v, v)
+        # 组合韵母
+        final = ''
+        if m == 'j':
+            # i 行韵母
+            if cd == 'n':
+                if v_py == 'a':
+                    final = 'ian'
+                elif v_py == 'e':
+                    final = 'in'  # j + e + n = in (如 xin, yin)
+                else:
+                    final = 'i' + v_py + 'n'
+            elif cd == 'ŋ':
+                if v_py == 'a':
+                    final = 'iang'
+                elif v_py == 'o':
+                    final = 'iong'
+                else:
+                    final = 'i' + v_py + 'ng'
+            elif cd:
+                final = 'i' + v_py + cd
+            else:
+                if v_py == 'a':
+                    final = 'ia'
+                elif v_py == 'e':
+                    final = 'ie'
+                elif v_py == 'ao':
+                    final = 'iao'
+                elif v_py == 'ou':
+                    final = 'iu'
+                else:
+                    final = 'i' + v_py
+        elif m == 'w':
+            # u 行韵母
+            if cd == 'n':
+                if v_py == 'a':
+                    final = 'uan'
+                elif v_py == 'e':
+                    final = 'un'  # w + ə + n = un (如 shun)
+                else:
+                    final = 'u' + v_py + 'n'
+            elif cd == 'ŋ':
+                if v_py == 'a':
+                    final = 'uang'
+                elif v_py == 'e':
+                    final = 'ueng'
+                else:
+                    final = 'u' + v_py + 'ng'
+            elif cd:
+                final = 'u' + v_py + cd
+            else:
+                if v_py == 'a':
+                    final = 'ua'
+                elif v_py == 'o':
+                    final = 'uo'
+                elif v_py == 'ei':
+                    final = 'ui'  # w + ej = ui (如 shui)
+                elif v_py == 'ai':
+                    final = 'uai'
+                else:
+                    final = 'u' + v_py
+        elif m == 'ɥ':
+            # ü 行韵母
+            if cd == 'n':
+                if v_py == 'a':
+                    final = 'van'
+                elif v_py == 'e':
+                    final = 'vn'
+                else:
+                    final = 'v' + v_py + 'n'
+            elif cd:
+                final = 'v' + v_py + cd
+            else:
+                if v_py == 'e':
+                    final = 've'
+                else:
+                    final = 'v' + v_py
+        else:
+            # 无介音
+            if cd == 'n':
+                final = v_py + 'n'
+            elif cd == 'ŋ':
+                final = v_py + 'ng'
+            elif cd:
+                final = v_py + cd
+            else:
+                final = v_py
+        # 组合声母和韵母
+        if not c_py:
+            # 零声母，需要添加 y/w/yu
+            if final.startswith('i'):
+                if final == 'i':
+                    return 'yi'
+                elif final in ('in', 'ing'):
+                    return 'y' + final
+                else:
+                    return 'y' + final[1:]
+            elif final.startswith('u'):
+                if final == 'u':
+                    return 'wu'
+                elif final == 'un':
+                    return 'wen'
+                elif final in ('ueng', 'ong'):
+                    return 'weng'
+                else:
+                    return 'w' + final[1:]
+            elif final.startswith('v'):
+                if final == 'v':
+                    return 'yu'
+                else:
+                    return 'yu' + final[1:]
+            else:
+                return final
+        # 有声母
+        if c_py in ('j', 'q', 'x'):
+            # j/q/x + ü 系列，ü 写作 u
+            if final.startswith('v'):
+                return c_py + 'u' + final[1:]
+            else:
+                return c_py + final
+        elif c_py in ('n', 'l'):
+            # n/l + ü 系列，保持 v
+            return c_py + final
+        else:
+            # 其他声母 + ü，ü 写作 u
+            if final.startswith('v'):
+                return c_py + 'u' + final[1:]
+            else:
+                return c_py + final
+    def _extract_vc_pairs(
+        self,
+        words_tier,
+        phones_tier,
+        wav_name: str,
+        wav_duration_ms: float,
+        language: str,
+        use_hiragana: bool,
+        vc_offset_ratio: float,
+        vc_overlap_ratio: float,
+        vc_separator: str
+    ) -> List[Dict]:
+        """
+        Extract vowel+consonant (VC) transition entries from the phones tier.
+        A VC entry joins the final (V) of one syllable to the initial (C) of the
+        syllable that follows it, covering the transition between the two.
+        Finals and initials are identified by looking the syllable's pinyin up in
+        the tables returned by ``_load_presamp_mapping``.
+        VC aliases are always written in pinyin, whatever ``use_hiragana`` says.
+        Args:
+            words_tier: words tier (kept for signature parity; not read here).
+            phones_tier: iterable of phone intervals exposing ``mark``,
+                ``minTime`` and ``maxTime``; times are multiplied by 1000 to
+                obtain milliseconds.
+            wav_name: audio file name stored in every entry.
+            wav_duration_ms: total audio length, forwarded to
+                ``_calculate_vc_params``.
+            language: language tag; only 'chinese', 'zh' and 'mandarin' yield
+                entries.
+            use_hiragana: ignored for VC entries (pinyin is always used).
+            vc_offset_ratio: VC offset ratio forwarded to ``_calculate_vc_params``.
+            vc_overlap_ratio: VC overlap ratio forwarded to ``_calculate_vc_params``.
+            vc_separator: text placed between the final and the initial in the alias.
+        Returns:
+            List of VC entry dicts; empty for unsupported languages or when the
+            mapping tables are empty.
+        """
+        entries = []
+        if language not in ('chinese', 'zh', 'mandarin'):
+            # CVVC is only implemented for Chinese
+            return entries
+        # Pinyin -> final id / pinyin -> initial id lookup tables
+        vowel_map, consonant_map = self._load_presamp_mapping()
+        if not vowel_map or not consonant_map:
+            self._log("警告: 无法加载 presamp.ini 映射，跳过 VC 部生成")
+            return entries
+        intervals = list(phones_tier)
+        total = len(intervals)
+        # Pass 1: parse syllables as (initial) + (medial) + vowel + (coda)
+        syllables = []
+        i = 0
+        while i < total:
+            phone = intervals[i].mark.strip()
+            if phone in SKIP_MARKS:
+                i += 1
+                continue
+            syllable_phones = []
+            # Start of the final; moved past the initial below when one exists
+            vowel_start = intervals[i].minTime * 1000
+            has_consonant = False
+            # 1. Optional initial (consonant)
+            if is_consonant(phone, language):
+                syllable_phones.append(phone)
+                has_consonant = True
+                i += 1
+                if i >= total:
+                    # Initial with nothing after it: not a syllable
+                    continue
+                next_phone = intervals[i].mark.strip()
+                if next_phone in SKIP_MARKS:
+                    # Initial followed by a skip mark: not a syllable
+                    continue
+                phone = next_phone
+                vowel_start = intervals[i].minTime * 1000
+            # 2. Optional medial (j, w, ɥ)
+            if _strip_tone(phone) in CHINESE_MEDIALS:
+                syllable_phones.append(phone)
+                i += 1
+                if i >= total:
+                    # Medial with nothing after it: not a syllable
+                    continue
+                next_phone = intervals[i].mark.strip()
+                if next_phone in SKIP_MARKS:
+                    # Medial followed by a skip mark: not a syllable
+                    continue
+                phone = next_phone
+            # 3. Mandatory nucleus (vowel)
+            if not is_vowel(phone, language):
+                i += 1
+                continue
+            syllable_phones.append(phone)
+            # intervals[i] is the vowel itself here. Reading the first interval
+            # of the syllable instead (the initial or medial) would collapse the
+            # vowel to zero length whenever no coda follows.
+            vowel_end = intervals[i].maxTime * 1000
+            i += 1
+            # 4. Optional coda (n, ng, i, u) extends the final
+            if i < total:
+                coda = intervals[i].mark.strip()
+                if coda not in SKIP_MARKS and _strip_tone(coda) in CHINESE_CODAS:
+                    syllable_phones.append(coda)
+                    vowel_end = intervals[i].maxTime * 1000
+                    i += 1
+            # 5. Convert to pinyin and resolve final / initial identifiers
+            pinyin = self._syllable_to_pinyin(syllable_phones, language, False)
+            if not pinyin:
+                continue
+            vowel_part = self._find_vowel_in_mapping(pinyin, vowel_map)
+            if not vowel_part:
+                continue
+            consonant_part = None
+            if has_consonant:
+                consonant_part = self._find_consonant_in_mapping(pinyin, consonant_map)
+            syllables.append({
+                'pinyin': pinyin,
+                'vowel_part': vowel_part,
+                'consonant_part': consonant_part,
+                'vowel_start': vowel_start,
+                'vowel_end': vowel_end,
+                'syllable_end': vowel_end
+            })
+        # Pass 2: pair each syllable's final with the next syllable's initial
+        for current, next_syl in zip(syllables, syllables[1:]):
+            next_consonant = next_syl.get('consonant_part')
+            # Zero-initial follower: there is no consonant to transition into
+            if not next_consonant:
+                continue
+            vc_alias = f"{current['vowel_part']}{vc_separator}{next_consonant}"
+            entry = self._calculate_vc_params(
+                wav_name=wav_name,
+                alias=vc_alias,
+                vowel_start_ms=current['vowel_start'],
+                vowel_end_ms=current['vowel_end'],
+                # The next syllable's initial ends where its final starts; the
+                # VC segment must stop there rather than at the syllable end.
+                consonant_end_ms=next_syl['vowel_start'],
+                wav_duration_ms=wav_duration_ms,
+                vc_offset_ratio=vc_offset_ratio,
+                vc_overlap_ratio=vc_overlap_ratio
+            )
+            entries.append(entry)
         return entries
+    def _load_presamp_mapping(self) -> Tuple[Dict[str, str], Dict[str, str]]:
+        """
+        Build the built-in Chinese CVVC lookup tables.
+        The data mirrors the [VOWEL] and [CONSONANT] sections of presamp.ini and
+        is inverted so that a full pinyin syllable is the lookup key.
+        The ``nve`` / ``lve`` spellings are registered next to ``nue`` / ``lue``
+        because the pinyin assembler in this class keeps ``v`` after n/l.
+        Returns:
+            (vowel_map, consonant_map)
+            vowel_map: {full pinyin: final identifier}
+            consonant_map: {full pinyin: initial identifier}
+        """
+        # Final identifier -> syllables ending in that final
+        vowel_data = {
+            'a': ['a', 'ba', 'pa', 'ma', 'fa', 'da', 'ta', 'na', 'la', 'ga', 'ka', 'ha', 'zha', 'cha', 'sha', 'za', 'ca', 'sa', 'ya', 'lia', 'jia', 'qia', 'xia', 'wa', 'gua', 'kua', 'hua', 'zhua', 'shua', 'dia'],
+            'ai': ['ai', 'bai', 'pai', 'mai', 'dai', 'tai', 'nai', 'lai', 'gai', 'kai', 'hai', 'zhai', 'chai', 'shai', 'zai', 'cai', 'sai', 'wai', 'guai', 'kuai', 'huai', 'zhuai', 'chuai', 'shuai'],
+            'an': ['an', 'ban', 'pan', 'man', 'fan', 'dan', 'tan', 'nan', 'lan', 'gan', 'kan', 'han', 'zhan', 'chan', 'shan', 'ran', 'zan', 'can', 'san', 'wan', 'duan', 'tuan', 'nuan', 'luan', 'guan', 'kuan', 'huan', 'zhuan', 'chuan', 'shuan', 'ruan', 'zuan', 'cuan', 'suan'],
+            'ang': ['ang', 'bang', 'pang', 'mang', 'fang', 'dang', 'tang', 'nang', 'lang', 'gang', 'kang', 'hang', 'zhang', 'chang', 'shang', 'rang', 'zang', 'cang', 'sang', 'yang', 'liang', 'jiang', 'qiang', 'xiang', 'wang', 'guang', 'kuang', 'huang', 'zhuang', 'chuang', 'shuang', 'niang'],
+            'ao': ['ao', 'bao', 'pao', 'mao', 'dao', 'tao', 'nao', 'lao', 'gao', 'kao', 'hao', 'zhao', 'chao', 'shao', 'rao', 'zao', 'cao', 'sao', 'yao', 'biao', 'piao', 'miao', 'diao', 'tiao', 'niao', 'liao', 'jiao', 'qiao', 'xiao'],
+            'e': ['e', 'me', 'de', 'te', 'ne', 'le', 'ge', 'ke', 'he', 'zhe', 'che', 'she', 're', 'ze', 'ce', 'se'],
+            'e0': ['ye', 'bie', 'pie', 'mie', 'die', 'tie', 'nie', 'lie', 'jie', 'qie', 'xie', 'yue', 'nue', 'lue', 'nve', 'lve', 'jue', 'que', 'xue'],
+            'ei': ['ei', 'bei', 'pei', 'mei', 'fei', 'dei', 'tei', 'nei', 'lei', 'gei', 'kei', 'hei', 'zhei', 'shei', 'zei', 'wei', 'dui', 'tui', 'gui', 'kui', 'hui', 'zhui', 'chui', 'shui', 'rui', 'zui', 'cui', 'sui'],
+            'en': ['en', 'ben', 'pen', 'men', 'fen', 'nen', 'gen', 'ken', 'hen', 'zhen', 'chen', 'shen', 'ren', 'zen', 'cen', 'sen', 'wen', 'dun', 'tun', 'lun', 'gun', 'kun', 'hun', 'zhun', 'chun', 'shun', 'run', 'zun', 'cun', 'sun'],
+            'en0': ['yan', 'bian', 'pian', 'mian', 'dian', 'tian', 'nian', 'lian', 'jian', 'qian', 'xian', 'yuan', 'juan', 'quan', 'xuan'],
+            'eng': ['beng', 'peng', 'meng', 'feng', 'deng', 'teng', 'neng', 'leng', 'geng', 'keng', 'heng', 'weng', 'zheng', 'cheng', 'sheng', 'reng', 'zeng', 'ceng', 'seng'],
+            'er': ['er'],
+            'i': ['bi', 'pi', 'mi', 'di', 'ti', 'ni', 'li', 'ji', 'qi', 'xi', 'yi'],
+            'in': ['yin', 'bin', 'pin', 'min', 'nin', 'lin', 'jin', 'qin', 'xin'],
+            'ing': ['ying', 'bing', 'ping', 'ming', 'ding', 'ting', 'ning', 'ling', 'jing', 'qing', 'xing'],
+            'i0': ['zi', 'ci', 'si'],
+            'ir': ['zhi', 'chi', 'shi', 'ri'],
+            'o': ['bo', 'po', 'mo', 'fo', 'wo', 'duo', 'tuo', 'nuo', 'luo', 'guo', 'kuo', 'huo', 'zhuo', 'chuo', 'shuo', 'ruo', 'zuo', 'cuo', 'suo'],
+            'ong': ['dong', 'tong', 'nong', 'long', 'gong', 'kong', 'hong', 'zhong', 'chong', 'rong', 'zong', 'cong', 'song', 'yong', 'jiong', 'qiong', 'xiong'],
+            'ou': ['ou', 'pou', 'mou', 'fou', 'dou', 'tou', 'lou', 'gou', 'kou', 'hou', 'zhou', 'chou', 'shou', 'rou', 'zou', 'cou', 'sou', 'you', 'miu', 'diu', 'niu', 'liu', 'jiu', 'qiu', 'xiu'],
+            'u': ['bu', 'pu', 'mu', 'fu', 'du', 'tu', 'nu', 'lu', 'gu', 'ku', 'hu', 'zhu', 'chu', 'shu', 'ru', 'zu', 'cu', 'su', 'wu'],
+            'v': ['yu', 'nv', 'lv', 'ju', 'qu', 'xu'],
+            'vn': ['yun', 'jun', 'qun', 'xun'],
+        }
+        # Initial identifier -> syllables starting with that initial
+        consonant_data = {
+            'b': ['ba', 'bai', 'ban', 'bang', 'bao', 'biao', 'bie', 'bei', 'ben', 'bian', 'beng', 'bi', 'bin', 'bing', 'bo', 'bu'],
+            'p': ['pa', 'pai', 'pan', 'pang', 'pao', 'piao', 'pie', 'pei', 'pen', 'pian', 'peng', 'pi', 'pin', 'ping', 'po', 'pou', 'pu'],
+            'm': ['ma', 'mai', 'man', 'mang', 'mao', 'me', 'mei', 'men', 'meng', 'mo', 'mou', 'mu'],
+            'f': ['fa', 'fan', 'fang', 'fei', 'fen', 'feng', 'fo', 'fou', 'fu'],
+            'd': ['da', 'dia', 'dai', 'dan', 'duan', 'dang', 'dao', 'diao', 'de', 'die', 'dei', 'dui', 'dun', 'dian', 'deng', 'di', 'ding', 'duo', 'dong', 'dou', 'diu', 'du'],
+            't': ['ta', 'tai', 'tan', 'tuan', 'tang', 'tao', 'tiao', 'te', 'tie', 'tei', 'tui', 'tun', 'tian', 'teng', 'ti', 'ting', 'tuo', 'tong', 'tou', 'tu'],
+            'n': ['na', 'nai', 'nan', 'nuan', 'nang', 'nao', 'ne', 'nue', 'nve', 'nei', 'nen', 'neng', 'nuo', 'nong', 'nu', 'nv'],
+            'l': ['la', 'lai', 'lan', 'luan', 'lang', 'lao', 'le', 'lue', 'lve', 'lei', 'lun', 'leng', 'luo', 'long', 'lou', 'lu', 'lv'],
+            'g': ['ga', 'gua', 'gai', 'guai', 'gan', 'guan', 'gang', 'guang', 'gao', 'ge', 'gei', 'gui', 'gen', 'gun', 'geng', 'guo', 'gong', 'gou', 'gu'],
+            'k': ['ka', 'kua', 'kai', 'kuai', 'kan', 'kuan', 'kang', 'kuang', 'kao', 'ke', 'kei', 'kui', 'ken', 'kun', 'keng', 'kuo', 'kong', 'kou', 'ku'],
+            'h': ['ha', 'hai', 'han', 'hang', 'hao', 'he', 'hei', 'hen', 'heng', 'hong', 'hou'],
+            'zh': ['zha', 'zhua', 'zhai', 'zhuai', 'zhan', 'zhuan', 'zhang', 'zhuang', 'zhao', 'zhe', 'zhei', 'zhui', 'zhen', 'zhun', 'zheng', 'zhi', 'zhuo', 'zhong', 'zhou', 'zhu'],
+            'ch': ['cha', 'chai', 'chuai', 'chan', 'chuan', 'chang', 'chuang', 'chao', 'che', 'chui', 'chen', 'chun', 'cheng', 'chi', 'chuo', 'chong', 'chou', 'chu'],
+            'sh': ['sha', 'shai', 'shan', 'shang', 'shao', 'she', 'shei', 'shen', 'sheng', 'shi', 'shou'],
+            'z': ['za', 'zai', 'zan', 'zuan', 'zang', 'zao', 'ze', 'zei', 'zui', 'zen', 'zun', 'zeng', 'zi', 'zuo', 'zong', 'zou', 'zu'],
+            'c': ['ca', 'cai', 'can', 'cuan', 'cang', 'cao', 'ce', 'cui', 'cen', 'cun', 'ceng', 'ci', 'cuo', 'cong', 'cou', 'cu'],
+            's': ['sa', 'sai', 'san', 'sang', 'sao', 'se', 'sen', 'seng', 'si', 'song', 'sou'],
+            'y': ['ya', 'yang', 'yao', 'ye', 'yan', 'yi', 'yin', 'ying', 'yong', 'you'],
+            'ly': ['lia', 'liang', 'liao', 'lie', 'lian', 'li', 'lin', 'ling', 'liu'],
+            'j': ['jia', 'jiang', 'jiao', 'jie', 'jue', 'jian', 'juan', 'ji', 'jin', 'jing', 'jiong', 'jiu', 'ju', 'jun'],
+            'q': ['qia', 'qiang', 'qiao', 'qie', 'que', 'qian', 'quan', 'qi', 'qin', 'qing', 'qiong', 'qiu', 'qu', 'qun'],
+            'xy': ['xia', 'xiang', 'xiao', 'xie', 'xian', 'xi', 'xin', 'xing', 'xiong', 'xiu'],
+            'w': ['wa', 'wai', 'wan', 'wang', 'wei', 'wen', 'weng', 'wo', 'wu'],
+            'hw': ['hua', 'huai', 'huan', 'huang', 'hui', 'hun', 'huo', 'hu'],
+            'shw': ['shua', 'shuai', 'shuan', 'shuang', 'shui', 'shun', 'shuo', 'shu'],
+            'r': ['ran', 'ruan', 'rang', 'rao', 're', 'rui', 'ren', 'run', 'reng', 'ri', 'ruo', 'rong', 'rou', 'ru'],
+            'sw': ['suan', 'sui', 'sun', 'suo', 'su'],
+            'ny': ['niang', 'niao', 'nie', 'nian', 'ni', 'nin', 'ning', 'niu'],
+            'my': ['miao', 'mie', 'mian', 'mi', 'min', 'ming', 'miu'],
+            'v': ['yu', 'yue', 'yuan', 'yun'],
+            'xw': ['xue', 'xuan', 'xu', 'xun'],
+        }
+        # Invert both tables so the full pinyin syllable is the key
+        vowel_map = {
+            pinyin: vowel_id
+            for vowel_id, pinyins in vowel_data.items()
+            for pinyin in pinyins
+        }
+        consonant_map = {
+            pinyin: consonant_id
+            for consonant_id, pinyins in consonant_data.items()
+            for pinyin in pinyins
+        }
+        self._log(f"加载内置 CVVC 映射: {len(vowel_map)} 个韵母映射, {len(consonant_map)} 个声母映射")
+        return vowel_map, consonant_map
+    def _find_vowel_in_mapping(self, pinyin: str, vowel_map: Dict[str, str]) -> Optional[str]:
+        """
+        Look up the final (vowel) identifier registered for a pinyin syllable.
+        Args:
+            pinyin: full pinyin syllable
+            vowel_map: mapping from full pinyin to final identifier
+        Returns:
+            The final identifier, or None when the syllable is not in the mapping
+        """
+        if pinyin in vowel_map:
+            return vowel_map[pinyin]
+        return None
+    def _find_consonant_in_mapping(self, pinyin: str, consonant_map: Dict[str, str]) -> Optional[str]:
+        """
+        Look up the initial (consonant) identifier registered for a pinyin syllable.
+        Args:
+            pinyin: full pinyin syllable
+            consonant_map: mapping from full pinyin to initial identifier
+        Returns:
+            The initial identifier, or None when the syllable is not in the mapping
+        """
+        if pinyin in consonant_map:
+            return consonant_map[pinyin]
+        return None
     def _calculate_oto_params(
         self,
         wav_name: str,
             "segment_duration": segment_duration,  # 用于排序
         }
+    def _calculate_vc_params(
+        self,
+        wav_name: str,
+        alias: str,
+        vowel_start_ms: float,
+        vowel_end_ms: float,
+        consonant_end_ms: float,
+        wav_duration_ms: float,
+        vc_offset_ratio: float,
+        vc_overlap_ratio: float
+    ) -> Dict:
+        """
+        Compute the oto.ini parameters of one VC entry.
+        The entry starts inside the tail of the vowel and runs to the end of
+        the consonant.
+        Args:
+            wav_name: audio file name
+            alias: VC alias
+            vowel_start_ms: vowel start time
+            vowel_end_ms: vowel end time (treated as the consonant start)
+            consonant_end_ms: consonant end time
+            wav_duration_ms: total audio length (not read by this method)
+            vc_offset_ratio: fraction of the vowel length kept before the vowel end
+            vc_overlap_ratio: fraction of the preutterance used as overlap
+        Returns:
+            Dict of oto parameters, flagged with ``is_vc`` set to True
+        """
+        vowel_length = vowel_end_ms - vowel_start_ms
+        # Left edge: step back from the vowel end by the requested share of the vowel
+        start = vowel_end_ms - vowel_length * vc_offset_ratio
+        # Everything from the left edge to the end of the consonant
+        span = consonant_end_ms - start
+        # Distance from the left edge to the consonant onset (the vowel end)
+        lead_in = vowel_end_ms - start
+        params = {"wav_name": wav_name, "alias": alias}
+        params["offset"] = round(start, 1)
+        # Fixed region is kept short: at most 30, or 30% of the span
+        params["consonant"] = round(min(30, span * 0.3), 1)
+        # Negative cutoff expresses the length measured from the offset
+        params["cutoff"] = round(-span, 1)
+        params["preutterance"] = round(lead_in, 1)
+        params["overlap"] = round(lead_in * vc_overlap_ratio, 1)
+        params["segment_duration"] = span
+        params["is_vc"] = True  # marks the entry as a VC part
+        return params
     def _filter_by_alias(
         self,
         entries: List[Dict],
         # 获取有效的元音列表（用于验证组合）
         if language in ('chinese', 'zh', 'mandarin'):
+            valid_vowels = {'a', 'o', 'e', 'i', 'u', 'v',
+                          'ai', 'ei', 'ao', 'ou',
+                          'an', 'en', 'ang', 'eng', 'ong',
+                          'ia', 'ie', 'iao', 'iu', 'ian', 'in', 'iang', 'ing', 'iong',
+                          'ua', 'uo', 'uai', 'ui', 'uan', 'un', 'uang', 'ueng',
+                          've', 'van', 'vn', 'er'}
         else:
             valid_vowels = {'a', 'i', 'u', 'e', 'o'}
         all_consonants = ['b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
                           'j', 'q', 'x', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's', 'y', 'w']
+        # 中文所有可能的韵母（包含所有标准韵母）
+        all_vowels = ['a', 'o', 'e', 'i', 'u', 'v',
+                      'ai', 'ei', 'ao', 'ou',
+                      'an', 'en', 'ang', 'eng', 'ong',
+                      'ia', 'ie', 'iao', 'iu', 'ian', 'in', 'iang', 'ing', 'iong',
+                      'ua', 'uo', 'uai', 'ui', 'uan', 'un', 'uang', 'ueng',
+                      've', 'van', 'vn', 'er']
         fuzzy_count = 0

src/gui_cloud.py CHANGED Viewed

@@ -60,18 +60,10 @@ def safe_gradio_handler(func):
     Gradio 处理函数的安全包装器
     捕获所有异常并返回友好的错误信息，避免 Gradio 显示默认的"错误"状态
-    同时确保异常时释放并发计数，防止计数滞留
     """
     import functools
     import traceback
-    # 需要管理并发计数的函数列表
-    CONCURRENCY_MANAGED_FUNCS = {
-        'process_make_voicebank',
-        'process_export_voicebank',
-        'process_mfa_realign'
-    }
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         try:
@@ -81,15 +73,11 @@ def safe_gradio_handler(func):
             error_trace = traceback.format_exc()
             logger.error(f"处理函数 {func.__name__} 发生异常:\n{error_trace}")
-            # 如果是并发管理的函数，确保释放并发计数
-            # 注意：函数内部可能已经调用了 decrement_concurrency()，
-            # 但如果异常发生在 increment 之后、decrement 之前，这里需要补救
-            # decrement_concurrency() 内部有 max(0, ...) 保护，不会变成负数
-            if func.__name__ in CONCURRENCY_MANAGED_FUNCS:
-                decrement_concurrency()
-                logger.info(f"异常处理：已释放 {func.__name__} 的并发计数")
             # 根据函数返回值数量返回错误信息
             error_msg = f"❌ 系统错误: {str(e)}"
             error_detail = f"异常类型: {type(e).__name__}\n详情: {str(e)}"
@@ -143,144 +131,6 @@ def create_temp_workspace() -> str:
     return workspace
-def cleanup_gradio_cache(max_age_hours: float = 1.0):
-    """
-    清理 Gradio 临时文件缓存
-    参数:
-        max_age_hours: 文件最大保留时间（小时），超过此时间的文件将被删除
-    """
-    import time
-    gradio_tmp_dir = os.path.join(tempfile.gettempdir(), "gradio")
-    if not os.path.exists(gradio_tmp_dir):
-        return
-    current_time = time.time()
-    max_age_seconds = max_age_hours * 3600
-    cleaned_count = 0
-    cleaned_size = 0
-    try:
-        for root, dirs, files in os.walk(gradio_tmp_dir, topdown=False):
-            for name in files:
-                file_path = os.path.join(root, name)
-                try:
-                    file_age = current_time - os.path.getmtime(file_path)
-                    if file_age > max_age_seconds:
-                        file_size = os.path.getsize(file_path)
-                        os.remove(file_path)
-                        cleaned_count += 1
-                        cleaned_size += file_size
-                except Exception:
-                    pass
-            # 删除空目录
-            for name in dirs:
-                dir_path = os.path.join(root, name)
-                try:
-                    if not os.listdir(dir_path):
-                        os.rmdir(dir_path)
-                except Exception:
-                    pass
-        if cleaned_count > 0:
-            size_mb = cleaned_size / (1024 * 1024)
-            logger.info(f"Gradio 缓存清理: 删除 {cleaned_count} 个文件, 释放 {size_mb:.1f} MB")
-    except Exception as e:
-        logger.warning(f"Gradio 缓存清理失败: {e}")
-def cleanup_old_jinriki_workspaces(max_age_hours: float = 2.0):
-    """
-    清理旧的 jinriki 工作空间
-    参数:
-        max_age_hours: 工作空间最大保留时间（小时）
-    """
-    import time
-    current_time = time.time()
-    max_age_seconds = max_age_hours * 3600
-    cleaned_count = 0
-    try:
-        for item in os.listdir(CloudConfig.TEMP_BASE):
-            if item.startswith("jinriki_"):
-                workspace_path = os.path.join(CloudConfig.TEMP_BASE, item)
-                if os.path.isdir(workspace_path):
-                    try:
-                        dir_age = current_time - os.path.getmtime(workspace_path)
-                        if dir_age > max_age_seconds:
-                            shutil.rmtree(workspace_path)
-                            cleaned_count += 1
-                    except Exception:
-                        pass
-        if cleaned_count > 0:
-            logger.info(f"工作空间清理: 删除 {cleaned_count} 个旧工作空间")
-    except Exception as e:
-        logger.warning(f"工作空间清理失败: {e}")
-def start_periodic_cleanup(interval_minutes: int = 15):
-    """
-    启动定期清理任务
-    参数:
-        interval_minutes: 清理间隔（分钟），默认15分钟
-    """
-    import time
-    def cleanup_task():
-        while True:
-            try:
-                time.sleep(interval_minutes * 60)
-                logger.info("执行定期清理...")
-                cleanup_gradio_cache(max_age_hours=0.5)  # 30分钟以上的缓存
-                cleanup_old_jinriki_workspaces(max_age_hours=1.0)  # 1小时以上的工作空间
-            except Exception as e:
-                logger.error(f"定期清理任务异常: {e}")
-    cleanup_thread = threading.Thread(target=cleanup_task, daemon=True)
-    cleanup_thread.start()
-    logger.info(f"定期清理任务已启动，间隔 {interval_minutes} 分钟")
-def check_disk_space(min_mb: int = 100) -> Tuple[bool, str]:
-    """
-    检查磁盘空间是否充足
-    参数:
-        min_mb: 最小可用空间（MB）
-    返回:
-        (是否充足, 消息)
-    """
-    try:
-        import shutil
-        total, used, free = shutil.disk_usage("/tmp")
-        free_mb = free / (1024 * 1024)
-        if free_mb < min_mb:
-            # 尝试清理
-            logger.warning(f"磁盘空间不足 ({free_mb:.0f} MB)，尝试清理...")
-            cleanup_gradio_cache(max_age_hours=0)  # 清理所有缓存
-            cleanup_old_jinriki_workspaces(max_age_hours=0)  # 清理所有工作空间
-            # 重新检查
-            total, used, free = shutil.disk_usage("/tmp")
-            free_mb = free / (1024 * 1024)
-            if free_mb < min_mb:
-                return False, f"磁盘空间不足，仅剩 {free_mb:.0f} MB，请稍后重试"
-        return True, f"可用空间: {free_mb:.0f} MB"
-    except Exception as e:
-        logger.warning(f"检查磁盘空间失败: {e}")
-        return True, "无法检查磁盘空间"  # 无法检查时允许继续
 def cleanup_workspace(workspace: str):
     """清理工作空间"""
     if workspace and os.path.exists(workspace):
@@ -351,47 +201,34 @@ def get_audio_duration(file_path: str) -> Optional[float]:
     返回: 时长秒数，失败返回 None
     """
-    import subprocess
-    try:
-        # 优先使用 ffprobe（更轻量，不需要解码整个文件）
-        cmd = [
-            'ffprobe', '-v', 'error',
-            '-show_entries', 'format=duration',
-            '-of', 'default=noprint_wrappers=1:nokey=1',
-            file_path
-        ]
-        result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
-        if result.returncode == 0 and result.stdout.strip():
-            duration = float(result.stdout.strip())
-            if duration > 0:
-                return duration
-        # ffprobe 失败时的错误信息
-        if result.stderr:
-            logger.debug(f"ffprobe 错误: {result.stderr.strip()}")
-    except subprocess.TimeoutExpired:
-        logger.warning(f"ffprobe 超时: {file_path}")
-    except (ValueError, Exception) as e:
-        logger.debug(f"ffprobe 获取时长失败: {e}")
-    # 回退：对于 WAV 文件，使用 wave 模块
     try:
         import wave
         import contextlib
         if file_path.lower().endswith('.wav'):
             with contextlib.closing(wave.open(file_path, 'r')) as f:
                 frames = f.getnframes()
                 rate = f.getframerate()
                 return frames / float(rate)
     except Exception as e:
-        logger.debug(f"wave 模块读取失败: {e}")
-    logger.warning(f"无法获取音频时长: {os.path.basename(file_path)}")
-    return None
 # 云端音频时长限制（秒）
@@ -407,24 +244,15 @@ def validate_audio_upload(files) -> Tuple[bool, str, List[str]]:
     if not files:
         return False, "请上传音频文件", []
-    # 调试：记录 Gradio 传入的文件对象类型
-    logger.info(f"Gradio 文件对象类型: {type(files)}, 数量: {len(files) if hasattr(files, '__len__') else 'N/A'}")
     valid_files = []
-    for i, f in enumerate(files):
-        logger.info(f"文件[{i}] 类型: {type(f)}, 值: {f}")
-        # Gradio 6.x 可能直接传入字符串路径
-        if isinstance(f, str):
-            path = f
-        elif hasattr(f, 'name'):
             path = f.name
         else:
             path = str(f)
         if path.lower().endswith(CloudConfig.AUDIO_EXTENSIONS):
             valid_files.append(path)
-            logger.info(f"文件[{i}] 有效路径: {path}")
     if not valid_files:
         return False, f"未找到有效音频文件，支持格式: {', '.join(CloudConfig.AUDIO_EXTENSIONS)}", []
@@ -493,12 +321,6 @@ def process_make_voicebank(
         logs.append(msg)
         logger.info(msg)
-    # 检查磁盘空间
-    space_ok, space_msg = check_disk_space(min_mb=200)
-    if not space_ok:
-        decrement_concurrency()
-        return f"❌ {space_msg}", "", None, None
     try:
         # 导入依赖（放在 try 块内以捕获导入错误）
         from src.pipeline import PipelineConfig, VoiceBankPipeline
@@ -541,73 +363,22 @@ def process_make_voicebank(
         os.makedirs(input_dir, exist_ok=True)
         os.makedirs(bank_dir, exist_ok=True)
-        # 复制音频文件到输入目录（重命名为安全文件名）
         progress(0.05, desc="复制音频文件...")
         copied_count = 0
-        copy_errors = []
-        for idx, src_path in enumerate(file_paths):
-            original_name = os.path.basename(src_path)
-            # 检查源文件
             if not os.path.exists(src_path):
-                copy_errors.append(f"{original_name}: 文件不存在")
-                continue
-            src_size = os.path.getsize(src_path)
-            if src_size == 0:
-                copy_errors.append(f"{original_name}: 文件为空")
-                continue
-            # 验证文件是否为有效音频（检查文件头）
-            try:
-                with open(src_path, 'rb') as f:
-                    header = f.read(12)
-                # 检查常见音频格式的文件头
-                is_valid_audio = False
-                if header[:4] == b'RIFF' and header[8:12] == b'WAVE':  # WAV
-                    is_valid_audio = True
-                elif header[:4] == b'OggS':  # OGG
-                    is_valid_audio = True
-                elif header[:3] == b'ID3' or header[:2] == b'\xff\xfb':  # MP3
-                    is_valid_audio = True
-                elif header[:4] == b'fLaC':  # FLAC
-                    is_valid_audio = True
-                elif header[4:8] == b'ftyp':  # M4A/MP4
-                    is_valid_audio = True
-                if not is_valid_audio:
-                    copy_errors.append(f"{original_name}: 无效的音频文件格式 (header: {header[:8].hex()})")
-                    continue
-            except Exception as e:
-                copy_errors.append(f"{original_name}: 无法读取文件头 ({e})")
                 continue
             try:
-                # 生成安全的文件名
-                _, ext = os.path.splitext(original_name)
-                safe_name = f"audio_{idx:04d}{ext.lower()}"
-                dst_path = os.path.join(input_dir, safe_name)
                 shutil.copy2(src_path, dst_path)
-                # 验证复制结果
-                if os.path.getsize(dst_path) == src_size:
-                    copied_count += 1
-                    log(f"  {original_name} ({src_size} bytes) -> {safe_name}")
-                else:
-                    copy_errors.append(f"{original_name}: 复制不完整")
             except Exception as e:
-                copy_errors.append(f"{original_name}: {e}")
-        if copy_errors:
-            for err in copy_errors:
-                log(f"⚠️ {err}")
         if copied_count == 0:
-            decrement_concurrency()
             return "❌ 无法访问上传的文件，请重新上传", "\n".join(logs), None, None
         log(f"📋 已复制 {copied_count}/{len(file_paths)} 个文件到工作目录")
@@ -826,12 +597,6 @@ def process_export_voicebank(
         logs.append(msg)
         logger.info(msg)
-    # 检查磁盘空间
-    space_ok, space_msg = check_disk_space(min_mb=100)
-    if not space_ok:
-        decrement_concurrency()
-        return f"❌ {space_msg}", "", None
     # 验证输入
     valid, msg, source_name = validate_voicebank_zip(zip_file)
     if not valid:
@@ -1054,12 +819,6 @@ def process_mfa_realign(
         logs.append(msg)
         logger.info(msg)
-    # 检查磁盘空间
-    space_ok, space_msg = check_disk_space(min_mb=100)
-    if not space_ok:
-        decrement_concurrency()
-        return f"❌ {space_msg}", "", None
     # 验证输入
     if not zip_file:
         decrement_concurrency()
@@ -1949,14 +1708,6 @@ def create_cloud_ui():
 def main():
     """云端入口"""
-    # 启动时执行一次清理
-    logger.info("启动时执行缓存清理...")
-    cleanup_gradio_cache(max_age_hours=0.5)  # 清理超过30分钟的缓存
-    cleanup_old_jinriki_workspaces(max_age_hours=1.0)  # 清理超过1小时的工作空间
-    # 启动定期清理任务
-    start_periodic_cleanup(interval_minutes=30)
     app = create_cloud_ui()
     # 启用队列，魔搭CPU按需分配，无需设置并发上限
     app.queue()

     Gradio 处理函数的安全包装器
     捕获所有异常并返回友好的错误信息，避免 Gradio 显示默认的"错误"状态
     """
     import functools
     import traceback
     @functools.wraps(func)
     def wrapper(*args, **kwargs):
         try:
             error_trace = traceback.format_exc()
             logger.error(f"处理函数 {func.__name__} 发生异常:\n{error_trace}")
             # 根据函数返回值数量返回错误信息
+            # 检查函数的类型注解来确定返回值数量
+            annotations = getattr(func, '__annotations__', {})
+            return_type = annotations.get('return', None)
             error_msg = f"❌ 系统错误: {str(e)}"
             error_detail = f"异常类型: {type(e).__name__}\n详情: {str(e)}"
     return workspace
 def cleanup_workspace(workspace: str):
     """清理工作空间"""
     if workspace and os.path.exists(workspace):
     返回: 时长秒数，失败返回 None
     """
     try:
         import wave
         import contextlib
+        # 对于 WAV 文件，使用 wave 模块快速获取时长
         if file_path.lower().endswith('.wav'):
             with contextlib.closing(wave.open(file_path, 'r')) as f:
                 frames = f.getnframes()
                 rate = f.getframerate()
                 return frames / float(rate)
+        # 对于其他格式，使用 pydub（如果可用）
+        try:
+            from pydub import AudioSegment
+            audio = AudioSegment.from_file(file_path)
+            return len(audio) / 1000.0  # 毫秒转秒
+        except ImportError:
+            # pydub 不可用，尝试使用 librosa
+            try:
+                import librosa
+                duration = librosa.get_duration(path=file_path)
+                return duration
+            except ImportError:
+                logger.warning(f"无法获取音频时长，缺少 pydub 或 librosa: {file_path}")
+                return None
     except Exception as e:
+        logger.warning(f"获取音频时长失败 {file_path}: {e}")
+        return None
 # 云端音频时长限制（秒）
     if not files:
         return False, "请上传音频文件", []
     valid_files = []
+    for f in files:
+        if hasattr(f, 'name'):
             path = f.name
         else:
             path = str(f)
         if path.lower().endswith(CloudConfig.AUDIO_EXTENSIONS):
             valid_files.append(path)
     if not valid_files:
         return False, f"未找到有效音频文件，支持格式: {', '.join(CloudConfig.AUDIO_EXTENSIONS)}", []
         logs.append(msg)
         logger.info(msg)
     try:
         # 导入依赖（放在 try 块内以捕获导入错误）
         from src.pipeline import PipelineConfig, VoiceBankPipeline
         os.makedirs(input_dir, exist_ok=True)
         os.makedirs(bank_dir, exist_ok=True)
+        # 复制音频文件到输入目录
         progress(0.05, desc="复制音频文件...")
         copied_count = 0
+        for src_path in file_paths:
+            # 检查源文件是否存在
             if not os.path.exists(src_path):
+                log(f"⚠️ 文件不存在或已被清理: {src_path}")
                 continue
             try:
+                dst_path = os.path.join(input_dir, os.path.basename(src_path))
                 shutil.copy2(src_path, dst_path)
+                copied_count += 1
             except Exception as e:
+                log(f"⚠️ 复制文件失败 {os.path.basename(src_path)}: {e}")
         if copied_count == 0:
             return "❌ 无法访问上传的文件，请重新上传", "\n".join(logs), None, None
         log(f"📋 已复制 {copied_count}/{len(file_paths)} 个文件到工作目录")
         logs.append(msg)
         logger.info(msg)
     # 验证输入
     valid, msg, source_name = validate_voicebank_zip(zip_file)
     if not valid:
         logs.append(msg)
         logger.info(msg)
     # 验证输入
     if not zip_file:
         decrement_concurrency()
 def main():
     """云端入口"""
     app = create_cloud_ui()
     # 启用队列，魔搭CPU按需分配，无需设置并发上限
     app.queue()

src/mfa_model_downloader.py CHANGED Viewed

@@ -3,7 +3,6 @@
 MFA 模型下载模块
 支持下载中文和日文的声学模型及字典
 包含 SHA256 哈希校验，确保文件完整性
-支持 GitHub 代理镜像（云端环境）
 """
 import os
@@ -12,7 +11,7 @@ import logging
 import urllib.request
 import urllib.error
 from pathlib import Path
-from typing import Optional, Callable, List
 logger = logging.getLogger(__name__)
@@ -20,14 +19,6 @@ logger = logging.getLogger(__name__)
 GITHUB_RELEASE_BASE = "https://github.com/MontrealCorpusTools/mfa-models/releases/download"
 GITHUB_RAW_BASE = "https://raw.githubusercontent.com/MontrealCorpusTools/mfa-models/main"
-# GitHub 代理镜像列表（云端环境使用）
-# 格式: 代理前缀 + 原始 GitHub URL
-GITHUB_PROXIES = [
-    "https://ghfast.top/",
-    "https://gh-proxy.com/",
-    "",  # 最后尝试直连
-]
 # 支持的语言配置
 # 格式: {语言代码: {名称, 声学模型信息, 字典信息}}
 # sha256: 官方文件的 SHA256 哈希值（清理空行后），用于校验文件完整性
@@ -168,36 +159,16 @@ def _verify_file_integrity(
     return True, "文件完整"
-def _is_cloud_environment() -> bool:
-    """检测是否在云端环境运行"""
-    return any([
-        os.environ.get("SPACE_ID"),           # Hugging Face Spaces
-        os.environ.get("MODELSCOPE_SPACE"),   # 魔塔社区
-        os.environ.get("GRADIO_SERVER_NAME"), # 通用 Gradio 云端
-        Path("/home/studio_service").exists(), # 魔搭创空间特征目录
-    ])
-def _get_proxy_urls(original_url: str) -> List[str]:
-    """
-    获取带代理的 URL 列表
-    云端环境返回多个代理 URL，本地环境只返回原始 URL
-    """
-    if _is_cloud_environment():
-        return [f"{proxy}{original_url}" for proxy in GITHUB_PROXIES]
-    return [original_url]
 def _download_file(
     url: str,
     dest_path: str,
     progress_callback: Optional[Callable[[str], None]] = None
 ) -> bool:
     """
-    下载文件（支持代理镜像自动切换）
     参数:
-        url: 下载地址（原始 GitHub URL）
         dest_path: 保存路径
         progress_callback: 进度回调
@@ -209,73 +180,65 @@ def _download_file(
         if progress_callback:
             progress_callback(msg)
-    # 获取所有可用的 URL（包括代理）
-    urls = _get_proxy_urls(url)
-    for try_url in urls:
-        try:
-            log(f"正在下载: {try_url}")
-            # 创建目录
-            os.makedirs(os.path.dirname(dest_path), exist_ok=True)
-            # 下载到临时文件，完成后再重命名
-            temp_path = dest_path + ".downloading"
-            # 下载文件
-            req = urllib.request.Request(try_url, headers={"User-Agent": "Mozilla/5.0"})
-            with urllib.request.urlopen(req, timeout=180) as response:
-                total_size = response.headers.get("Content-Length")
-                if total_size:
-                    total_size = int(total_size)
-                    log(f"文件大小: {total_size / 1024 / 1024:.1f} MB")
-                # 分块下载
-                block_size = 8192
-                downloaded = 0
-                with open(temp_path, "wb") as f:
-                    while True:
-                        chunk = response.read(block_size)
-                        if not chunk:
-                            break
-                        f.write(chunk)
-                        downloaded += len(chunk)
-                        if total_size and downloaded % (block_size * 100) == 0:
-                            percent = downloaded / total_size * 100
-                            log(f"下载进度: {percent:.1f}%")
-            # 下载完成，重命名
-            if os.path.exists(dest_path):
-                os.remove(dest_path)
-            os.rename(temp_path, dest_path)
-            log(f"下载完成: {dest_path}")
-            return True
-        except urllib.error.HTTPError as e:
-            log(f"HTTP 错误: {e.code} - {e.reason}")
-        except urllib.error.URLError as e:
-            log(f"网络错误: {e.reason}")
-        except Exception as e:
-            log(f"下载失败: {e}")
-        finally:
-            # 清理临时文件
-            temp_path = dest_path + ".downloading"
-            if os.path.exists(temp_path):
-                try:
-                    os.remove(temp_path)
-                except:
-                    pass
-        # 当前 URL 失败，尝试下一个
-        if try_url != urls[-1]:
-            log("尝试下一个镜像...")
-    log("所有镜像均下载失败")
-    return False

 MFA 模型下载模块
 支持下载中文和日文的声学模型及字典
 包含 SHA256 哈希校验，确保文件完整性
 """
 import os
 import urllib.request
 import urllib.error
 from pathlib import Path
+from typing import Optional, Callable
 logger = logging.getLogger(__name__)
 GITHUB_RELEASE_BASE = "https://github.com/MontrealCorpusTools/mfa-models/releases/download"
 GITHUB_RAW_BASE = "https://raw.githubusercontent.com/MontrealCorpusTools/mfa-models/main"
 # 支持的语言配置
 # 格式: {语言代码: {名称, 声学模型信息, 字典信息}}
 # sha256: 官方文件的 SHA256 哈希值（清理空行后），用于校验文件完整性
     return True, "文件完整"
 def _download_file(
     url: str,
     dest_path: str,
     progress_callback: Optional[Callable[[str], None]] = None
 ) -> bool:
     """
+    下载文件
     参数:
+        url: 下载地址
         dest_path: 保存路径
         progress_callback: 进度回调
         if progress_callback:
             progress_callback(msg)
+    try:
+        log(f"正在下载: {url}")
+        # 创建目录
+        os.makedirs(os.path.dirname(dest_path), exist_ok=True)
+        # 下载到临时文件，完成后再重命名
+        temp_path = dest_path + ".downloading"
+        # 下载文件
+        req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
+        with urllib.request.urlopen(req, timeout=120) as response:
+            total_size = response.headers.get("Content-Length")
+            if total_size:
+                total_size = int(total_size)
+                log(f"文件大小: {total_size / 1024 / 1024:.1f} MB")
+            # 分块下载
+            block_size = 8192
+            downloaded = 0
+            with open(temp_path, "wb") as f:
+                while True:
+                    chunk = response.read(block_size)
+                    if not chunk:
+                        break
+                    f.write(chunk)
+                    downloaded += len(chunk)
+                    if total_size and downloaded % (block_size * 100) == 0:
+                        percent = downloaded / total_size * 100
+                        log(f"下载进度: {percent:.1f}%")
+        # 下载完成，重命名
+        if os.path.exists(dest_path):
+            os.remove(dest_path)
+        os.rename(temp_path, dest_path)
+        log(f"下载完成: {dest_path}")
+        return True
+    except urllib.error.HTTPError as e:
+        log(f"HTTP 错误: {e.code} - {e.reason}")
+        return False
+    except urllib.error.URLError as e:
+        log(f"网络错误: {e.reason}")
+        return False
+    except Exception as e:
+        log(f"下载失败: {e}")
+        return False
+    finally:
+        # 清理临时文件
+        temp_path = dest_path + ".downloading"
+        if os.path.exists(temp_path):
+            try:
+                os.remove(temp_path)
+            except:
+                pass

src/mfa_runner.py CHANGED Viewed

@@ -102,18 +102,6 @@ def _build_mfa_env(mfa_root: Optional[Path] = None) -> dict:
             str(MFA_ENGINE_DIR / "bin"),
         ]
         env["PATH"] = ";".join(mfa_paths) + ";" + env.get("PATH", "")
-        # Windows: 设置 MFA_ROOT_DIR 到纯 ASCII 路径
-        # 解决用户名包含中文时 OpenFST 无法写入文件的问题
-        # 优先使用项目目录下的 mfa_temp，确保路径为纯 ASCII
-        if mfa_root:
-            env["MFA_ROOT_DIR"] = str(mfa_root)
-        else:
-            # 默认使用项目目录下的 mfa_data 作为 MFA 数据目录
-            mfa_data_dir = BASE_DIR / "mfa_data"
-            mfa_data_dir.mkdir(parents=True, exist_ok=True)
-            env["MFA_ROOT_DIR"] = str(mfa_data_dir)
-        logger.info(f"设置 MFA_ROOT_DIR: {env['MFA_ROOT_DIR']}")
     else:
         # Linux: 设置会话独立的 MFA_ROOT_DIR（解决并发数据库冲突）
         if mfa_root:

             str(MFA_ENGINE_DIR / "bin"),
         ]
         env["PATH"] = ";".join(mfa_paths) + ";" + env.get("PATH", "")
     else:
         # Linux: 设置会话独立的 MFA_ROOT_DIR（解决并发数据库冲突）
         if mfa_root:

src/pipeline.py CHANGED Viewed

@@ -346,24 +346,12 @@ class VoiceBankPipeline:
         import subprocess
         import numpy as np
-        # 确保路径是绝对路径，避免编码问题
-        audio_path = os.path.abspath(audio_path)
-        # 检查文件是否存在
-        if not os.path.exists(audio_path):
-            raise RuntimeError(f"音频文件不存在: {audio_path}")
         # 使用 ffprobe 获取采样率
         probe_cmd = [
             'ffprobe', '-v', 'quiet', '-print_format', 'json',
             '-show_streams', audio_path
         ]
-        probe_result = subprocess.run(
-            probe_cmd,
-            capture_output=True,
-            encoding='utf-8',
-            errors='replace'
-        )
         sr = 44100  # 默认采样率
         if probe_result.returncode == 0:
@@ -384,18 +372,13 @@ class VoiceBankPipeline:
             '-acodec', 'pcm_s16le',
             '-ac', '1',  # 单声道
             '-ar', str(sr),  # 保持原采样率
-            '-v', 'error',  # 只输出错误信息
             '-'
         ]
         result = subprocess.run(cmd, capture_output=True)
         if result.returncode != 0:
-            # 获取错误信息
-            stderr_msg = result.stderr.decode('utf-8', errors='replace').strip()
-            raise RuntimeError(f"ffmpeg 读取音频失败: {audio_path}\n错误: {stderr_msg}")
-        if len(result.stdout) == 0:
-            raise RuntimeError(f"ffmpeg 输出为空，可能是文件损坏或格式不支持: {audio_path}")
         # 转换为 numpy 数组
         audio = np.frombuffer(result.stdout, dtype=np.int16).astype(np.float32) / 32768.0

         import subprocess
         import numpy as np
         # 使用 ffprobe 获取采样率
         probe_cmd = [
             'ffprobe', '-v', 'quiet', '-print_format', 'json',
             '-show_streams', audio_path
         ]
+        probe_result = subprocess.run(probe_cmd, capture_output=True, text=True)
         sr = 44100  # 默认采样率
         if probe_result.returncode == 0:
             '-acodec', 'pcm_s16le',
             '-ac', '1',  # 单声道
             '-ar', str(sr),  # 保持原采样率
+            '-v', 'quiet',
             '-'
         ]
         result = subprocess.run(cmd, capture_output=True)
         if result.returncode != 0:
+            raise RuntimeError(f"ffmpeg 读取音频失败: {audio_path}")
         # 转换为 numpy 数组
         audio = np.frombuffer(result.stdout, dtype=np.int16).astype(np.float32) / 32768.0

tests/.gitkeep DELETED Viewed

File without changes

tests/test_mfa_model_downloader.py DELETED Viewed

@@ -1,182 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-MFA 模型下载模块单元测试
-"""
-import os
-import sys
-import unittest
-from unittest.mock import patch, MagicMock
-from pathlib import Path
-# 添加项目根目录到路径
-sys.path.insert(0, str(Path(__file__).parent.parent))
-from src.mfa_model_downloader import (
-    get_available_languages,
-    LANGUAGE_MODELS,
-    GITHUB_RELEASE_BASE,
-    download_acoustic_model,
-    download_dictionary,
-    download_language_models,
-)
-class TestGetAvailableLanguages(unittest.TestCase):
-    """测试获取可用语言列表"""
-    def test_returns_dict(self):
-        """返回值应为字典"""
-        result = get_available_languages()
-        self.assertIsInstance(result, dict)
-    def test_contains_mandarin(self):
-        """应包含中文"""
-        result = get_available_languages()
-        self.assertIn("mandarin", result)
-        self.assertEqual(result["mandarin"], "中文 (普通话)")
-    def test_contains_japanese(self):
-        """应包含日文"""
-        result = get_available_languages()
-        self.assertIn("japanese", result)
-        self.assertEqual(result["japanese"], "日文")
-class TestLanguageModelsConfig(unittest.TestCase):
-    """测试语言模型配置"""
-    def test_mandarin_config_complete(self):
-        """中文配置应完整"""
-        config = LANGUAGE_MODELS["mandarin"]
-        self.assertIn("name", config)
-        self.assertIn("acoustic", config)
-        self.assertIn("dictionary", config)
-        # 声学模型配置
-        acoustic = config["acoustic"]
-        self.assertIn("tag", acoustic)
-        self.assertIn("filename", acoustic)
-        self.assertTrue(acoustic["filename"].endswith(".zip"))
-        # 字典配置
-        dictionary = config["dictionary"]
-        self.assertIn("tag", dictionary)
-        self.assertIn("filename", dictionary)
-        self.assertTrue(dictionary["filename"].endswith(".dict"))
-    def test_japanese_config_complete(self):
-        """日文配置应完整"""
-        config = LANGUAGE_MODELS["japanese"]
-        self.assertIn("name", config)
-        self.assertIn("acoustic", config)
-        self.assertIn("dictionary", config)
-    def test_acoustic_url_format(self):
-        """声学模型 URL 格式应正确"""
-        for lang, config in LANGUAGE_MODELS.items():
-            acoustic = config["acoustic"]
-            url = f"{GITHUB_RELEASE_BASE}/{acoustic['tag']}/{acoustic['filename']}"
-            self.assertTrue(url.startswith("https://github.com/"))
-            self.assertIn("mfa-models", url)
-    def test_dictionary_url_format(self):
-        """字典 URL 格式应正确"""
-        for lang, config in LANGUAGE_MODELS.items():
-            dictionary = config["dictionary"]
-            url = f"{GITHUB_RELEASE_BASE}/{dictionary['tag']}/{dictionary['filename']}"
-            self.assertTrue(url.startswith("https://github.com/"))
-            self.assertIn("dictionary-", url)
-class TestDownloadAcousticModel(unittest.TestCase):
-    """测试声学模型下载"""
-    def test_invalid_language(self):
-        """不支持的语言应返回失败"""
-        success, result = download_acoustic_model("invalid_lang", "/tmp")
-        self.assertFalse(success)
-        self.assertIn("不支持的语言", result)
-    @patch('src.mfa_model_downloader._download_file')
-    def test_download_called_with_correct_url(self, mock_download):
-        """应使用正确的 URL 下载"""
-        mock_download.return_value = True
-        with patch('os.path.exists', return_value=False):
-            download_acoustic_model("mandarin", "/tmp/models")
-        # 验证调用参数
-        call_args = mock_download.call_args
-        url = call_args[0][0]
-        self.assertIn("mandarin_mfa.zip", url)
-        self.assertIn("acoustic-mandarin_mfa", url)
-    @patch('os.path.exists')
-    def test_skip_if_exists(self, mock_exists):
-        """文件已存在时应跳过下载"""
-        mock_exists.return_value = True
-        success, result = download_acoustic_model("mandarin", "/tmp/models")
-        self.assertTrue(success)
-        self.assertIn("mandarin_mfa.zip", result)
-class TestDownloadDictionary(unittest.TestCase):
-    """测试字典下载"""
-    def test_invalid_language(self):
-        """不支持的语言应返回失败"""
-        success, result = download_dictionary("invalid_lang", "/tmp")
-        self.assertFalse(success)
-        self.assertIn("不支持的语言", result)
-    @patch('src.mfa_model_downloader._download_file')
-    def test_download_called_with_correct_url(self, mock_download):
-        """应使用正确的 URL 下载"""
-        mock_download.return_value = True
-        with patch('os.path.exists', return_value=False):
-            download_dictionary("japanese", "/tmp/models")
-        call_args = mock_download.call_args
-        url = call_args[0][0]
-        self.assertIn("github.com", url)
-        self.assertIn("dictionary-japanese", url)
-class TestDownloadLanguageModels(unittest.TestCase):
-    """测试完整语言模型下载"""
-    def test_invalid_language(self):
-        """不支持的语言应返回失败"""
-        success, acoustic, dict_path = download_language_models("invalid", "/tmp")
-        self.assertFalse(success)
-    @patch('src.mfa_model_downloader.download_dictionary')
-    @patch('src.mfa_model_downloader.download_acoustic_model')
-    def test_downloads_both_models(self, mock_acoustic, mock_dict):
-        """应同时下载声学模型和字典"""
-        mock_acoustic.return_value = (True, "/tmp/acoustic.zip")
-        mock_dict.return_value = (True, "/tmp/dict.dict")
-        success, acoustic, dict_path = download_language_models("mandarin", "/tmp")
-        self.assertTrue(success)
-        mock_acoustic.assert_called_once()
-        mock_dict.assert_called_once()
-    @patch('src.mfa_model_downloader.download_dictionary')
-    @patch('src.mfa_model_downloader.download_acoustic_model')
-    def test_stops_on_acoustic_failure(self, mock_acoustic, mock_dict):
-        """声学模型下载失败时应停止"""
-        mock_acoustic.return_value = (False, "下载失败")
-        success, _, _ = download_language_models("mandarin", "/tmp")
-        self.assertFalse(success)
-        mock_dict.assert_not_called()
-if __name__ == "__main__":
-    unittest.main()

tests/test_mfa_runner.py DELETED Viewed

@@ -1,243 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-MFA 运行模块单元测试
-"""
-import os
-import sys
-import unittest
-from unittest.mock import patch, MagicMock
-from pathlib import Path
-# 添加项目根目录到路径
-sys.path.insert(0, str(Path(__file__).parent.parent))
-from src.mfa_runner import (
-    check_mfa_available,
-    _build_mfa_env,
-    run_mfa_alignment,
-    run_mfa_validate,
-    BASE_DIR,
-    MFA_ENGINE_DIR,
-    MFA_PYTHON,
-)
-class TestCheckMfaAvailable(unittest.TestCase):
-    """测试 MFA 环境检查"""
-    @patch('src.mfa_runner.MFA_ENGINE_DIR')
-    def test_returns_false_when_dir_not_exists(self, mock_dir):
-        """目录不存在时应返回 False"""
-        mock_path = MagicMock()
-        mock_path.exists.return_value = False
-        with patch.object(Path, 'exists', return_value=False):
-            # 由于模块级变量，需要重新导入或直接测试逻辑
-            pass
-    def test_path_constants_defined(self):
-        """路径常量应正确定义"""
-        self.assertIsInstance(BASE_DIR, Path)
-        self.assertIsInstance(MFA_ENGINE_DIR, Path)
-        self.assertIsInstance(MFA_PYTHON, Path)
-        # 验证路径结构
-        self.assertTrue(str(MFA_ENGINE_DIR).endswith("mfa_engine"))
-        self.assertTrue(str(MFA_PYTHON).endswith("python.exe"))
-class TestBuildMfaEnv(unittest.TestCase):
-    """测试 MFA 环境变量构建"""
-    def test_returns_dict(self):
-        """应返回字典"""
-        env = _build_mfa_env()
-        self.assertIsInstance(env, dict)
-    def test_path_contains_mfa_dirs(self):
-        """PATH 应包含 MFA 相关目录"""
-        env = _build_mfa_env()
-        path = env.get("PATH", "")
-        self.assertIn("mfa_engine", path)
-        self.assertIn("Library", path)
-    def test_preserves_original_path(self):
-        """应保留原始 PATH"""
-        original_path = os.environ.get("PATH", "")
-        env = _build_mfa_env()
-        # 原始 PATH 应在新 PATH 中
-        self.assertIn(original_path.split(";")[0], env["PATH"])
-class TestRunMfaAlignment(unittest.TestCase):
-    """测试 MFA 对齐功能"""
-    @patch('src.mfa_runner.check_mfa_available')
-    def test_fails_when_mfa_unavailable(self, mock_check):
-        """MFA 不可用时应返回失败"""
-        mock_check.return_value = False
-        success, msg = run_mfa_alignment("/input", "/output")
-        self.assertFalse(success)
-        self.assertIn("不可用", msg)
-    @patch('src.mfa_runner.check_mfa_available')
-    @patch('os.path.isdir')
-    def test_fails_when_corpus_not_exists(self, mock_isdir, mock_check):
-        """输入目录不存在时应返回失败"""
-        mock_check.return_value = True
-        mock_isdir.return_value = False
-        success, msg = run_mfa_alignment("/nonexistent", "/output")
-        self.assertFalse(success)
-        self.assertIn("不存在", msg)
-    @patch('src.mfa_runner.check_mfa_available')
-    @patch('os.path.isdir')
-    @patch('os.path.isfile')
-    def test_fails_when_dict_not_exists(self, mock_isfile, mock_isdir, mock_check):
-        """字典文件不存在时应返回失败"""
-        mock_check.return_value = True
-        mock_isdir.return_value = True
-        mock_isfile.return_value = False
-        success, msg = run_mfa_alignment(
-            "/input", "/output",
-            dict_path="/nonexistent.dict"
-        )
-        self.assertFalse(success)
-        self.assertIn("不存在", msg)
-    @patch('src.mfa_runner.check_mfa_available')
-    @patch('os.path.isdir')
-    @patch('os.path.isfile')
-    @patch('os.makedirs')
-    @patch('subprocess.run')
-    def test_calls_subprocess_with_correct_args(
-        self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
-    ):
-        """应使用正确的参数调用 subprocess"""
-        mock_check.return_value = True
-        mock_isdir.return_value = True
-        mock_isfile.return_value = True
-        mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
-        run_mfa_alignment(
-            "/input", "/output",
-            dict_path="/dict.dict",
-            model_path="/model.zip",
-            single_speaker=True,
-            clean=True
-        )
-        # 验证 subprocess.run 被调用
-        mock_run.assert_called_once()
-        # 验证命令参数
-        call_args = mock_run.call_args
-        cmd = call_args[0][0]
-        self.assertIn("align", cmd)
-        self.assertIn("/input", cmd)
-        self.assertIn("/dict.dict", cmd)
-        self.assertIn("/model.zip", cmd)
-        self.assertIn("/output", cmd)
-        self.assertIn("--single_speaker", cmd)
-        self.assertIn("--clean", cmd)
-    @patch('src.mfa_runner.check_mfa_available')
-    @patch('os.path.isdir')
-    @patch('os.path.isfile')
-    @patch('os.makedirs')
-    @patch('subprocess.run')
-    def test_returns_success_on_zero_returncode(
-        self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
-    ):
-        """返回码为 0 时应返回成功"""
-        mock_check.return_value = True
-        mock_isdir.return_value = True
-        mock_isfile.return_value = True
-        mock_run.return_value = MagicMock(returncode=0, stdout="完成", stderr="")
-        success, msg = run_mfa_alignment(
-            "/input", "/output",
-            dict_path="/dict.dict",
-            model_path="/model.zip"
-        )
-        self.assertTrue(success)
-    @patch('src.mfa_runner.check_mfa_available')
-    @patch('os.path.isdir')
-    @patch('os.path.isfile')
-    @patch('os.makedirs')
-    @patch('subprocess.run')
-    def test_returns_failure_on_nonzero_returncode(
-        self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
-    ):
-        """返回码非 0 时应返回失败"""
-        mock_check.return_value = True
-        mock_isdir.return_value = True
-        mock_isfile.return_value = True
-        mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="错误")
-        success, msg = run_mfa_alignment(
-            "/input", "/output",
-            dict_path="/dict.dict",
-            model_path="/model.zip"
-        )
-        self.assertFalse(success)
-class TestRunMfaValidate(unittest.TestCase):
-    """测试 MFA 验证功能"""
-    @patch('src.mfa_runner.check_mfa_available')
-    def test_fails_when_mfa_unavailable(self, mock_check):
-        """MFA 不可用时应返回失败"""
-        mock_check.return_value = False
-        success, msg = run_mfa_validate("/corpus")
-        self.assertFalse(success)
-        self.assertIn("不可用", msg)
-class TestProgressCallback(unittest.TestCase):
-    """测试进度回调"""
-    @patch('src.mfa_runner.check_mfa_available')
-    @patch('os.path.isdir')
-    @patch('os.path.isfile')
-    @patch('os.makedirs')
-    @patch('subprocess.run')
-    def test_callback_called_on_success(
-        self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
-    ):
-        """成功时应调用回调"""
-        mock_check.return_value = True
-        mock_isdir.return_value = True
-        mock_isfile.return_value = True
-        mock_run.return_value = MagicMock(returncode=0, stdout="完成", stderr="")
-        callback = MagicMock()
-        run_mfa_alignment(
-            "/input", "/output",
-            dict_path="/dict.dict",
-            model_path="/model.zip",
-            progress_callback=callback
-        )
-        # 回调应被调用（至少一次）
-        self.assertTrue(callback.called)
-if __name__ == "__main__":
-    unittest.main()

tests/test_silero_vad_downloader.py DELETED Viewed

@@ -1,65 +0,0 @@
-# -*- coding: utf-8 -*-
-"""
-Silero VAD 下载模块测试
-"""
-import os
-import tempfile
-import unittest
-from unittest.mock import patch, MagicMock
-from src.silero_vad_downloader import (
-    get_vad_model_path,
-    is_vad_model_downloaded,
-    download_silero_vad,
-    ensure_vad_model,
-    SILERO_VAD_CONFIG
-)
-class TestSileroVadDownloader(unittest.TestCase):
-    """Silero VAD 下载器测试类"""
-    def test_get_vad_model_path(self):
-        """测试获取模型路径"""
-        models_dir = "/test/models"
-        expected = os.path.join(models_dir, "silero_vad", "silero_vad.onnx")
-        self.assertEqual(get_vad_model_path(models_dir), expected)
-    def test_is_vad_model_downloaded_false(self):
-        """测试模型未下载时返回 False"""
-        with tempfile.TemporaryDirectory() as tmpdir:
-            self.assertFalse(is_vad_model_downloaded(tmpdir))
-    def test_is_vad_model_downloaded_true(self):
-        """测试模型已下载时返回 True"""
-        with tempfile.TemporaryDirectory() as tmpdir:
-            vad_dir = os.path.join(tmpdir, "silero_vad")
-            os.makedirs(vad_dir)
-            model_path = os.path.join(vad_dir, "silero_vad.onnx")
-            with open(model_path, "w") as f:
-                f.write("dummy")
-            self.assertTrue(is_vad_model_downloaded(tmpdir))
-    def test_download_silero_vad_already_exists(self):
-        """测试模型已存在时跳过下载"""
-        with tempfile.TemporaryDirectory() as tmpdir:
-            vad_dir = os.path.join(tmpdir, "silero_vad")
-            os.makedirs(vad_dir)
-            model_path = os.path.join(vad_dir, "silero_vad.onnx")
-            with open(model_path, "w") as f:
-                f.write("dummy")
-            success, result = download_silero_vad(tmpdir)
-            self.assertTrue(success)
-            self.assertEqual(result, model_path)
-    def test_config_values(self):
-        """测试配置值正确性"""
-        self.assertEqual(SILERO_VAD_CONFIG["onnx_filename"], "silero_vad.onnx")
-        self.assertEqual(SILERO_VAD_CONFIG["jit_filename"], "silero_vad.jit")
-        self.assertIn("snakers4/silero-vad", SILERO_VAD_CONFIG["repo"])
-if __name__ == "__main__":
-    unittest.main()

目前便携版仅支持Windows！.txt ADDED Viewed

	@@ -0,0 +1 @@


1	+ 没有条件可以使用：https://www.modelscope.cn/studios/TNOTqwq/JinrikiHelper/