Spaces:
Running
Running
sync: align master with local snapshot without force
Browse files- .gitattributes +2 -47
- .github/ISSUE_TEMPLATE/bug_report.md +0 -32
- .gitignore +24 -44
- README_1.1.txt +3 -0
- app.py +5 -96
- docs/.gitkeep +0 -0
- docs/FFmpeg环境变量问题排查.md +96 -0
- docs/MFA引擎安装说明.md +0 -64
- plans/cvvc_export_design.md +339 -0
- plans/cvvc_implementation_summary.md +128 -0
- requirements.txt +0 -0
- run_portable.bat +10 -0
- src/export_plugins/utau_oto_export.py +1080 -93
- src/gui_cloud.py +31 -280
- src/mfa_model_downloader.py +59 -96
- src/mfa_runner.py +0 -12
- src/pipeline.py +3 -20
- tests/.gitkeep +0 -0
- tests/test_mfa_model_downloader.py +0 -182
- tests/test_mfa_runner.py +0 -243
- tests/test_silero_vad_downloader.py +0 -65
- 目前便携版仅支持Windows!.txt +1 -0
.gitattributes
CHANGED
|
@@ -1,47 +1,2 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
*.bin filter=lfs diff=lfs merge=lfs -text
|
| 4 |
-
*.bin.* filter=lfs diff=lfs merge=lfs -text
|
| 5 |
-
*.bz2 filter=lfs diff=lfs merge=lfs -text
|
| 6 |
-
*.ftz filter=lfs diff=lfs merge=lfs -text
|
| 7 |
-
*.gz filter=lfs diff=lfs merge=lfs -text
|
| 8 |
-
*.h5 filter=lfs diff=lfs merge=lfs -text
|
| 9 |
-
*.joblib filter=lfs diff=lfs merge=lfs -text
|
| 10 |
-
*.lfs.* filter=lfs diff=lfs merge=lfs -text
|
| 11 |
-
*.model filter=lfs diff=lfs merge=lfs -text
|
| 12 |
-
*.msgpack filter=lfs diff=lfs merge=lfs -text
|
| 13 |
-
*.onnx filter=lfs diff=lfs merge=lfs -text
|
| 14 |
-
*.ot filter=lfs diff=lfs merge=lfs -text
|
| 15 |
-
*.parquet filter=lfs diff=lfs merge=lfs -text
|
| 16 |
-
*.pb filter=lfs diff=lfs merge=lfs -text
|
| 17 |
-
*.pt filter=lfs diff=lfs merge=lfs -text
|
| 18 |
-
*.pth filter=lfs diff=lfs merge=lfs -text
|
| 19 |
-
*.rar filter=lfs diff=lfs merge=lfs -text
|
| 20 |
-
saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
| 21 |
-
*.tar.* filter=lfs diff=lfs merge=lfs -text
|
| 22 |
-
*.tflite filter=lfs diff=lfs merge=lfs -text
|
| 23 |
-
*.tgz filter=lfs diff=lfs merge=lfs -text
|
| 24 |
-
*.xz filter=lfs diff=lfs merge=lfs -text
|
| 25 |
-
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 26 |
-
*.zstandard filter=lfs diff=lfs merge=lfs -text
|
| 27 |
-
*.tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 28 |
-
*.db* filter=lfs diff=lfs merge=lfs -text
|
| 29 |
-
*.ark* filter=lfs diff=lfs merge=lfs -text
|
| 30 |
-
**/*ckpt*data* filter=lfs diff=lfs merge=lfs -text
|
| 31 |
-
**/*ckpt*.meta filter=lfs diff=lfs merge=lfs -text
|
| 32 |
-
**/*ckpt*.index filter=lfs diff=lfs merge=lfs -text
|
| 33 |
-
*.safetensors filter=lfs diff=lfs merge=lfs -text
|
| 34 |
-
*.ckpt filter=lfs diff=lfs merge=lfs -text
|
| 35 |
-
*.gguf* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
-
*.ggml filter=lfs diff=lfs merge=lfs -text
|
| 37 |
-
*.llamafile* filter=lfs diff=lfs merge=lfs -text
|
| 38 |
-
*.pt2 filter=lfs diff=lfs merge=lfs -text
|
| 39 |
-
*.mlmodel filter=lfs diff=lfs merge=lfs -text
|
| 40 |
-
*.npy filter=lfs diff=lfs merge=lfs -text
|
| 41 |
-
*.npz filter=lfs diff=lfs merge=lfs -text
|
| 42 |
-
*.pickle filter=lfs diff=lfs merge=lfs -text
|
| 43 |
-
*.pkl filter=lfs diff=lfs merge=lfs -text
|
| 44 |
-
*.tar filter=lfs diff=lfs merge=lfs -text
|
| 45 |
-
*.wasm filter=lfs diff=lfs merge=lfs -text
|
| 46 |
-
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 47 |
-
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
| 1 |
+
tools/mfa_engine/Lib/site-packages/sudachidict_core/resources/system.dic filter=lfs diff=lfs merge=lfs -text
|
| 2 |
+
tools/mfa_engine/Library/lib/SPIRV-Tools-opt.lib filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.github/ISSUE_TEMPLATE/bug_report.md
DELETED
|
@@ -1,32 +0,0 @@
|
|
| 1 |
-
---
|
| 2 |
-
name: Bug 报告
|
| 3 |
-
about: 报告程序问题或错误
|
| 4 |
-
title: '[BUG] '
|
| 5 |
-
labels: bug
|
| 6 |
-
assignees: ''
|
| 7 |
-
---
|
| 8 |
-
|
| 9 |
-
## 问题描述
|
| 10 |
-
简要描述遇到的问题
|
| 11 |
-
|
| 12 |
-
## 复现步骤
|
| 13 |
-
1. 打开程序
|
| 14 |
-
2. 点击 ...
|
| 15 |
-
3. 出现错误
|
| 16 |
-
|
| 17 |
-
## 期望行为
|
| 18 |
-
描述期望的正确行为
|
| 19 |
-
|
| 20 |
-
## 环境信息
|
| 21 |
-
- 操作系统: Windows 10 / Windows 11
|
| 22 |
-
- Python 版本:
|
| 23 |
-
- 显卡: 有 NVIDIA GPU / 无独立显卡
|
| 24 |
-
- 内存:
|
| 25 |
-
|
| 26 |
-
## 日志输出
|
| 27 |
-
```
|
| 28 |
-
粘贴相关日志或错误信息
|
| 29 |
-
```
|
| 30 |
-
|
| 31 |
-
## 截图
|
| 32 |
-
如有必要,添加截图帮助说明问题
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
.gitignore
CHANGED
|
@@ -1,49 +1,29 @@
|
|
| 1 |
-
#
|
| 2 |
-
|
| 3 |
-
|
| 4 |
-
|
| 5 |
-
|
| 6 |
-
__pycache__/
|
| 7 |
-
*.py[cod]
|
| 8 |
-
*$py.class
|
| 9 |
-
*.so
|
| 10 |
-
*.egg
|
| 11 |
-
*.egg-info/
|
| 12 |
-
|
| 13 |
-
# IDE
|
| 14 |
-
.idea/
|
| 15 |
-
.kiro/
|
| 16 |
-
.vscode/
|
| 17 |
-
*.swp
|
| 18 |
-
*.swo
|
| 19 |
-
|
| 20 |
-
# 日志和临时文件
|
| 21 |
-
*.log
|
| 22 |
-
*.tmp
|
| 23 |
-
temp/
|
| 24 |
mfa_temp/
|
|
|
|
|
|
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
#
|
| 27 |
config.json
|
|
|
|
|
|
|
|
|
|
| 28 |
|
| 29 |
-
#
|
| 30 |
-
|
| 31 |
-
|
| 32 |
-
|
| 33 |
-
|
| 34 |
-
|
| 35 |
-
!bank/.gitkeep
|
| 36 |
-
!export/.gitkeep
|
| 37 |
-
|
| 38 |
-
# 测试临时文件
|
| 39 |
-
tests/temp/*
|
| 40 |
-
!tests/temp/.gitkeep
|
| 41 |
-
|
| 42 |
-
# MFA 引擎 (体积大,需单独下载)
|
| 43 |
-
tools/mfa_engine/
|
| 44 |
|
| 45 |
-
#
|
| 46 |
-
|
| 47 |
-
|
| 48 |
-
|
| 49 |
-
|
|
|
|
| 1 |
+
# Large local/portable data folders
|
| 2 |
+
便携版打包/
|
| 3 |
+
bank/
|
| 4 |
+
export/
|
| 5 |
+
mfa_data/
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 6 |
mfa_temp/
|
| 7 |
+
models/
|
| 8 |
+
python/
|
| 9 |
+
tools/mfa_engine/
|
| 10 |
+
tools/ffmpeg/
|
| 11 |
|
| 12 |
+
# Local config files
|
| 13 |
config.json
|
| 14 |
+
*.local.json
|
| 15 |
+
.env
|
| 16 |
+
.env.*
|
| 17 |
|
| 18 |
+
# Python cache/log files
|
| 19 |
+
__pycache__/
|
| 20 |
+
*.py[cod]
|
| 21 |
+
*.pyo
|
| 22 |
+
*.pyd
|
| 23 |
+
*.log
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 24 |
|
| 25 |
+
# OS/editor
|
| 26 |
+
.DS_Store
|
| 27 |
+
Thumbs.db
|
| 28 |
+
.vscode/
|
| 29 |
+
.idea/
|
README_1.1.txt
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
此包为增量更新包!请用包内文件覆盖 1.0 版本中的对应文件以完成更新。
|
| 2 |
+
|
| 3 |
+
更新内容大致为:更新了 UTAU oto 导出(现已支持中文,并且能导出日语的 CVVC 音源);内置了 FFmpeg;修复了可能出现的路径问题。
|
app.py
CHANGED
|
@@ -13,14 +13,10 @@ from pathlib import Path
|
|
| 13 |
|
| 14 |
logging.basicConfig(
|
| 15 |
level=logging.INFO,
|
| 16 |
-
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
| 17 |
-
handlers=[logging.StreamHandler(sys.stdout)]
|
| 18 |
)
|
| 19 |
logger = logging.getLogger(__name__)
|
| 20 |
|
| 21 |
-
# 确保日志立即输出(禁用缓冲)
|
| 22 |
-
sys.stdout.reconfigure(line_buffering=True) if hasattr(sys.stdout, 'reconfigure') else None
|
| 23 |
-
|
| 24 |
# 项目根目录
|
| 25 |
BASE_DIR = Path(__file__).parent.absolute()
|
| 26 |
|
|
@@ -53,69 +49,6 @@ MODELS_DIR = None # 延迟初始化
|
|
| 53 |
MFA_DIR = None
|
| 54 |
|
| 55 |
|
| 56 |
-
def cleanup_temp_files():
|
| 57 |
-
"""
|
| 58 |
-
启动时清理临时文件,释放磁盘空间
|
| 59 |
-
|
| 60 |
-
清理目标:
|
| 61 |
-
- /tmp/gradio/* (Gradio 上传缓存)
|
| 62 |
-
- /tmp/jinriki_* (本应用的工作空间)
|
| 63 |
-
- /tmp/mfa_* (MFA 临时文件)
|
| 64 |
-
"""
|
| 65 |
-
import shutil
|
| 66 |
-
import time
|
| 67 |
-
|
| 68 |
-
logger.info("清理临时文件...")
|
| 69 |
-
|
| 70 |
-
cleaned_size = 0
|
| 71 |
-
cleaned_count = 0
|
| 72 |
-
|
| 73 |
-
# 清理 Gradio 缓存
|
| 74 |
-
gradio_tmp = Path("/tmp/gradio")
|
| 75 |
-
if gradio_tmp.exists():
|
| 76 |
-
try:
|
| 77 |
-
for item in gradio_tmp.iterdir():
|
| 78 |
-
try:
|
| 79 |
-
if item.is_dir():
|
| 80 |
-
size = sum(f.stat().st_size for f in item.rglob('*') if f.is_file())
|
| 81 |
-
shutil.rmtree(item)
|
| 82 |
-
else:
|
| 83 |
-
size = item.stat().st_size
|
| 84 |
-
item.unlink()
|
| 85 |
-
cleaned_size += size
|
| 86 |
-
cleaned_count += 1
|
| 87 |
-
except Exception:
|
| 88 |
-
pass
|
| 89 |
-
except Exception as e:
|
| 90 |
-
logger.warning(f"清理 Gradio 缓存失败: {e}")
|
| 91 |
-
|
| 92 |
-
# 清理 jinriki 工作空间
|
| 93 |
-
tmp_dir = Path("/tmp")
|
| 94 |
-
if tmp_dir.exists():
|
| 95 |
-
try:
|
| 96 |
-
for item in tmp_dir.iterdir():
|
| 97 |
-
if item.name.startswith("jinriki_") or item.name.startswith("mfa_"):
|
| 98 |
-
try:
|
| 99 |
-
if item.is_dir():
|
| 100 |
-
size = sum(f.stat().st_size for f in item.rglob('*') if f.is_file())
|
| 101 |
-
shutil.rmtree(item)
|
| 102 |
-
else:
|
| 103 |
-
size = item.stat().st_size
|
| 104 |
-
item.unlink()
|
| 105 |
-
cleaned_size += size
|
| 106 |
-
cleaned_count += 1
|
| 107 |
-
except Exception:
|
| 108 |
-
pass
|
| 109 |
-
except Exception as e:
|
| 110 |
-
logger.warning(f"清理工作空间失败: {e}")
|
| 111 |
-
|
| 112 |
-
if cleaned_count > 0:
|
| 113 |
-
size_mb = cleaned_size / (1024 * 1024)
|
| 114 |
-
logger.info(f"已清理 {cleaned_count} 个临时文件/目录,释放 {size_mb:.1f} MB")
|
| 115 |
-
else:
|
| 116 |
-
logger.info("无需清理临时文件")
|
| 117 |
-
|
| 118 |
-
|
| 119 |
def ensure_ffmpeg():
|
| 120 |
"""确保 ffmpeg 已安装(用于音频格式转换,支持 m4a 等格式)"""
|
| 121 |
import shutil
|
|
@@ -168,17 +101,9 @@ def setup_environment():
|
|
| 168 |
Path("/home/studio_service").exists(), # 魔搭创空间特征目录
|
| 169 |
])
|
| 170 |
|
| 171 |
-
# 云端环境启动时清理临时文件,释放磁盘空间
|
| 172 |
-
if is_cloud:
|
| 173 |
-
cleanup_temp_files()
|
| 174 |
-
|
| 175 |
-
logger.info("清理完成,继续初始化...")
|
| 176 |
-
|
| 177 |
# 确保 ffmpeg 已安装(支持 m4a 等音频格式)
|
| 178 |
if is_cloud or platform.system() != "Windows":
|
| 179 |
-
logger.info("检查 ffmpeg...")
|
| 180 |
ensure_ffmpeg()
|
| 181 |
-
logger.info("ffmpeg 检查完成")
|
| 182 |
|
| 183 |
# 魔搭创空间无法访问 HuggingFace,使用镜像
|
| 184 |
if is_cloud and Path("/home/studio_service").exists():
|
|
@@ -196,7 +121,6 @@ def setup_environment():
|
|
| 196 |
if platform.system() != "Windows":
|
| 197 |
logger.info("Linux 环境,检查并安装 MFA...")
|
| 198 |
setup_mfa_linux()
|
| 199 |
-
logger.info("MFA 设置完成")
|
| 200 |
|
| 201 |
if is_cloud:
|
| 202 |
logger.info("检测到云端环境,正在初始化...")
|
|
@@ -205,9 +129,7 @@ def setup_environment():
|
|
| 205 |
os.environ.setdefault("TMPDIR", "/tmp")
|
| 206 |
|
| 207 |
# 下载所有必需模型
|
| 208 |
-
logger.info("开始下载模型...")
|
| 209 |
download_all_models()
|
| 210 |
-
logger.info("模型下载完成")
|
| 211 |
else:
|
| 212 |
logger.info("本地环境运行")
|
| 213 |
|
|
@@ -248,13 +170,11 @@ def setup_mfa_linux():
|
|
| 248 |
mamba_root.mkdir(parents=True, exist_ok=True)
|
| 249 |
|
| 250 |
# 下载并安装 micromamba
|
| 251 |
-
|
| 252 |
"bash", "-c",
|
| 253 |
f'curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C {mamba_root} bin/micromamba'
|
| 254 |
-
], check=True, capture_output=True,
|
| 255 |
logger.info("micromamba 下载完成")
|
| 256 |
-
else:
|
| 257 |
-
logger.info("micromamba 已存在,跳过下载")
|
| 258 |
|
| 259 |
# 2. 使用 micromamba 创建环境并安装 MFA
|
| 260 |
mfa_bin_path = mfa_env / "bin" / "mfa"
|
|
@@ -277,23 +197,20 @@ def setup_mfa_linux():
|
|
| 277 |
logger.info("MFA 环境验证通过,无需重新安装")
|
| 278 |
|
| 279 |
if need_install:
|
| 280 |
-
logger.info("使用 micromamba 安装 MFA
|
| 281 |
env = os.environ.copy()
|
| 282 |
env["MAMBA_ROOT_PREFIX"] = str(mamba_root)
|
| 283 |
|
| 284 |
# 创建环境并安装 MFA(指定 Python 3.11)
|
| 285 |
-
|
| 286 |
-
result = subprocess.run([
|
| 287 |
str(mamba_bin), "create", "-n", "mfa",
|
| 288 |
"-c", "conda-forge",
|
| 289 |
"python=3.11",
|
| 290 |
"montreal-forced-aligner",
|
| 291 |
"-y"
|
| 292 |
], env=env, check=True, capture_output=True, text=True, timeout=600)
|
| 293 |
-
logger.info("MFA 环境创建完成")
|
| 294 |
|
| 295 |
# 更新确保使用 CPU 版本的 kaldi
|
| 296 |
-
logger.info("安装 CPU 版本 kaldi...")
|
| 297 |
subprocess.run([
|
| 298 |
str(mamba_bin), "install", "-n", "mfa",
|
| 299 |
"-c", "conda-forge",
|
|
@@ -653,21 +570,13 @@ def download_mfa_models_all() -> bool:
|
|
| 653 |
|
| 654 |
def main():
|
| 655 |
"""主入口"""
|
| 656 |
-
logger.info("=" * 50)
|
| 657 |
-
logger.info("人力V助手 云端版启动")
|
| 658 |
-
logger.info("=" * 50)
|
| 659 |
-
|
| 660 |
setup_environment()
|
| 661 |
|
| 662 |
-
logger.info("环境初始化完成,启动 Gradio UI...")
|
| 663 |
-
|
| 664 |
# 导入并启动云端 GUI
|
| 665 |
from src.gui_cloud import create_cloud_ui
|
| 666 |
|
| 667 |
app = create_cloud_ui()
|
| 668 |
|
| 669 |
-
logger.info("Gradio UI 创建完成,启动服务...")
|
| 670 |
-
|
| 671 |
# 云端配置
|
| 672 |
# 启用队列,魔搭CPU按需分配,无需设置并发上限
|
| 673 |
app.queue()
|
|
|
|
| 13 |
|
| 14 |
logging.basicConfig(
|
| 15 |
level=logging.INFO,
|
| 16 |
+
format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
|
|
|
|
| 17 |
)
|
| 18 |
logger = logging.getLogger(__name__)
|
| 19 |
|
|
|
|
|
|
|
|
|
|
| 20 |
# 项目根目录
|
| 21 |
BASE_DIR = Path(__file__).parent.absolute()
|
| 22 |
|
|
|
|
| 49 |
MFA_DIR = None
|
| 50 |
|
| 51 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 52 |
def ensure_ffmpeg():
|
| 53 |
"""确保 ffmpeg 已安装(用于音频格式转换,支持 m4a 等格式)"""
|
| 54 |
import shutil
|
|
|
|
| 101 |
Path("/home/studio_service").exists(), # 魔搭创空间特征目录
|
| 102 |
])
|
| 103 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 104 |
# 确保 ffmpeg 已安装(支持 m4a 等音频格式)
|
| 105 |
if is_cloud or platform.system() != "Windows":
|
|
|
|
| 106 |
ensure_ffmpeg()
|
|
|
|
| 107 |
|
| 108 |
# 魔搭创空间无法访问 HuggingFace,使用镜像
|
| 109 |
if is_cloud and Path("/home/studio_service").exists():
|
|
|
|
| 121 |
if platform.system() != "Windows":
|
| 122 |
logger.info("Linux 环境,检查并安装 MFA...")
|
| 123 |
setup_mfa_linux()
|
|
|
|
| 124 |
|
| 125 |
if is_cloud:
|
| 126 |
logger.info("检测到云端环境,正在初始化...")
|
|
|
|
| 129 |
os.environ.setdefault("TMPDIR", "/tmp")
|
| 130 |
|
| 131 |
# 下载所有必需模型
|
|
|
|
| 132 |
download_all_models()
|
|
|
|
| 133 |
else:
|
| 134 |
logger.info("本地环境运行")
|
| 135 |
|
|
|
|
| 170 |
mamba_root.mkdir(parents=True, exist_ok=True)
|
| 171 |
|
| 172 |
# 下载并安装 micromamba
|
| 173 |
+
subprocess.run([
|
| 174 |
"bash", "-c",
|
| 175 |
f'curl -Ls https://micro.mamba.pm/api/micromamba/linux-64/latest | tar -xvj -C {mamba_root} bin/micromamba'
|
| 176 |
+
], check=True, capture_output=True, timeout=120)
|
| 177 |
logger.info("micromamba 下载完成")
|
|
|
|
|
|
|
| 178 |
|
| 179 |
# 2. 使用 micromamba 创建环境并安装 MFA
|
| 180 |
mfa_bin_path = mfa_env / "bin" / "mfa"
|
|
|
|
| 197 |
logger.info("MFA 环境验证通过,无需重新安装")
|
| 198 |
|
| 199 |
if need_install:
|
| 200 |
+
logger.info("使用 micromamba 安装 MFA...")
|
| 201 |
env = os.environ.copy()
|
| 202 |
env["MAMBA_ROOT_PREFIX"] = str(mamba_root)
|
| 203 |
|
| 204 |
# 创建环境并安装 MFA(指定 Python 3.11)
|
| 205 |
+
subprocess.run([
|
|
|
|
| 206 |
str(mamba_bin), "create", "-n", "mfa",
|
| 207 |
"-c", "conda-forge",
|
| 208 |
"python=3.11",
|
| 209 |
"montreal-forced-aligner",
|
| 210 |
"-y"
|
| 211 |
], env=env, check=True, capture_output=True, text=True, timeout=600)
|
|
|
|
| 212 |
|
| 213 |
# 更新确保使用 CPU 版本的 kaldi
|
|
|
|
| 214 |
subprocess.run([
|
| 215 |
str(mamba_bin), "install", "-n", "mfa",
|
| 216 |
"-c", "conda-forge",
|
|
|
|
| 570 |
|
| 571 |
def main():
|
| 572 |
"""主入口"""
|
|
|
|
|
|
|
|
|
|
|
|
|
| 573 |
setup_environment()
|
| 574 |
|
|
|
|
|
|
|
| 575 |
# 导入并启动云端 GUI
|
| 576 |
from src.gui_cloud import create_cloud_ui
|
| 577 |
|
| 578 |
app = create_cloud_ui()
|
| 579 |
|
|
|
|
|
|
|
| 580 |
# 云端配置
|
| 581 |
# 启用队列,魔搭CPU按需分配,无需设置并发上限
|
| 582 |
app.queue()
|
docs/.gitkeep
DELETED
|
File without changes
|
docs/FFmpeg环境变量问题排查.md
ADDED
|
@@ -0,0 +1,96 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# FFmpeg 环境变量问题排查指南
|
| 2 |
+
|
| 3 |
+
## 问题现象
|
| 4 |
+
|
| 5 |
+
用户已安装 FFmpeg 并设置了系统环境变量,但运行便携版时仍报错:
|
| 6 |
+
|
| 7 |
+
```
|
| 8 |
+
FileNotFoundError: [WinError 2] 系统找不到指定的档案。
|
| 9 |
+
```
|
| 10 |
+
|
| 11 |
+
错误发生在 `pipeline.py` 调用 FFmpeg 进行音频处理时。
|
| 12 |
+
|
| 13 |
+
## 原因分析
|
| 14 |
+
|
| 15 |
+
### 便携版环境隔离
|
| 16 |
+
|
| 17 |
+
便携版通过 `run_portable.bat` 启动,使用内嵌的 `python\python.exe`。该 Python 环境可能存在以下问题:
|
| 18 |
+
|
| 19 |
+
1. **PATH 环境变量未正确继承** - 便携版 Python 可能无法访问系统 PATH 中的 FFmpeg
|
| 20 |
+
2. **CMD 窗口环境变量刷新问题** - 新设置的环境变量需要重启 CMD 窗口才能生效
|
| 21 |
+
|
| 22 |
+
## 解决方案
|
| 23 |
+
|
| 24 |
+
### 方案一:重启命令提示符(推荐先尝试)
|
| 25 |
+
|
| 26 |
+
如果刚刚设置完 FFmpeg 环境变量,需要:
|
| 27 |
+
|
| 28 |
+
1. **关闭所有 CMD 窗口**
|
| 29 |
+
2. **重新打开 CMD 窗口**
|
| 30 |
+
3. 再次运行 `run_portable.bat`
|
| 31 |
+
|
| 32 |
+
> 环境变量修改后,已打开的 CMD 窗口不会自动刷新,必须重新打开。
|
| 33 |
+
|
| 34 |
+
### 方案二:验证 FFmpeg 是否正确安装
|
| 35 |
+
|
| 36 |
+
在 CMD 中执行以下命令验证:
|
| 37 |
+
|
| 38 |
+
```cmd
|
| 39 |
+
where ffmpeg
|
| 40 |
+
ffmpeg -version
|
| 41 |
+
```
|
| 42 |
+
|
| 43 |
+
如果显示 "找不到文件" 或报错,说明环境变量设置有问题。
|
| 44 |
+
|
| 45 |
+
### 方案三:检查环境变量设置
|
| 46 |
+
|
| 47 |
+
1. 按 `Win + R`,输入 `sysdm.cpl`,回车
|
| 48 |
+
2. 点击「高级」→「环境变量」
|
| 49 |
+
3. 在「系统变量」或「用户变量」中找到 `Path`
|
| 50 |
+
4. 确认 FFmpeg 的 `bin` 目录已添加,例如:
|
| 51 |
+
```
|
| 52 |
+
C:\ffmpeg\bin
|
| 53 |
+
```
|
| 54 |
+
5. 点击确定保存,然后**重新打开 CMD 窗口**
|
| 55 |
+
|
| 56 |
+
### 方案四:在便携版脚本中显式指定 FFmpeg 路径
|
| 57 |
+
|
| 58 |
+
如果上述方案无效,可以修改 `run_portable.bat`,在启动前手动添加 FFmpeg 路径:
|
| 59 |
+
|
| 60 |
+
```bat
|
| 61 |
+
@echo off
|
| 62 |
+
chcp 65001 >nul
|
| 63 |
+
echo 启动人力V助手 (便携版)...
|
| 64 |
+
|
| 65 |
+
REM 添加 FFmpeg 到 PATH(请修改为你的实际路径)
|
| 66 |
+
set PATH=%PATH%;C:\ffmpeg\bin
|
| 67 |
+
|
| 68 |
+
set PYTHONPATH=%~dp0
|
| 69 |
+
"%~dp0python\python.exe" "%~dp0main.py"
|
| 70 |
+
pause
|
| 71 |
+
```
|
| 72 |
+
|
| 73 |
+
将 `C:\ffmpeg\bin` 替换为你的 FFmpeg 实际安装路径。
|
| 74 |
+
|
| 75 |
+
### 方案五:将 FFmpeg 放入便携版目录
|
| 76 |
+
|
| 77 |
+
将 `ffmpeg.exe` 和 `ffprobe.exe` 直接复制到便携版根目录(与 `main.py` 同级),程序会优先使用当前目录下的可执行文件。
|
| 78 |
+
|
| 79 |
+
## 快速诊断命令
|
| 80 |
+
|
| 81 |
+
在 `run_portable.bat` 所在目录打开 CMD,执行:
|
| 82 |
+
|
| 83 |
+
```cmd
|
| 84 |
+
REM 检查系统 FFmpeg
|
| 85 |
+
where ffmpeg
|
| 86 |
+
|
| 87 |
+
REM 检查便携版 Python 能否找到 FFmpeg
|
| 88 |
+
python\python.exe -c "import subprocess; subprocess.run(['ffmpeg', '-version'])"
|
| 89 |
+
```
|
| 90 |
+
|
| 91 |
+
如果第一条命令成功但第二条失败,说明便携版 Python 环境与系统环境隔离,请使用方案四或方案五。
|
| 92 |
+
|
| 93 |
+
## 相关文件
|
| 94 |
+
|
| 95 |
+
- `run_portable.bat` - 便携版启动脚本
|
| 96 |
+
- `src/pipeline.py` - 音频处理流水线,调用 FFmpeg 的位置
|
docs/MFA引擎安装说明.md
DELETED
|
@@ -1,64 +0,0 @@
|
|
| 1 |
-
# MFA 引擎本地安装说明
|
| 2 |
-
|
| 3 |
-
本文档介绍如何在本地部署 MFA (Montreal Forced Aligner) 引擎,适用于想从源码运行项目的用户。
|
| 4 |
-
|
| 5 |
-
## 前提条件
|
| 6 |
-
|
| 7 |
-
- Windows 系统
|
| 8 |
-
- 已安装 conda 或 micromamba
|
| 9 |
-
|
| 10 |
-
## 安装步骤
|
| 11 |
-
|
| 12 |
-
### 1. 创建 MFA 环境
|
| 13 |
-
|
| 14 |
-
使用 conda:
|
| 15 |
-
```bash
|
| 16 |
-
conda create -n mfa_engine -c conda-forge montreal-forced-aligner
|
| 17 |
-
```
|
| 18 |
-
|
| 19 |
-
或使用 micromamba (推荐):
|
| 20 |
-
```bash
|
| 21 |
-
micromamba create -n mfa_engine -c conda-forge montreal-forced-aligner
|
| 22 |
-
```
|
| 23 |
-
|
| 24 |
-
### 2. 提取环境到项目目录
|
| 25 |
-
|
| 26 |
-
安装完成后,将环境目录复制到项目的 `tools` 文件夹中:
|
| 27 |
-
|
| 28 |
-
conda 默认路径:
|
| 29 |
-
```
|
| 30 |
-
%USERPROFILE%\anaconda3\envs\mfa_engine
|
| 31 |
-
或
|
| 32 |
-
%USERPROFILE%\miniconda3\envs\mfa_engine
|
| 33 |
-
```
|
| 34 |
-
|
| 35 |
-
micromamba 默认路径:
|
| 36 |
-
```
|
| 37 |
-
%USERPROFILE%\micromamba\envs\mfa_engine
|
| 38 |
-
```
|
| 39 |
-
|
| 40 |
-
将整个 `mfa_engine` 文件夹复制到项目的 `tools/` 目录下,最终结构:
|
| 41 |
-
|
| 42 |
-
```
|
| 43 |
-
项目根目录/
|
| 44 |
-
└── tools/
|
| 45 |
-
└── mfa_engine/
|
| 46 |
-
├── python.exe
|
| 47 |
-
├── Scripts/
|
| 48 |
-
│ └── mfa.exe
|
| 49 |
-
└── ...
|
| 50 |
-
```
|
| 51 |
-
|
| 52 |
-
### 3. 验证安装
|
| 53 |
-
|
| 54 |
-
运行以下命令验证 MFA 是否可用:
|
| 55 |
-
|
| 56 |
-
```bash
|
| 57 |
-
tools\mfa_engine\Scripts\mfa.exe version
|
| 58 |
-
```
|
| 59 |
-
|
| 60 |
-
如果正确输出版本号,说明安装成功。
|
| 61 |
-
|
| 62 |
-
## 常见问题
|
| 63 |
-
|
| 64 |
-
安装过程中如遇到问题或报错,建议将错误信息提供给 AI 助手寻求帮助。
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
plans/cvvc_export_design.md
ADDED
|
@@ -0,0 +1,339 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CVVC 音源导出功能设计方案
|
| 2 |
+
|
| 3 |
+
## 1. 概述
|
| 4 |
+
|
| 5 |
+
本方案为 [`utau_oto_export.py`](src/export_plugins/utau_oto_export.py) 插件添加 CVVC(Consonant-Vowel + Vowel-Consonant,即 CV 条目与 VC 条目的组合)音源导出功能。CVVC 相比传统 CV 音源,额外生成 **VC 部(元音到辅音过渡)** 条目,使音源在连续演唱时过渡更加自然流畅。
|
| 6 |
+
|
| 7 |
+
## 2. CVVC 音源结构
|
| 8 |
+
|
| 9 |
+
### 2.1 条目类型
|
| 10 |
+
|
| 11 |
+
| 类型 | 别名格式 | 示例 | 说明 |
|
| 12 |
+
|------|----------|------|------|
|
| 13 |
+
| **CV** | `{辅音}{元音}` | `ba`, `ka`, `ni` | 辅音+元音(现有功能) |
|
| 14 |
+
| **V** | `- {元音}` | `- a`, `- i` | 句首元音(现有功能,纯元音) |
|
| 15 |
+
| **VC** | `{元音} {辅音}` | `a k`, `i n` | 元音到辅音过渡(**新增**) |
|
| 16 |
+
| **VV** | `{元音} {元音}` | `a i`, `i u` | 元音到元音过渡(可选,暂不实现) |
|
| 17 |
+
|
| 18 |
+
### 2.2 VC 部参数计算
|
| 19 |
+
|
| 20 |
+
VC 部捕捉从元音到下一个辅音的过渡,参数计算逻辑如下:
|
| 21 |
+
|
| 22 |
+
```
|
| 23 |
+
音频时间线: |----元音(V)----|----辅音(C)----|
|
| 24 |
+
^ ^
|
| 25 |
+
VC开始 VC结束
|
| 26 |
+
(offset) (cutoff位置)
|
| 27 |
+
```
|
| 28 |
+
|
| 29 |
+
**VC 部参数说明:**
|
| 30 |
+
|
| 31 |
+
| 参数 | 计算方式 | 说明 |
|
| 32 |
+
|------|----------|------|
|
| 33 |
+
| offset | `vowel_end - vowel_duration × vc_offset_ratio` | VC 开始位置,在元音后半段 |
|
| 34 |
+
| consonant | `min(30, (consonant_end - offset) × 0.3)` | 固定区域,较短 |
|
| 35 |
+
| cutoff | `-(consonant_end - offset)` | 负值:其绝对值表示从 offset 起算的片段长度,片段到辅音结束处为止 |
|
| 36 |
+
| preutterance | `vowel_end - offset` | 从 offset 到辅音开始的距离 |
|
| 37 |
+
| overlap | `preutterance × vc_overlap_ratio` | 与前一音符的交叉淡化 |
|
| 38 |
+
|
| 39 |
+
## 3. 代码修改设计
|
| 40 |
+
|
| 41 |
+
### 3.1 新增配置选项
|
| 42 |
+
|
| 43 |
+
在 [`get_options()`](src/export_plugins/utau_oto_export.py:254) 方法中添加以下选项:
|
| 44 |
+
|
| 45 |
+
```python
|
| 46 |
+
PluginOption(
|
| 47 |
+
key="cvvc_mode",
|
| 48 |
+
label="CVVC 模式",
|
| 49 |
+
option_type=OptionType.SWITCH,
|
| 50 |
+
default=False,
|
| 51 |
+
description="启用 CVVC 模式,额外生成 VC 部(元音到辅音过渡)条目"
|
| 52 |
+
),
|
| 53 |
+
PluginOption(
|
| 54 |
+
key="vc_alias_separator",
|
| 55 |
+
label="VC 别名分隔符",
|
| 56 |
+
option_type=OptionType.COMBO,
|
| 57 |
+
default=" ",
|
| 58 |
+
choices=[" ", "_", "-"],
|
| 59 |
+
description="VC 部别名中元音和辅音之间的分隔符",
|
| 60 |
+
visible_when={"cvvc_mode": True}
|
| 61 |
+
),
|
| 62 |
+
PluginOption(
|
| 63 |
+
key="vc_offset_ratio",
|
| 64 |
+
label="VC 偏移比例",
|
| 65 |
+
option_type=OptionType.NUMBER,
|
| 66 |
+
default=0.5,
|
| 67 |
+
min_value=0.3,
|
| 68 |
+
max_value=0.8,
|
| 69 |
+
description="VC 部开始位置 = 元音结束位置 - 元音时长 × 此比例",
|
| 70 |
+
visible_when={"cvvc_mode": True}
|
| 71 |
+
),
|
| 72 |
+
PluginOption(
|
| 73 |
+
key="vc_overlap_ratio",
|
| 74 |
+
label="VC Overlap 比例",
|
| 75 |
+
option_type=OptionType.NUMBER,
|
| 76 |
+
default=0.5,
|
| 77 |
+
min_value=0.3,
|
| 78 |
+
max_value=0.8,
|
| 79 |
+
description="VC 部的 Overlap = Preutterance × 此比例",
|
| 80 |
+
visible_when={"cvvc_mode": True}
|
| 81 |
+
),
|
| 82 |
+
```
|
| 83 |
+
|
| 84 |
+
### 3.2 新增方法
|
| 85 |
+
|
| 86 |
+
#### 3.2.1 `_extract_vc_pairs()` - 提取 VC 对
|
| 87 |
+
|
| 88 |
+
在 [`_extract_cv_pairs()`](src/export_plugins/utau_oto_export.py:534) 方法基础上,新增 VC 对提取逻辑:
|
| 89 |
+
|
| 90 |
+
```python
|
| 91 |
+
def _extract_vc_pairs(
|
| 92 |
+
self,
|
| 93 |
+
words_tier,
|
| 94 |
+
phones_tier,
|
| 95 |
+
wav_name: str,
|
| 96 |
+
wav_duration_ms: float,
|
| 97 |
+
language: str,
|
| 98 |
+
use_hiragana: bool,
|
| 99 |
+
vc_offset_ratio: float,
|
| 100 |
+
vc_overlap_ratio: float,
|
| 101 |
+
vc_separator: str
|
| 102 |
+
) -> List[Dict]:
|
| 103 |
+
"""
|
| 104 |
+
从 phones 层提取元音+辅音对(VC 部)
|
| 105 |
+
|
| 106 |
+
VC 部捕捉从当前元音到下一个辅音的过渡
|
| 107 |
+
"""
|
| 108 |
+
entries = []
|
| 109 |
+
intervals = list(phones_tier)
|
| 110 |
+
|
| 111 |
+
for i, interval in enumerate(intervals):
|
| 112 |
+
phone = interval.mark.strip()
|
| 113 |
+
|
| 114 |
+
if phone in SKIP_MARKS:
|
| 115 |
+
continue
|
| 116 |
+
|
| 117 |
+
# 当前是元音,检查下一个是否是辅音
|
| 118 |
+
if is_vowel(phone, language):
|
| 119 |
+
vowel = phone
|
| 120 |
+
vowel_start_ms = interval.minTime * 1000
|
| 121 |
+
vowel_end_ms = interval.maxTime * 1000
|
| 122 |
+
vowel_duration = vowel_end_ms - vowel_start_ms
|
| 123 |
+
|
| 124 |
+
# 检查下一个音素
|
| 125 |
+
if i + 1 < len(intervals):
|
| 126 |
+
next_interval = intervals[i + 1]
|
| 127 |
+
next_phone = next_interval.mark.strip()
|
| 128 |
+
|
| 129 |
+
if next_phone not in SKIP_MARKS and is_consonant(next_phone, language):
|
| 130 |
+
consonant = next_phone
|
| 131 |
+
consonant_end_ms = next_interval.maxTime * 1000
|
| 132 |
+
|
| 133 |
+
# 生成 VC 别名
|
| 134 |
+
v_alias = ipa_to_alias(None, vowel, language, use_hiragana)
|
| 135 |
+
c_alias = ipa_to_alias(consonant, None, language, use_hiragana)
|
| 136 |
+
|
| 137 |
+
if v_alias and c_alias:
|
| 138 |
+
vc_alias = f"{v_alias}{vc_separator}{c_alias}"
|
| 139 |
+
|
| 140 |
+
# 计算 VC 参数
|
| 141 |
+
entry = self._calculate_vc_params(
|
| 142 |
+
wav_name=wav_name,
|
| 143 |
+
alias=vc_alias,
|
| 144 |
+
vowel_start_ms=vowel_start_ms,
|
| 145 |
+
vowel_end_ms=vowel_end_ms,
|
| 146 |
+
consonant_end_ms=consonant_end_ms,
|
| 147 |
+
wav_duration_ms=wav_duration_ms,
|
| 148 |
+
vc_offset_ratio=vc_offset_ratio,
|
| 149 |
+
vc_overlap_ratio=vc_overlap_ratio
|
| 150 |
+
)
|
| 151 |
+
entries.append(entry)
|
| 152 |
+
|
| 153 |
+
return entries
|
| 154 |
+
```
|
| 155 |
+
|
| 156 |
+
#### 3.2.2 `_calculate_vc_params()` - 计算 VC 参数
|
| 157 |
+
|
| 158 |
+
```python
|
| 159 |
+
def _calculate_vc_params(
|
| 160 |
+
self,
|
| 161 |
+
wav_name: str,
|
| 162 |
+
alias: str,
|
| 163 |
+
vowel_start_ms: float,
|
| 164 |
+
vowel_end_ms: float,
|
| 165 |
+
consonant_end_ms: float,
|
| 166 |
+
wav_duration_ms: float,
|
| 167 |
+
vc_offset_ratio: float,
|
| 168 |
+
vc_overlap_ratio: float
|
| 169 |
+
) -> Dict:
|
| 170 |
+
"""
|
| 171 |
+
计算 VC 部的 oto.ini 参数
|
| 172 |
+
|
| 173 |
+
VC 部从元音后半段开始,到辅音结束
|
| 174 |
+
"""
|
| 175 |
+
vowel_duration = vowel_end_ms - vowel_start_ms
|
| 176 |
+
|
| 177 |
+
# offset: 元音后半段位置
|
| 178 |
+
offset = vowel_end_ms - vowel_duration * vc_offset_ratio
|
| 179 |
+
|
| 180 |
+
# 总时长
|
| 181 |
+
segment_duration = consonant_end_ms - offset
|
| 182 |
+
|
| 183 |
+
# preutterance: 从 offset 到辅音开始(即元音结束)的距离
|
| 184 |
+
preutterance = vowel_end_ms - offset
|
| 185 |
+
|
| 186 |
+
# consonant: 固定区域,较短
|
| 187 |
+
consonant = min(30, segment_duration * 0.3)
|
| 188 |
+
|
| 189 |
+
# overlap: 较大,平滑过渡
|
| 190 |
+
overlap = preutterance * vc_overlap_ratio
|
| 191 |
+
|
| 192 |
+
# cutoff: 负值,表示总时长
|
| 193 |
+
cutoff = -segment_duration
|
| 194 |
+
|
| 195 |
+
return {
|
| 196 |
+
"wav_name": wav_name,
|
| 197 |
+
"alias": alias,
|
| 198 |
+
"offset": round(offset, 1),
|
| 199 |
+
"consonant": round(consonant, 1),
|
| 200 |
+
"cutoff": round(cutoff, 1),
|
| 201 |
+
"preutterance": round(preutterance, 1),
|
| 202 |
+
"overlap": round(overlap, 1),
|
| 203 |
+
"segment_duration": segment_duration,
|
| 204 |
+
"is_vc": True # 标记为 VC 部
|
| 205 |
+
}
|
| 206 |
+
```
|
| 207 |
+
|
| 208 |
+
### 3.3 修改 `_parse_textgrids()` 方法
|
| 209 |
+
|
| 210 |
+
在 [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:463) 中添加 CVVC 模式支持:
|
| 211 |
+
|
| 212 |
+
```python
|
| 213 |
+
def _parse_textgrids(
|
| 214 |
+
self,
|
| 215 |
+
slices_dir: str,
|
| 216 |
+
textgrid_dir: str,
|
| 217 |
+
language: str,
|
| 218 |
+
use_hiragana: bool,
|
| 219 |
+
overlap_ratio: float,
|
| 220 |
+
cvvc_mode: bool = False, # 新增
|
| 221 |
+
vc_offset_ratio: float = 0.5, # 新增
|
| 222 |
+
vc_overlap_ratio: float = 0.5, # 新增
|
| 223 |
+
vc_separator: str = " " # 新增
|
| 224 |
+
) -> Tuple[List[Dict], set]:
|
| 225 |
+
# ... 现有代码 ...
|
| 226 |
+
|
| 227 |
+
# 提取 CV 对(现有逻辑)
|
| 228 |
+
entries = self._extract_cv_pairs(...)
|
| 229 |
+
oto_entries.extend(entries)
|
| 230 |
+
|
| 231 |
+
# 如果启用 CVVC 模式,额外提取 VC 对
|
| 232 |
+
if cvvc_mode:
|
| 233 |
+
vc_entries = self._extract_vc_pairs(
|
| 234 |
+
words_tier, phones_tier, wav_name, wav_duration_ms,
|
| 235 |
+
language, use_hiragana,
|
| 236 |
+
vc_offset_ratio, vc_overlap_ratio, vc_separator
|
| 237 |
+
)
|
| 238 |
+
oto_entries.extend(vc_entries)
|
| 239 |
+
|
| 240 |
+
# ... 现有代码 ...
|
| 241 |
+
```
|
| 242 |
+
|
| 243 |
+
### 3.4 修改 `export()` 方法
|
| 244 |
+
|
| 245 |
+
在 [`export()`](src/export_plugins/utau_oto_export.py:353) 中读取 CVVC 相关选项:
|
| 246 |
+
|
| 247 |
+
```python
|
| 248 |
+
def export(self, source_name: str, bank_dir: str, options: Dict[str, Any]) -> Tuple[bool, str]:
|
| 249 |
+
# ... 现有选项读取 ...
|
| 250 |
+
|
| 251 |
+
# CVVC 模式选项
|
| 252 |
+
cvvc_mode = options.get("cvvc_mode", False)
|
| 253 |
+
vc_separator = options.get("vc_alias_separator", " ")
|
| 254 |
+
vc_offset_ratio = float(options.get("vc_offset_ratio", 0.5))
|
| 255 |
+
vc_overlap_ratio = float(options.get("vc_overlap_ratio", 0.5))
|
| 256 |
+
|
| 257 |
+
# 调用 _parse_textgrids 时传入新参数
|
| 258 |
+
oto_entries, wav_files = self._parse_textgrids(
|
| 259 |
+
paths["slices_dir"],
|
| 260 |
+
paths["textgrid_dir"],
|
| 261 |
+
language,
|
| 262 |
+
use_hiragana,
|
| 263 |
+
overlap_ratio,
|
| 264 |
+
cvvc_mode=cvvc_mode,
|
| 265 |
+
vc_offset_ratio=vc_offset_ratio,
|
| 266 |
+
vc_overlap_ratio=vc_overlap_ratio,
|
| 267 |
+
vc_separator=vc_separator
|
| 268 |
+
)
|
| 269 |
+
|
| 270 |
+
# ... 现有代码 ...
|
| 271 |
+
```
|
| 272 |
+
|
| 273 |
+
## 4. 流程图
|
| 274 |
+
|
| 275 |
+
```mermaid
|
| 276 |
+
flowchart TD
|
| 277 |
+
A[开始导出] --> B{CVVC 模式?}
|
| 278 |
+
B -->|否| C[仅提取 CV 对]
|
| 279 |
+
B -->|是| D[提取 CV 对]
|
| 280 |
+
D --> E[提取 VC 对]
|
| 281 |
+
E --> F[合并条目]
|
| 282 |
+
C --> G[筛选最佳样本]
|
| 283 |
+
F --> G
|
| 284 |
+
G --> H[复制音频文件]
|
| 285 |
+
H --> I[写入 oto.ini]
|
| 286 |
+
I --> J[写入 character.txt]
|
| 287 |
+
J --> K[导出完成]
|
| 288 |
+
```
|
| 289 |
+
|
| 290 |
+
## 5. VC 部提取流程
|
| 291 |
+
|
| 292 |
+
```mermaid
|
| 293 |
+
flowchart LR
|
| 294 |
+
subgraph TextGrid
|
| 295 |
+
V1[元音 V] --> C1[辅音 C]
|
| 296 |
+
C1 --> V2[元音 V]
|
| 297 |
+
V2 --> C2[辅音 C]
|
| 298 |
+
end
|
| 299 |
+
|
| 300 |
+
subgraph VC条目
|
| 301 |
+
VC1[V C - VC部]
|
| 302 |
+
VC2[V C - VC部]
|
| 303 |
+
end
|
| 304 |
+
|
| 305 |
+
V1 -.-> VC1
|
| 306 |
+
C1 -.-> VC1
|
| 307 |
+
V2 -.-> VC2
|
| 308 |
+
C2 -.-> VC2
|
| 309 |
+
```
|
| 310 |
+
|
| 311 |
+
## 6. 实现步骤
|
| 312 |
+
|
| 313 |
+
1. **添加配置选项** - 在 [`get_options()`](src/export_plugins/utau_oto_export.py:254) 中添加 CVVC 相关选项
|
| 314 |
+
2. **实现 VC 参数计算** - 新增 `_calculate_vc_params()` 方法
|
| 315 |
+
3. **实现 VC 对提取** - 新增 `_extract_vc_pairs()` 方法
|
| 316 |
+
4. **修改解析逻辑** - 更新 [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:463) 支持 CVVC 模式
|
| 317 |
+
5. **修改导出入口** - 更新 [`export()`](src/export_plugins/utau_oto_export.py:353) 读取新选项
|
| 318 |
+
6. **更新版本号** - 将版本从 1.1.0 更新为 1.2.0
|
| 319 |
+
|
| 320 |
+
## 7. 预期输出示例
|
| 321 |
+
|
| 322 |
+
启用 CVVC 模式后,oto.ini 将包含:
|
| 323 |
+
|
| 324 |
+
```ini
|
| 325 |
+
# CV 部(现有)
|
| 326 |
+
test_0000.wav=ba,30,50,-110,50,15
|
| 327 |
+
test_0000.wav=ka,140,60,-140,60,18
|
| 328 |
+
|
| 329 |
+
# VC 部(新增)
|
| 330 |
+
test_0000.wav=a k,70,27,-90,40,20
|
| 331 |
+
test_0000.wav=a n,180,30,-100,45,22.5
|
| 332 |
+
```
|
| 333 |
+
|
| 334 |
+
## 8. 注意事项
|
| 335 |
+
|
| 336 |
+
1. **跨字边界**:VC 部可能跨越 words 层的边界,需要决定是否限制在同一个字内
|
| 337 |
+
2. **别名冲突**:VC 别名可能与 CV 别名冲突,需要确保分隔符正确
|
| 338 |
+
3. **质量筛选**:VC 部也需要参与质量评分和筛选
|
| 339 |
+
4. **编码兼容**:VC 别名中的分隔符需要兼容目标编码(如 Shift_JIS)
|
plans/cvvc_implementation_summary.md
ADDED
|
@@ -0,0 +1,128 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# CVVC 音源导出功能实现总结
|
| 2 |
+
|
| 3 |
+
## 实现完成时间
|
| 4 |
+
2026-02-04
|
| 5 |
+
|
| 6 |
+
## 版本更新
|
| 7 |
+
- 插件版本从 1.1.0 更新至 **1.2.0**
|
| 8 |
+
|
| 9 |
+
## 新增功能
|
| 10 |
+
|
| 11 |
+
### 1. CVVC 模式支持
|
| 12 |
+
为 UTAU oto.ini 导出插件添加了 CVVC(Consonant-Vowel-Vowel-Consonant)音源导出功能,可额外生成 **VC 部(元音到辅音过渡)** 条目。
|
| 13 |
+
|
| 14 |
+
### 2. 新增配置选项
|
| 15 |
+
|
| 16 |
+
在 [`get_options()`](src/export_plugins/utau_oto_export.py:254) 方法中添加了 4 个新选项:
|
| 17 |
+
|
| 18 |
+
| 选项 | 类型 | 默认值 | 说明 |
|
| 19 |
+
|------|------|--------|------|
|
| 20 |
+
| `cvvc_mode` | 开关 | False | 启用/禁用 CVVC 模式 |
|
| 21 |
+
| `vc_alias_separator` | 下拉 | " " (空格) | VC 别名分隔符(空格/下划线/连字符) |
|
| 22 |
+
| `vc_offset_ratio` | 数字 | 0.5 | VC 偏移比例(0.3-0.8) |
|
| 23 |
+
| `vc_overlap_ratio` | 数字 | 0.5 | VC Overlap 比例(0.3-0.8) |
|
| 24 |
+
|
| 25 |
+
### 3. 新增方法
|
| 26 |
+
|
| 27 |
+
#### [`_calculate_vc_params()`](src/export_plugins/utau_oto_export.py:688)
|
| 28 |
+
计算 VC 部的 oto.ini 参数,包括:
|
| 29 |
+
- **offset**: 元音后半段位置
|
| 30 |
+
- **consonant**: 固定区域(较短)
|
| 31 |
+
- **cutoff**: 负值,到辅音结束
|
| 32 |
+
- **preutterance**: 从 offset 到辅音开始的距离
|
| 33 |
+
- **overlap**: 较大,平滑过渡
|
| 34 |
+
|
| 35 |
+
#### [`_extract_vc_pairs()`](src/export_plugins/utau_oto_export.py:649)
|
| 36 |
+
从 TextGrid 的 phones 层提取元音+辅音对(VC 部),遍历音素序列,当检测到元音后跟辅音时生成 VC 条目。
|
| 37 |
+
|
| 38 |
+
### 4. 修改的方法
|
| 39 |
+
|
| 40 |
+
#### [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:539)
|
| 41 |
+
- 添加了 4 个新参数支持 CVVC 模式
|
| 42 |
+
- 在提取 CV 对后,如果启用 CVVC 模式,额外调用 [`_extract_vc_pairs()`](src/export_plugins/utau_oto_export.py:649) 提取 VC 对
|
| 43 |
+
|
| 44 |
+
#### [`export()`](src/export_plugins/utau_oto_export.py:397)
|
| 45 |
+
- 读取 CVVC 相关配置选项
|
| 46 |
+
- 根据 CVVC 模式显示不同的日志信息
|
| 47 |
+
- 将 CVVC 参数传递给 [`_parse_textgrids()`](src/export_plugins/utau_oto_export.py:539)
|
| 48 |
+
|
| 49 |
+
## 工作原理
|
| 50 |
+
|
| 51 |
+
### VC 部提取流程
|
| 52 |
+
|
| 53 |
+
```
|
| 54 |
+
TextGrid phones 层:
|
| 55 |
+
[元音 V] → [辅音 C] → [元音 V] → [辅音 C]
|
| 56 |
+
↓ ↓ ↓ ↓
|
| 57 |
+
生成 VC 条目: [V C] [V C]
|
| 58 |
+
```
|
| 59 |
+
|
| 60 |
+
### VC 参数计算示例
|
| 61 |
+
|
| 62 |
+
假设:
|
| 63 |
+
- 元音时长:100ms (0-100ms)
|
| 64 |
+
- 辅音时长:60ms (100-160ms)
|
| 65 |
+
- `vc_offset_ratio` = 0.5
|
| 66 |
+
- `vc_overlap_ratio` = 0.5
|
| 67 |
+
|
| 68 |
+
计算结果:
|
| 69 |
+
- **offset** = 100 - 100×0.5 = 50ms
|
| 70 |
+
- **segment_duration** = 160 - 50 = 110ms
|
| 71 |
+
- **preutterance** = 100 - 50 = 50ms
|
| 72 |
+
- **consonant** = min(30, 110×0.3) = 30ms
|
| 73 |
+
- **overlap** = 50×0.5 = 25ms
|
| 74 |
+
- **cutoff** = -110ms
|
| 75 |
+
|
| 76 |
+
## 输出示例
|
| 77 |
+
|
| 78 |
+
启用 CVVC 模式后,oto.ini 将包含:
|
| 79 |
+
|
| 80 |
+
```ini
|
| 81 |
+
# CV 部(现有功能)
|
| 82 |
+
test_0000.wav=ba,30,50,-110,50,15
|
| 83 |
+
test_0000.wav=ka,140,60,-140,60,18
|
| 84 |
+
|
| 85 |
+
# VC 部(新增功能)
|
| 86 |
+
test_0000.wav=a k,70,27,-90,40,20
|
| 87 |
+
test_0000.wav=a n,180,30,-100,45,22.5
|
| 88 |
+
```
|
| 89 |
+
|
| 90 |
+
## 代码验证
|
| 91 |
+
|
| 92 |
+
✅ Python 语法检查通过
|
| 93 |
+
```bash
|
| 94 |
+
py -m py_compile src\export_plugins\utau_oto_export.py
|
| 95 |
+
# Exit code: 0 (成功)
|
| 96 |
+
```
|
| 97 |
+
|
| 98 |
+
## 使用方法
|
| 99 |
+
|
| 100 |
+
1. 在导出插件界面中找到 "UTAU oto.ini 导出" 插件
|
| 101 |
+
2. 启用 **"CVVC 模式"** 开关
|
| 102 |
+
3. 根据需要调整以下参数:
|
| 103 |
+
- **VC 别名分隔符**:选择空格、下划线或连字符
|
| 104 |
+
- **VC 偏移比例**:控制 VC 开始位置(推荐 0.5)
|
| 105 |
+
- **VC Overlap 比例**:控制过渡平滑度(推荐 0.5)
|
| 106 |
+
4. 执行导出
|
| 107 |
+
|
| 108 |
+
## 技术特点
|
| 109 |
+
|
| 110 |
+
1. **无损兼容**:CVVC 模式为可选功能,不影响现有 CV 导出
|
| 111 |
+
2. **参数可调**:提供多个参数供用户微调 VC 部效果
|
| 112 |
+
3. **自动提取**:从 TextGrid 自动识别元音-辅音序列
|
| 113 |
+
4. **质量筛选**:VC 部条目同样参与质量评分和筛选
|
| 114 |
+
5. **编码兼容**:VC 别名支持多种分隔符,兼容不同编码
|
| 115 |
+
|
| 116 |
+
## 注意事项
|
| 117 |
+
|
| 118 |
+
1. VC 部的提取依赖于 TextGrid 中音素的正确标注
|
| 119 |
+
2. VC 别名使用分隔符(默认空格)连接元音和辅音
|
| 120 |
+
3. VC 参数的计算基于元音和辅音的时间边界
|
| 121 |
+
4. 建议先用小数据集测试参数效果,再批量导出
|
| 122 |
+
|
| 123 |
+
## 后续优化建议
|
| 124 |
+
|
| 125 |
+
1. 支持 VV 部(元音到元音过渡)
|
| 126 |
+
2. 支持跨字边界的 VC 提取控制
|
| 127 |
+
3. 添加 VC 部专用的质量评估指标
|
| 128 |
+
4. 支持自定义 VC 别名格式模板
|
requirements.txt
CHANGED
|
Binary files a/requirements.txt and b/requirements.txt differ
|
|
|
run_portable.bat
ADDED
|
@@ -0,0 +1,10 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
@echo off
|
| 2 |
+
chcp 65001 >nul
|
| 3 |
+
echo 启动人力V助手 (便携版)...
|
| 4 |
+
set PYTHONPATH=%~dp0
|
| 5 |
+
|
| 6 |
+
set MFA_ROOT_DIR=%~dp0mfa_data
|
| 7 |
+
set PATH=%PATH%;%~dp0tools\ffmpeg\bin
|
| 8 |
+
|
| 9 |
+
"%~dp0python\python.exe" "%~dp0main.py"
|
| 10 |
+
pause
|
src/export_plugins/utau_oto_export.py
CHANGED
|
@@ -23,28 +23,49 @@ logger = logging.getLogger(__name__)
|
|
| 23 |
|
| 24 |
# 中文辅音(MFA 输出的 IPA 符号)
|
| 25 |
CHINESE_CONSONANTS = {
|
| 26 |
-
|
| 27 |
-
'
|
| 28 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 29 |
'tɕ', 'tɕʰ', 'dʑ', 'ɕ', 'ʑ',
|
|
|
|
| 30 |
'ts', 'tsʰ', 'dz', 's', 'z',
|
|
|
|
| 31 |
'ʈʂ', 'ʈʂʰ', 'ɖʐ', 'ʂ', 'ʐ',
|
|
|
|
| 32 |
'ɲ', 'j', 'w', 'ɥ',
|
| 33 |
-
|
|
|
|
| 34 |
}
|
| 35 |
|
| 36 |
# 中文元音(可能带声调标记)
|
|
|
|
| 37 |
CHINESE_VOWELS = {
|
|
|
|
| 38 |
'a', 'o', 'e', 'i', 'u', 'y', 'ü',
|
| 39 |
'ə', 'ɛ', 'ɔ', 'ɤ', 'ɨ', 'ʅ', 'ʉ',
|
| 40 |
-
|
| 41 |
-
'
|
| 42 |
-
|
| 43 |
-
'
|
| 44 |
-
|
| 45 |
-
'
|
| 46 |
-
|
| 47 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 48 |
}
|
| 49 |
|
| 50 |
# 日语辅音
|
|
@@ -86,6 +107,20 @@ FUZZY_VOWEL_GROUPS = [
|
|
| 86 |
('in', 'ing'), # 前鼻/后鼻
|
| 87 |
('ian', 'iang'), # 前鼻/后鼻
|
| 88 |
('uan', 'uang'), # 前鼻/后鼻
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 89 |
]
|
| 90 |
|
| 91 |
|
|
@@ -105,11 +140,24 @@ def is_vowel(phone: str, language: str) -> bool:
|
|
| 105 |
base_phone = _strip_tone(phone)
|
| 106 |
|
| 107 |
if language in ('chinese', 'zh', 'mandarin'):
|
|
|
|
| 108 |
if base_phone in CHINESE_VOWELS:
|
| 109 |
return True
|
| 110 |
-
|
|
|
|
|
|
|
|
|
|
| 111 |
if base_phone.startswith(v):
|
| 112 |
return True
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 113 |
return False
|
| 114 |
elif language in ('japanese', 'ja', 'jp'):
|
| 115 |
return base_phone in JAPANESE_VOWELS or base_phone.rstrip('ː') in {'a', 'i', 'ɯ', 'u', 'e', 'o'}
|
|
@@ -127,27 +175,150 @@ def _strip_tone(phone: str) -> str:
|
|
| 127 |
|
| 128 |
# ==================== IPA 到别名转换 ====================
|
| 129 |
|
| 130 |
-
# 中文 IPA 到拼音映射
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
'p': 'b', 'pʰ': 'p', 'pʲ': 'p',
|
| 134 |
'm': 'm', 'f': 'f',
|
| 135 |
-
't': 'd', 'tʰ': 't',
|
| 136 |
'n': 'n', 'l': 'l',
|
| 137 |
-
'k': 'g', 'kʰ': 'k',
|
|
|
|
| 138 |
'x': 'h', 'h': 'h',
|
| 139 |
'tɕ': 'j', 'tɕʰ': 'q', 'ɕ': 'x',
|
| 140 |
'ts': 'z', 'tsʰ': 'c', 's': 's',
|
| 141 |
'ʈʂ': 'zh', 'ʈʂʰ': 'ch', 'ʂ': 'sh', 'ʐ': 'r',
|
| 142 |
-
'ɲ': 'n', 'ŋ': '
|
| 143 |
-
'j': '
|
| 144 |
'ʔ': '',
|
| 145 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
| 146 |
'a': 'a', 'o': 'o', 'e': 'e', 'i': 'i', 'u': 'u', 'y': 'v', 'ü': 'v',
|
| 147 |
-
'ə': 'e', 'ɛ': 'e', 'ɔ': 'o', 'ɤ': 'e',
|
| 148 |
-
|
| 149 |
-
'aw': 'ao', 'ej': 'ei', 'ow': 'ou',
|
| 150 |
-
'
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 151 |
}
|
| 152 |
|
| 153 |
# 日语 IPA 到罗马音映射
|
|
@@ -214,17 +385,13 @@ ROMAJI_TO_HIRAGANA = {
|
|
| 214 |
|
| 215 |
|
| 216 |
def ipa_to_alias(consonant: Optional[str], vowel: Optional[str], language: str, use_hiragana: bool = False) -> Optional[str]:
|
| 217 |
-
"""将 IPA 音素转换为别名"""
|
| 218 |
c_base = _strip_tone(consonant) if consonant else ''
|
| 219 |
v_base = _strip_tone(vowel) if vowel else ''
|
| 220 |
|
| 221 |
if language in ('chinese', 'zh', 'mandarin'):
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
alias = (c_alias or '') + (v_alias or '')
|
| 225 |
-
# 清理非 ASCII 字符
|
| 226 |
-
alias = ''.join(c for c in alias if c.isascii() and (c.isalnum() or c == '_'))
|
| 227 |
-
return alias.lower() if alias else None
|
| 228 |
else:
|
| 229 |
# 日语
|
| 230 |
c_alias = JAPANESE_IPA_TO_ROMAJI.get(c_base, c_base)
|
|
@@ -243,12 +410,108 @@ def ipa_to_alias(consonant: Optional[str], vowel: Optional[str], language: str,
|
|
| 243 |
return romaji
|
| 244 |
|
| 245 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 246 |
class UTAUOtoExportPlugin(ExportPlugin):
|
| 247 |
"""UTAU oto.ini 导出插件"""
|
| 248 |
|
| 249 |
name = "UTAU oto.ini 导出"
|
| 250 |
description = "从 TextGrid 生成 UTAU 音源配置文件,一个 wav 可包含多条配置"
|
| 251 |
-
version = "1.
|
| 252 |
author = "内置"
|
| 253 |
|
| 254 |
def get_options(self) -> List[PluginOption]:
|
|
@@ -348,6 +611,42 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 348 |
default="",
|
| 349 |
description="character.txt 中的角色名,留空则使用音源名称"
|
| 350 |
),
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 351 |
]
|
| 352 |
|
| 353 |
def export(
|
|
@@ -375,6 +674,12 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 375 |
fuzzy_phoneme = options.get("fuzzy_phoneme", False)
|
| 376 |
use_hiragana = (alias_style == "hiragana") and language in ('japanese', 'ja', 'jp')
|
| 377 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
# 使用基类方法解析质量评估维度
|
| 379 |
enabled_metrics = self.parse_quality_metrics(quality_metrics)
|
| 380 |
|
|
@@ -384,13 +689,20 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 384 |
os.makedirs(export_dir, exist_ok=True)
|
| 385 |
|
| 386 |
# 步骤1: 解析 TextGrid 并生成 oto 条目
|
| 387 |
-
|
|
|
|
|
|
|
|
|
|
| 388 |
oto_entries, wav_files = self._parse_textgrids(
|
| 389 |
paths["slices_dir"],
|
| 390 |
paths["textgrid_dir"],
|
| 391 |
language,
|
| 392 |
use_hiragana,
|
| 393 |
-
overlap_ratio
|
|
|
|
|
|
|
|
|
|
|
|
|
| 394 |
)
|
| 395 |
|
| 396 |
if not oto_entries:
|
|
@@ -466,9 +778,25 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 466 |
textgrid_dir: str,
|
| 467 |
language: str,
|
| 468 |
use_hiragana: bool,
|
| 469 |
-
overlap_ratio: float
|
|
|
|
|
|
|
|
|
|
|
|
|
| 470 |
) -> Tuple[List[Dict], set]:
|
| 471 |
-
"""解析 TextGrid 文件,提取音素边界
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 472 |
import textgrid
|
| 473 |
import soundfile as sf
|
| 474 |
|
|
@@ -522,12 +850,21 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 522 |
if phones_tier is None:
|
| 523 |
continue
|
| 524 |
|
| 525 |
-
# 提取
|
| 526 |
entries = self._extract_cv_pairs(
|
| 527 |
words_tier, phones_tier, wav_name, wav_duration_ms,
|
| 528 |
language, use_hiragana, overlap_ratio
|
| 529 |
)
|
| 530 |
oto_entries.extend(entries)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 531 |
|
| 532 |
return oto_entries, wav_files
|
| 533 |
|
|
@@ -542,8 +879,8 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 542 |
overlap_ratio: float
|
| 543 |
) -> List[Dict]:
|
| 544 |
"""
|
| 545 |
-
从 phones 层提取辅音+元音
|
| 546 |
-
使用 words 层限制配对范围,确保
|
| 547 |
"""
|
| 548 |
entries = []
|
| 549 |
|
|
@@ -584,67 +921,648 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 584 |
start_ms = interval.minTime * 1000
|
| 585 |
end_ms = interval.maxTime * 1000
|
| 586 |
|
| 587 |
-
|
| 588 |
-
|
| 589 |
-
|
| 590 |
-
|
| 591 |
-
|
|
|
|
| 592 |
|
| 593 |
-
|
| 594 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 595 |
|
| 596 |
-
# 检查
|
| 597 |
-
|
| 598 |
-
|
| 599 |
-
|
| 600 |
-
|
| 601 |
|
| 602 |
-
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
|
| 606 |
-
|
| 607 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 608 |
|
| 609 |
-
|
| 610 |
-
if
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 611 |
i += 1
|
| 612 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 613 |
|
| 614 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 615 |
|
| 616 |
-
|
| 617 |
-
|
| 618 |
-
|
| 619 |
-
|
| 620 |
-
|
| 621 |
-
|
| 622 |
-
|
| 623 |
-
|
| 624 |
-
|
| 625 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 626 |
|
| 627 |
-
|
| 628 |
-
|
| 629 |
-
|
| 630 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 631 |
continue
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 632 |
|
| 633 |
-
|
| 634 |
-
|
| 635 |
-
|
| 636 |
-
|
| 637 |
-
|
| 638 |
-
|
| 639 |
-
|
| 640 |
-
|
| 641 |
-
|
| 642 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 643 |
|
| 644 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 645 |
|
| 646 |
return entries
|
| 647 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 648 |
def _calculate_oto_params(
|
| 649 |
self,
|
| 650 |
wav_name: str,
|
|
@@ -684,6 +1602,67 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 684 |
"segment_duration": segment_duration, # 用于排序
|
| 685 |
}
|
| 686 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 687 |
def _filter_by_alias(
|
| 688 |
self,
|
| 689 |
entries: List[Dict],
|
|
@@ -1331,7 +2310,12 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 1331 |
|
| 1332 |
# 获取有效的元音列表(用于验证组合)
|
| 1333 |
if language in ('chinese', 'zh', 'mandarin'):
|
| 1334 |
-
valid_vowels = {'a', 'o', 'e', 'i', 'u', 'v',
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1335 |
else:
|
| 1336 |
valid_vowels = {'a', 'i', 'u', 'e', 'o'}
|
| 1337 |
|
|
@@ -1448,10 +2432,13 @@ class UTAUOtoExportPlugin(ExportPlugin):
|
|
| 1448 |
all_consonants = ['b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
|
| 1449 |
'j', 'q', 'x', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's', 'y', 'w']
|
| 1450 |
|
| 1451 |
-
# 中文所有可能的韵母
|
| 1452 |
-
all_vowels = ['a', 'o', 'e', 'i', 'u', 'v',
|
| 1453 |
-
'
|
| 1454 |
-
'
|
|
|
|
|
|
|
|
|
|
| 1455 |
|
| 1456 |
fuzzy_count = 0
|
| 1457 |
|
|
|
|
| 23 |
|
| 24 |
# 中文辅音(MFA 输出的 IPA 符号)
|
| 25 |
CHINESE_CONSONANTS = {
|
| 26 |
+
# 双唇音
|
| 27 |
+
'p', 'pʰ', 'pʲ', 'pʷ', 'b', 'm', 'f',
|
| 28 |
+
# 齿龈音
|
| 29 |
+
't', 'tʰ', 'tʲ', 'd', 'n', 'l',
|
| 30 |
+
# 软腭音
|
| 31 |
+
'k', 'kʰ', 'kʷ', 'ɡ', 'g', 'ŋ', 'x', 'h',
|
| 32 |
+
# 齿龈-硬腭音(j, q, x)
|
| 33 |
'tɕ', 'tɕʰ', 'dʑ', 'ɕ', 'ʑ',
|
| 34 |
+
# 齿龈塞擦音(z, c, s)
|
| 35 |
'ts', 'tsʰ', 'dz', 's', 'z',
|
| 36 |
+
# 卷舌音(zh, ch, sh, r)
|
| 37 |
'ʈʂ', 'ʈʂʰ', 'ɖʐ', 'ʂ', 'ʐ',
|
| 38 |
+
# 鼻音和近音
|
| 39 |
'ɲ', 'j', 'w', 'ɥ',
|
| 40 |
+
# 喉塞音
|
| 41 |
+
'ʔ',
|
| 42 |
}
|
| 43 |
|
| 44 |
# 中文元音(可能带声调标记)
|
| 45 |
+
# 注意:MFA 输出的元音通常是单个音素,复合韵母会被拆分成多个音素
|
| 46 |
CHINESE_VOWELS = {
|
| 47 |
+
# 基本单元音
|
| 48 |
'a', 'o', 'e', 'i', 'u', 'y', 'ü',
|
| 49 |
'ə', 'ɛ', 'ɔ', 'ɤ', 'ɨ', 'ʅ', 'ʉ',
|
| 50 |
+
# MFA 输出的特殊格式
|
| 51 |
+
'aw', 'ej', 'ow', # 双元音的 MFA 表示(ao, ei, ou)
|
| 52 |
+
# 舌尖元音(zi, ci, si, zhi, chi, shi, ri)
|
| 53 |
+
'z̩', 'ʐ̩',
|
| 54 |
+
# 卷舌近音(er)
|
| 55 |
+
'ɻ',
|
| 56 |
+
# 儿化音
|
| 57 |
+
'ɚ',
|
| 58 |
+
}
|
| 59 |
+
|
| 60 |
+
# 中文介音(声母和韵母之间的过渡音)
|
| 61 |
+
CHINESE_MEDIALS = {
|
| 62 |
+
'j', 'w', 'ɥ', # i, u, ü 介音
|
| 63 |
+
}
|
| 64 |
+
|
| 65 |
+
# 中文韵尾(鼻音和元音韵尾)
|
| 66 |
+
CHINESE_CODAS = {
|
| 67 |
+
'n', 'ŋ', # 鼻音韵尾
|
| 68 |
+
'i', 'u', # 元音韵尾(在复韵母中)
|
| 69 |
}
|
| 70 |
|
| 71 |
# 日语辅音
|
|
|
|
| 107 |
('in', 'ing'), # 前鼻/后鼻
|
| 108 |
('ian', 'iang'), # 前鼻/后鼻
|
| 109 |
('uan', 'uang'), # 前鼻/后鼻
|
| 110 |
+
# i 行韵母近似组(带鼻音韵尾的可以用不带鼻音韵尾的替代)
|
| 111 |
+
('ia', 'ian'), # ia ←→ ian(如 xia ←→ xian)
|
| 112 |
+
('ie', 'ian'), # ie ←→ ian(如 jie ←→ jian)
|
| 113 |
+
('iao', 'ian'), # iao ←→ ian(如 qiao ←→ qian)
|
| 114 |
+
('iu', 'in'), # iu ←→ in(如 liu ←→ lin)
|
| 115 |
+
# u 行韵母近似组
|
| 116 |
+
('ua', 'uan'), # ua ←→ uan(如 kua ←→ kuan)
|
| 117 |
+
('uo', 'un'), # uo ←→ un(如 duo ←→ dun)
|
| 118 |
+
('ui', 'un'), # ui ←→ un(如 dui ←→ dun)
|
| 119 |
+
('uai', 'uan'), # uai ←→ uan(如 kuai ←→ kuan)
|
| 120 |
+
# 单元音与复韵母近似组
|
| 121 |
+
('a', 'ai', 'ao', 'an'), # a 系列
|
| 122 |
+
('o', 'ou', 'ong'), # o 系列
|
| 123 |
+
('e', 'ei', 'en'), # e 系列
|
| 124 |
]
|
| 125 |
|
| 126 |
|
|
|
|
| 140 |
base_phone = _strip_tone(phone)
|
| 141 |
|
| 142 |
if language in ('chinese', 'zh', 'mandarin'):
|
| 143 |
+
# 直接匹配
|
| 144 |
if base_phone in CHINESE_VOWELS:
|
| 145 |
return True
|
| 146 |
+
|
| 147 |
+
# 检查是否以元音字符开头(处理复合元音)
|
| 148 |
+
vowel_starts = ['a', 'o', 'e', 'i', 'u', 'y', 'ə', 'ɛ', 'ɔ', 'ɤ', 'ɨ', 'ʅ', 'ʉ', 'ɚ']
|
| 149 |
+
for v in vowel_starts:
|
| 150 |
if base_phone.startswith(v):
|
| 151 |
return True
|
| 152 |
+
|
| 153 |
+
# 检查特殊的舌尖元音(带组合字符)
|
| 154 |
+
if 'z̩' in base_phone or 'ʐ̩' in base_phone:
|
| 155 |
+
return True
|
| 156 |
+
|
| 157 |
+
# 检查卷舌近音
|
| 158 |
+
if 'ɻ' in base_phone:
|
| 159 |
+
return True
|
| 160 |
+
|
| 161 |
return False
|
| 162 |
elif language in ('japanese', 'ja', 'jp'):
|
| 163 |
return base_phone in JAPANESE_VOWELS or base_phone.rstrip('ː') in {'a', 'i', 'ɯ', 'u', 'e', 'o'}
|
|
|
|
| 175 |
|
| 176 |
# ==================== IPA 到别名转换 ====================
|
| 177 |
|
| 178 |
+
# 中文 IPA 辅音到拼音声母映射
|
| 179 |
+
CHINESE_CONSONANT_TO_PINYIN = {
|
| 180 |
+
'p': 'b', 'pʰ': 'p', 'pʲ': 'p', 'pʷ': 'b',
|
|
|
|
| 181 |
'm': 'm', 'f': 'f',
|
| 182 |
+
't': 'd', 'tʰ': 't', 'tʲ': 'd',
|
| 183 |
'n': 'n', 'l': 'l',
|
| 184 |
+
'k': 'g', 'kʰ': 'k', 'kʷ': 'g',
|
| 185 |
+
'ɡ': 'g', 'g': 'g',
|
| 186 |
'x': 'h', 'h': 'h',
|
| 187 |
'tɕ': 'j', 'tɕʰ': 'q', 'ɕ': 'x',
|
| 188 |
'ts': 'z', 'tsʰ': 'c', 's': 's',
|
| 189 |
'ʈʂ': 'zh', 'ʈʂʰ': 'ch', 'ʂ': 'sh', 'ʐ': 'r',
|
| 190 |
+
'ɲ': 'n', 'ŋ': '', # ng 不作为声母
|
| 191 |
+
'j': '', 'w': '', 'ɥ': '', # 介音不作为声母
|
| 192 |
'ʔ': '',
|
| 193 |
+
}
|
| 194 |
+
|
| 195 |
+
# 中文 IPA 元音到拼音韵母映射
|
| 196 |
+
CHINESE_VOWEL_TO_PINYIN = {
|
| 197 |
+
# 单元音韵母
|
| 198 |
'a': 'a', 'o': 'o', 'e': 'e', 'i': 'i', 'u': 'u', 'y': 'v', 'ü': 'v',
|
| 199 |
+
'ə': 'e', 'ɛ': 'e', 'ɔ': 'o', 'ɤ': 'e', 'ɨ': 'i',
|
| 200 |
+
# 复韵母(MFA 可能的 IPA 格式)
|
| 201 |
+
'aj': 'ai', 'aw': 'ao', 'ej': 'ei', 'ow': 'ou',
|
| 202 |
+
'ai': 'ai', 'ao': 'ao', 'ei': 'ei', 'ou': 'ou', # 直接形式
|
| 203 |
+
# i 行韵母(MFA 可能的组合形式)
|
| 204 |
+
'ja': 'ia', 'je': 'ie', 'jɛ': 'ie', 'jao': 'iao', 'jow': 'iu', 'ju': 'iu',
|
| 205 |
+
'ia': 'ia', 'ie': 'ie', 'iao': 'iao', 'iu': 'iu', # 直接形式
|
| 206 |
+
# u 行韵母(MFA 可能的组合形式)
|
| 207 |
+
'wa': 'ua', 'wo': 'uo', 'wɔ': 'uo', 'wej': 'ui', 'waj': 'uai',
|
| 208 |
+
'ua': 'ua', 'uo': 'uo', 'ui': 'ui', 'uai': 'uai', # 直接形式
|
| 209 |
+
# ü 行韵母(MFA 可能的组合形式)
|
| 210 |
+
'ɥe': 've', 'ɥɛ': 've',
|
| 211 |
+
've': 've', 'yue': 've', # 直接形式
|
| 212 |
+
# 鼻音韵母(MFA 可能的组合形式)
|
| 213 |
+
'an': 'an', 'en': 'en', 'ang': 'ang', 'eng': 'eng', 'ong': 'ong',
|
| 214 |
+
'in': 'in', 'ing': 'ing', 'ian': 'ian', 'iang': 'iang', 'iong': 'iong',
|
| 215 |
+
'uan': 'uan', 'un': 'un', 'uang': 'uang', 'ueng': 'ueng',
|
| 216 |
+
'van': 'van', 'vn': 'vn',
|
| 217 |
+
# 舌尖元音
|
| 218 |
+
'z̩': 'i', 'ʐ̩': 'i', 'ʅ': 'i',
|
| 219 |
+
# 卷舌音
|
| 220 |
+
'ɻ': 'er', 'ɚ': 'er',
|
| 221 |
+
}
|
| 222 |
+
|
| 223 |
+
# 介音+元音组合到韵母的映射
|
| 224 |
+
MEDIAL_VOWEL_TO_FINAL = {
|
| 225 |
+
# j 介音(i 行韵母)
|
| 226 |
+
('j', 'a'): 'ia', ('j', 'e'): 'ie', ('j', 'ɛ'): 'ie',
|
| 227 |
+
('j', 'aw'): 'iao', ('j', 'o'): 'io',
|
| 228 |
+
('j', 'u'): 'iu', ('j', 'ow'): 'iou',
|
| 229 |
+
# w 介音(u 行韵母)
|
| 230 |
+
('w', 'a'): 'ua', ('w', 'o'): 'uo', ('w', 'ɔ'): 'uo',
|
| 231 |
+
('w', 'ej'): 'uei', ('w', 'e'): 'ue',
|
| 232 |
+
('w', 'aj'): 'uai', ('w', 'ai'): 'uai',
|
| 233 |
+
# ɥ 介音(ü 行韵母)
|
| 234 |
+
('ɥ', 'e'): 've', ('ɥ', 'ɛ'): 've',
|
| 235 |
+
}
|
| 236 |
+
|
| 237 |
+
# 介音+元音+韵尾组合到韵母的映射
|
| 238 |
+
MEDIAL_VOWEL_CODA_TO_FINAL = {
|
| 239 |
+
# j 介音 + 元音 + 韵尾
|
| 240 |
+
('j', 'a', 'n'): 'ian', ('j', 'e', 'n'): 'in',
|
| 241 |
+
('j', 'a', 'ŋ'): 'iang', ('j', 'o', 'ŋ'): 'iong',
|
| 242 |
+
# w 介音 + 元音 + 韵尾
|
| 243 |
+
('w', 'a', 'n'): 'uan', ('w', 'ə', 'n'): 'uen', ('w', 'e', 'n'): 'uen',
|
| 244 |
+
('w', 'a', 'ŋ'): 'uang', ('w', 'ə', 'ŋ'): 'ueng', ('w', 'e', 'ŋ'): 'ueng',
|
| 245 |
+
# ɥ 介音 + 元音 + 韵尾
|
| 246 |
+
('ɥ', 'a', 'n'): 'van', ('ɥ', 'e', 'n'): 'vn',
|
| 247 |
+
}
|
| 248 |
+
|
| 249 |
+
# 元音+韵尾组合到拼音韵母的映射
|
| 250 |
+
VOWEL_CODA_TO_PINYIN = {
|
| 251 |
+
# 前鼻音韵母
|
| 252 |
+
('a', 'n'): 'an', ('ə', 'n'): 'en', ('e', 'n'): 'en',
|
| 253 |
+
('i', 'n'): 'in', ('y', 'n'): 'un', ('u', 'n'): 'un',
|
| 254 |
+
# 后鼻音韵母
|
| 255 |
+
('a', 'ŋ'): 'ang', ('ə', 'ŋ'): 'eng', ('e', 'ŋ'): 'eng',
|
| 256 |
+
('i', 'ŋ'): 'ing', ('o', 'ŋ'): 'ong', ('u', 'ŋ'): 'ong',
|
| 257 |
+
# 复韵母(元音+元音)
|
| 258 |
+
('a', 'i'): 'ai', ('e', 'i'): 'ei', ('ej', 'i'): 'ei',
|
| 259 |
+
('a', 'u'): 'ao', ('aw', 'u'): 'ao', ('o', 'u'): 'ou', ('ow', 'u'): 'ou',
|
| 260 |
+
# i 行韵母
|
| 261 |
+
('i', 'a'): 'ia', ('i', 'e'): 'ie', ('i', 'ɛ'): 'ie',
|
| 262 |
+
('i', 'u'): 'iu',
|
| 263 |
+
# u 行韵母
|
| 264 |
+
('u', 'a'): 'ua', ('u', 'o'): 'uo', ('u', 'ɔ'): 'uo',
|
| 265 |
+
('u', 'i'): 'ui', ('u', 'e'): 'ue',
|
| 266 |
+
# ü 行韵母
|
| 267 |
+
('y', 'e'): 've', ('y', 'ɛ'): 've',
|
| 268 |
+
}
|
| 269 |
+
|
| 270 |
+
# IPA 音节组合到标准拼音的映射表(处理特殊组合规则)
|
| 271 |
+
IPA_SYLLABLE_TO_PINYIN = {
|
| 272 |
+
# j/q/x + ü 系列(ü 简写为 u)
|
| 273 |
+
('tɕ', 'y'): 'ju', ('tɕʰ', 'y'): 'qu', ('ɕ', 'y'): 'xu',
|
| 274 |
+
('tɕ', 'ɥ'): 'ju', ('tɕʰ', 'ɥ'): 'qu', ('ɕ', 'ɥ'): 'xu',
|
| 275 |
+
('tɕ', 'yɛ'): 'jue', ('tɕʰ', 'yɛ'): 'que', ('ɕ', 'yɛ'): 'xue',
|
| 276 |
+
('tɕ', 'yan'): 'juan', ('tɕʰ', 'yan'): 'quan', ('ɕ', 'yan'): 'xuan',
|
| 277 |
+
('tɕ', 'yn'): 'jun', ('tɕʰ', 'yn'): 'qun', ('ɕ', 'yn'): 'xun',
|
| 278 |
+
|
| 279 |
+
# 零声母 + i/u/ü 开头的韵母(需要加 y/w)
|
| 280 |
+
('', 'i'): 'yi', ('', 'in'): 'yin', ('', 'ing'): 'ying',
|
| 281 |
+
('', 'u'): 'wu', ('', 'un'): 'wen', ('', 'ong'): 'weng',
|
| 282 |
+
('', 'y'): 'yu', ('', 'yn'): 'yun',
|
| 283 |
+
|
| 284 |
+
# i 行韵母(ia, ie, iao, ian, iang, iong, iu)
|
| 285 |
+
('', 'ia'): 'ya', ('', 'iɛ'): 'ye', ('', 'ie'): 'ye',
|
| 286 |
+
('', 'iao'): 'yao', ('', 'ian'): 'yan', ('', 'iang'): 'yang',
|
| 287 |
+
('', 'iou'): 'you', ('', 'iu'): 'you',
|
| 288 |
+
('', 'iong'): 'yong',
|
| 289 |
+
|
| 290 |
+
# u 行韵母(ua, uo, uai, uei, uan, uen, uang, ueng)
|
| 291 |
+
('', 'ua'): 'wa', ('', 'uɔ'): 'wo', ('', 'uo'): 'wo',
|
| 292 |
+
('', 'uai'): 'wai', ('', 'uei'): 'wei', ('', 'ui'): 'wei',
|
| 293 |
+
('', 'uan'): 'wan', ('', 'uen'): 'wen',
|
| 294 |
+
('', 'uang'): 'wang', ('', 'ueng'): 'weng',
|
| 295 |
+
|
| 296 |
+
# ü 行韵母(üe, üan, ün)
|
| 297 |
+
('', 'yɛ'): 'yue', ('', 'üe'): 'yue',
|
| 298 |
+
('', 'yan'): 'yuan', ('', 'üan'): 'yuan',
|
| 299 |
+
('', 'yn'): 'yun', ('', 'ün'): 'yun',
|
| 300 |
+
|
| 301 |
+
# zh/ch/sh/r + i 实际是舌尖元音
|
| 302 |
+
('ʈʂ', 'ʐ̩'): 'zhi', ('ʈʂʰ', 'ʐ̩'): 'chi', ('ʂ', 'ʐ̩'): 'shi', ('ʐ', 'ʐ̩'): 'ri',
|
| 303 |
+
('ʈʂ', 'z̩'): 'zhi', ('ʈʂʰ', 'z̩'): 'chi', ('ʂ', 'z̩'): 'shi', ('ʐ', 'z̩'): 'ri',
|
| 304 |
+
('ʈʂ', 'ʅ'): 'zhi', ('ʈʂʰ', 'ʅ'): 'chi', ('ʂ', 'ʅ'): 'shi', ('ʐ', 'ʅ'): 'ri',
|
| 305 |
+
|
| 306 |
+
# z/c/s + i 实际是舌尖元音
|
| 307 |
+
('ts', 'z̩'): 'zi', ('tsʰ', 'z̩'): 'ci', ('s', 'z̩'): 'si',
|
| 308 |
+
('ts', 'ʅ'): 'zi', ('tsʰ', 'ʅ'): 'ci', ('s', 'ʅ'): 'si',
|
| 309 |
+
|
| 310 |
+
# n/l + ü 系列(保持 ü)
|
| 311 |
+
('n', 'y'): 'nv', ('l', 'y'): 'lv',
|
| 312 |
+
('n', 'yɛ'): 'nve', ('l', 'yɛ'): 'lve',
|
| 313 |
+
|
| 314 |
+
# 其他特殊组合
|
| 315 |
+
('ʔ', 'a'): 'a', ('ʔ', 'o'): 'o', ('ʔ', 'e'): 'e',
|
| 316 |
+
('ʔ', 'ai'): 'ai', ('ʔ', 'ei'): 'ei', ('ʔ', 'ao'): 'ao', ('ʔ', 'ou'): 'ou',
|
| 317 |
+
('ʔ', 'an'): 'an', ('ʔ', 'en'): 'en', ('ʔ', 'ang'): 'ang', ('ʔ', 'eng'): 'eng',
|
| 318 |
+
('ʔ', 'ej'): 'ei', ('ʔ', 'aw'): 'ao', ('ʔ', 'ow'): 'ou',
|
| 319 |
+
|
| 320 |
+
# 儿化音
|
| 321 |
+
('', 'ɻ'): 'er', ('', 'ɚ'): 'er',
|
| 322 |
}
|
| 323 |
|
| 324 |
# 日语 IPA 到罗马音映射
|
|
|
|
| 385 |
|
| 386 |
|
| 387 |
def ipa_to_alias(consonant: Optional[str], vowel: Optional[str], language: str, use_hiragana: bool = False) -> Optional[str]:
|
| 388 |
+
"""将 IPA 音素转换为别名(标准拼音或罗马音)"""
|
| 389 |
c_base = _strip_tone(consonant) if consonant else ''
|
| 390 |
v_base = _strip_tone(vowel) if vowel else ''
|
| 391 |
|
| 392 |
if language in ('chinese', 'zh', 'mandarin'):
|
| 393 |
+
# 中文:使用完整的音节转换规则
|
| 394 |
+
return _ipa_to_pinyin(c_base, v_base)
|
|
|
|
|
|
|
|
|
|
|
|
|
| 395 |
else:
|
| 396 |
# 日语
|
| 397 |
c_alias = JAPANESE_IPA_TO_ROMAJI.get(c_base, c_base)
|
|
|
|
| 410 |
return romaji
|
| 411 |
|
| 412 |
|
| 413 |
+
def _ipa_to_pinyin(consonant: str, vowel: str) -> Optional[str]:
    """
    Convert an IPA initial + final pair into standard Hanyu Pinyin.

    Args:
        consonant: IPA initial with tone marks already stripped. An empty
            string or the glottal stop 'ʔ' is treated as a zero initial.
        vowel: IPA final with tone marks already stripped. It is looked up
            as a whole key, so it may be a single vowel or a vowel + coda
            string, as long as the lookup table has that key.

    Returns:
        The pinyin syllable, or None when the initial or the final cannot be
        mapped.
    """
    # 1. Whole-syllable exceptions take priority over the generic rules.
    syllable_key = (consonant, vowel)
    if syllable_key in IPA_SYLLABLE_TO_PINYIN:
        return IPA_SYLLABLE_TO_PINYIN[syllable_key]

    # 2. Initial: an unknown (non-glottal) consonant makes the syllable
    #    unconvertible.
    c_pinyin = ''
    if consonant and consonant != 'ʔ':
        if consonant not in CHINESE_CONSONANT_TO_PINYIN:
            return None
        c_pinyin = CHINESE_CONSONANT_TO_PINYIN[consonant]

    # 3. Final: must be present in the table as a whole key.
    v_pinyin = CHINESE_VOWEL_TO_PINYIN.get(vowel, '') if vowel else ''
    if not v_pinyin:
        return None

    # 4. Zero initial: pinyin orthography rewrites i-/u-/ü- finals with
    #    y/w/yu.
    if not c_pinyin:
        if v_pinyin == 'i':
            return 'yi'
        if v_pinyin in ('in', 'ing'):
            return 'y' + v_pinyin
        if v_pinyin == 'iu':
            # 'iu' abbreviates 'iou'; a plain prefix swap would give 'yu',
            # which is a different syllable (ü).
            return 'you'
        if v_pinyin.startswith('i'):
            # ia->ya, ie->ye, iao->yao, ian->yan, iang->yang, iong->yong
            return 'y' + v_pinyin[1:]
        if v_pinyin == 'u':
            return 'wu'
        if v_pinyin == 'ui':
            # 'ui' abbreviates 'uei'; a plain prefix swap would give the
            # invalid syllable 'wi'.
            return 'wei'
        if v_pinyin == 'un':
            return 'wen'
        if v_pinyin == 'ong':
            return 'weng'
        if v_pinyin.startswith('u'):
            # ua->wa, uo->wo, uai->wai, uan->wan, uang->wang
            return 'w' + v_pinyin[1:]
        if v_pinyin == 'v':
            # A bare ü is written 'yu'.
            return 'yu'
        if v_pinyin.startswith('v'):
            # ve->yue, van->yuan, vn->yun
            return 'yu' + v_pinyin[1:]
        # a, o, e, ai, ei, ao, ou, an, en, ang, eng, er, ...
        return v_pinyin

    # 5. With an initial: only n/l keep 'v' (ü), because only there u and ü
    #    contrast. Every other initial (including j/q/x) writes ü as 'u'.
    if c_pinyin not in ('n', 'l') and v_pinyin.startswith('v'):
        return c_pinyin + 'u' + v_pinyin[1:]
    return c_pinyin + v_pinyin
|
| 507 |
+
|
| 508 |
+
|
| 509 |
class UTAUOtoExportPlugin(ExportPlugin):
|
| 510 |
"""UTAU oto.ini 导出插件"""
|
| 511 |
|
| 512 |
name = "UTAU oto.ini 导出"
|
| 513 |
description = "从 TextGrid 生成 UTAU 音源配置文件,一个 wav 可包含多条配置"
|
| 514 |
+
version = "1.2.0"
|
| 515 |
author = "内置"
|
| 516 |
|
| 517 |
def get_options(self) -> List[PluginOption]:
|
|
|
|
| 611 |
default="",
|
| 612 |
description="character.txt 中的角色名,留空则使用音源名称"
|
| 613 |
),
|
| 614 |
+
PluginOption(
|
| 615 |
+
key="cvvc_mode",
|
| 616 |
+
label="CVVC 模式",
|
| 617 |
+
option_type=OptionType.SWITCH,
|
| 618 |
+
default=False,
|
| 619 |
+
description="启用 CVVC 模式,额外生成 VC 部(元音到辅音过渡)条目"
|
| 620 |
+
),
|
| 621 |
+
PluginOption(
|
| 622 |
+
key="vc_alias_separator",
|
| 623 |
+
label="VC 别名分隔符",
|
| 624 |
+
option_type=OptionType.COMBO,
|
| 625 |
+
default=" ",
|
| 626 |
+
choices=[" ", "_", "-"],
|
| 627 |
+
description="VC 部别名中元音和辅音之间的分隔符",
|
| 628 |
+
visible_when={"cvvc_mode": True}
|
| 629 |
+
),
|
| 630 |
+
PluginOption(
|
| 631 |
+
key="vc_offset_ratio",
|
| 632 |
+
label="VC 偏移比例",
|
| 633 |
+
option_type=OptionType.NUMBER,
|
| 634 |
+
default=0.5,
|
| 635 |
+
min_value=0.3,
|
| 636 |
+
max_value=0.8,
|
| 637 |
+
description="VC 部开始位置 = 元音结束位置 - 元音时长 × 此比例",
|
| 638 |
+
visible_when={"cvvc_mode": True}
|
| 639 |
+
),
|
| 640 |
+
PluginOption(
|
| 641 |
+
key="vc_overlap_ratio",
|
| 642 |
+
label="VC Overlap 比例",
|
| 643 |
+
option_type=OptionType.NUMBER,
|
| 644 |
+
default=0.5,
|
| 645 |
+
min_value=0.3,
|
| 646 |
+
max_value=0.8,
|
| 647 |
+
description="VC 部的 Overlap = Preutterance × 此比例",
|
| 648 |
+
visible_when={"cvvc_mode": True}
|
| 649 |
+
),
|
| 650 |
]
|
| 651 |
|
| 652 |
def export(
|
|
|
|
| 674 |
fuzzy_phoneme = options.get("fuzzy_phoneme", False)
|
| 675 |
use_hiragana = (alias_style == "hiragana") and language in ('japanese', 'ja', 'jp')
|
| 676 |
|
| 677 |
+
# CVVC 模式选项
|
| 678 |
+
cvvc_mode = options.get("cvvc_mode", False)
|
| 679 |
+
vc_separator = options.get("vc_alias_separator", " ")
|
| 680 |
+
vc_offset_ratio = float(options.get("vc_offset_ratio", 0.5))
|
| 681 |
+
vc_overlap_ratio = float(options.get("vc_overlap_ratio", 0.5))
|
| 682 |
+
|
| 683 |
# 使用基类方法解析质量评估维度
|
| 684 |
enabled_metrics = self.parse_quality_metrics(quality_metrics)
|
| 685 |
|
|
|
|
| 689 |
os.makedirs(export_dir, exist_ok=True)
|
| 690 |
|
| 691 |
# 步骤1: 解析 TextGrid 并生成 oto 条目
|
| 692 |
+
if cvvc_mode:
|
| 693 |
+
self._log("【解析 TextGrid 文件】(CVVC 模式)")
|
| 694 |
+
else:
|
| 695 |
+
self._log("【解析 TextGrid 文件】")
|
| 696 |
oto_entries, wav_files = self._parse_textgrids(
|
| 697 |
paths["slices_dir"],
|
| 698 |
paths["textgrid_dir"],
|
| 699 |
language,
|
| 700 |
use_hiragana,
|
| 701 |
+
overlap_ratio,
|
| 702 |
+
cvvc_mode=cvvc_mode,
|
| 703 |
+
vc_offset_ratio=vc_offset_ratio,
|
| 704 |
+
vc_overlap_ratio=vc_overlap_ratio,
|
| 705 |
+
vc_separator=vc_separator
|
| 706 |
)
|
| 707 |
|
| 708 |
if not oto_entries:
|
|
|
|
| 778 |
textgrid_dir: str,
|
| 779 |
language: str,
|
| 780 |
use_hiragana: bool,
|
| 781 |
+
overlap_ratio: float,
|
| 782 |
+
cvvc_mode: bool = False,
|
| 783 |
+
vc_offset_ratio: float = 0.5,
|
| 784 |
+
vc_overlap_ratio: float = 0.5,
|
| 785 |
+
vc_separator: str = " "
|
| 786 |
) -> Tuple[List[Dict], set]:
|
| 787 |
+
"""解析 TextGrid 文件,提取音素边界
|
| 788 |
+
|
| 789 |
+
参数:
|
| 790 |
+
slices_dir: 切片目录
|
| 791 |
+
textgrid_dir: TextGrid 目录
|
| 792 |
+
language: 语言
|
| 793 |
+
use_hiragana: 是否使用平假名
|
| 794 |
+
overlap_ratio: CV 部 overlap 比例
|
| 795 |
+
cvvc_mode: 是否启用 CVVC 模式
|
| 796 |
+
vc_offset_ratio: VC 偏移比例
|
| 797 |
+
vc_overlap_ratio: VC overlap 比例
|
| 798 |
+
vc_separator: VC 别名分隔符
|
| 799 |
+
"""
|
| 800 |
import textgrid
|
| 801 |
import soundfile as sf
|
| 802 |
|
|
|
|
| 850 |
if phones_tier is None:
|
| 851 |
continue
|
| 852 |
|
| 853 |
+
# 提取 CV 对,使用 words 层限制配对范围
|
| 854 |
entries = self._extract_cv_pairs(
|
| 855 |
words_tier, phones_tier, wav_name, wav_duration_ms,
|
| 856 |
language, use_hiragana, overlap_ratio
|
| 857 |
)
|
| 858 |
oto_entries.extend(entries)
|
| 859 |
+
|
| 860 |
+
# 如果启用 CVVC 模式,额外提取 VC 对
|
| 861 |
+
if cvvc_mode:
|
| 862 |
+
vc_entries = self._extract_vc_pairs(
|
| 863 |
+
words_tier, phones_tier, wav_name, wav_duration_ms,
|
| 864 |
+
language, use_hiragana,
|
| 865 |
+
vc_offset_ratio, vc_overlap_ratio, vc_separator
|
| 866 |
+
)
|
| 867 |
+
oto_entries.extend(vc_entries)
|
| 868 |
|
| 869 |
return oto_entries, wav_files
|
| 870 |
|
|
|
|
| 879 |
overlap_ratio: float
|
| 880 |
) -> List[Dict]:
|
| 881 |
"""
|
| 882 |
+
从 phones 层提取音节(可能包含辅音+元音+韵尾)
|
| 883 |
+
使用 words 层限制配对范围,确保音素属于同一个字
|
| 884 |
"""
|
| 885 |
entries = []
|
| 886 |
|
|
|
|
| 921 |
start_ms = interval.minTime * 1000
|
| 922 |
end_ms = interval.maxTime * 1000
|
| 923 |
|
| 924 |
+
# 中文音节结构:(辅音) + (介音) + 元音 + (韵尾)
|
| 925 |
+
if language in ('chinese', 'zh', 'mandarin'):
|
| 926 |
+
syllable_phones = []
|
| 927 |
+
syllable_start = start_ms
|
| 928 |
+
syllable_end = end_ms
|
| 929 |
+
consonant_duration = 0
|
| 930 |
|
| 931 |
+
# 1. 检查是否有声母(辅音)
|
| 932 |
+
if is_consonant(phone, language):
|
| 933 |
+
syllable_phones.append(phone)
|
| 934 |
+
consonant_duration = end_ms - start_ms
|
| 935 |
+
i += 1
|
| 936 |
+
|
| 937 |
+
# 检查下一个音素
|
| 938 |
+
if i < len(intervals):
|
| 939 |
+
next_interval = intervals[i]
|
| 940 |
+
next_phone = next_interval.mark.strip()
|
| 941 |
+
|
| 942 |
+
if next_phone not in SKIP_MARKS and same_word(interval.minTime, next_interval.minTime):
|
| 943 |
+
phone = next_phone
|
| 944 |
+
end_ms = next_interval.maxTime * 1000
|
| 945 |
+
syllable_end = end_ms
|
| 946 |
+
else:
|
| 947 |
+
# 只有辅音,没有元音,跳过
|
| 948 |
+
continue
|
| 949 |
+
else:
|
| 950 |
+
# 只有辅音,没有元音,跳过
|
| 951 |
+
continue
|
| 952 |
|
| 953 |
+
# 2. 检查是否有介音(j, w, ɥ)
|
| 954 |
+
phone_base = _strip_tone(phone)
|
| 955 |
+
if phone_base in CHINESE_MEDIALS:
|
| 956 |
+
syllable_phones.append(phone)
|
| 957 |
+
i += 1
|
| 958 |
|
| 959 |
+
# 检查下一个音素(必须是元音)
|
| 960 |
+
if i < len(intervals):
|
| 961 |
+
next_interval = intervals[i]
|
| 962 |
+
next_phone = next_interval.mark.strip()
|
| 963 |
+
|
| 964 |
+
if next_phone not in SKIP_MARKS and same_word(interval.minTime, next_interval.minTime):
|
| 965 |
+
phone = next_phone
|
| 966 |
+
end_ms = next_interval.maxTime * 1000
|
| 967 |
+
syllable_end = end_ms
|
| 968 |
+
else:
|
| 969 |
+
# 只有介音,没有元音,跳过
|
| 970 |
+
continue
|
| 971 |
+
else:
|
| 972 |
+
# 只有介音,没有元音,跳过
|
| 973 |
+
continue
|
| 974 |
|
| 975 |
+
# 3. 必须有韵母(元音)
|
| 976 |
+
if is_vowel(phone, language):
|
| 977 |
+
syllable_phones.append(phone)
|
| 978 |
+
if not consonant_duration:
|
| 979 |
+
# 零声母,辅音时长设为元音前30ms
|
| 980 |
+
consonant_duration = min(30, (end_ms - start_ms) * 0.2)
|
| 981 |
+
syllable_end = end_ms
|
| 982 |
i += 1
|
| 983 |
+
|
| 984 |
+
# 4. 检查是否有韵尾(n, ng, i, u)
|
| 985 |
+
if i < len(intervals):
|
| 986 |
+
next_interval = intervals[i]
|
| 987 |
+
next_phone = next_interval.mark.strip()
|
| 988 |
+
|
| 989 |
+
if (next_phone not in SKIP_MARKS and
|
| 990 |
+
same_word(interval.minTime, next_interval.minTime)):
|
| 991 |
+
# 检查是否是韵尾
|
| 992 |
+
next_phone_base = _strip_tone(next_phone)
|
| 993 |
+
if next_phone_base in CHINESE_CODAS:
|
| 994 |
+
syllable_phones.append(next_phone)
|
| 995 |
+
syllable_end = next_interval.maxTime * 1000
|
| 996 |
+
i += 1
|
| 997 |
+
|
| 998 |
+
# 5. 将音节转换为拼音
|
| 999 |
+
alias = self._syllable_to_pinyin(syllable_phones, language, use_hiragana)
|
| 1000 |
+
if alias:
|
| 1001 |
+
entry = self._calculate_oto_params(
|
| 1002 |
+
wav_name=wav_name,
|
| 1003 |
+
alias=alias,
|
| 1004 |
+
offset=syllable_start,
|
| 1005 |
+
consonant_duration=consonant_duration,
|
| 1006 |
+
segment_end=syllable_end,
|
| 1007 |
+
wav_duration_ms=wav_duration_ms,
|
| 1008 |
+
overlap_ratio=overlap_ratio
|
| 1009 |
+
)
|
| 1010 |
+
entries.append(entry)
|
| 1011 |
+
else:
|
| 1012 |
+
# 不是元音,跳过
|
| 1013 |
+
i += 1
|
| 1014 |
+
|
| 1015 |
+
else:
|
| 1016 |
+
# 日语:简单的 CV 结构
|
| 1017 |
+
if is_consonant(phone, language):
|
| 1018 |
+
consonant = phone
|
| 1019 |
+
consonant_start = start_ms
|
| 1020 |
+
consonant_end = end_ms
|
| 1021 |
+
consonant_time = interval.minTime
|
| 1022 |
+
|
| 1023 |
+
vowel = None
|
| 1024 |
+
vowel_end = end_ms
|
| 1025 |
+
|
| 1026 |
+
if i + 1 < len(intervals):
|
| 1027 |
+
next_interval = intervals[i + 1]
|
| 1028 |
+
next_phone = next_interval.mark.strip()
|
| 1029 |
+
next_time = next_interval.minTime
|
| 1030 |
+
|
| 1031 |
+
if (next_phone not in SKIP_MARKS and
|
| 1032 |
+
is_vowel(next_phone, language) and
|
| 1033 |
+
same_word(consonant_time, next_time)):
|
| 1034 |
+
vowel = next_phone
|
| 1035 |
+
vowel_end = next_interval.maxTime * 1000
|
| 1036 |
+
i += 1
|
| 1037 |
+
|
| 1038 |
+
alias = ipa_to_alias(consonant, vowel, language, use_hiragana)
|
| 1039 |
+
if alias:
|
| 1040 |
+
consonant_duration = consonant_end - consonant_start
|
| 1041 |
+
entry = self._calculate_oto_params(
|
| 1042 |
+
wav_name=wav_name,
|
| 1043 |
+
alias=alias,
|
| 1044 |
+
offset=consonant_start,
|
| 1045 |
+
consonant_duration=consonant_duration,
|
| 1046 |
+
segment_end=vowel_end,
|
| 1047 |
+
wav_duration_ms=wav_duration_ms,
|
| 1048 |
+
overlap_ratio=overlap_ratio
|
| 1049 |
+
)
|
| 1050 |
+
entries.append(entry)
|
| 1051 |
+
|
| 1052 |
+
elif is_vowel(phone, language):
|
| 1053 |
+
alias = ipa_to_alias(None, phone, language, use_hiragana)
|
| 1054 |
+
if alias:
|
| 1055 |
+
entry = self._calculate_oto_params(
|
| 1056 |
+
wav_name=wav_name,
|
| 1057 |
+
alias=alias,
|
| 1058 |
+
offset=start_ms,
|
| 1059 |
+
consonant_duration=min(30, (end_ms - start_ms) * 0.2),
|
| 1060 |
+
segment_end=end_ms,
|
| 1061 |
+
wav_duration_ms=wav_duration_ms,
|
| 1062 |
+
overlap_ratio=overlap_ratio
|
| 1063 |
+
)
|
| 1064 |
+
entries.append(entry)
|
| 1065 |
|
| 1066 |
+
i += 1
|
| 1067 |
+
|
| 1068 |
+
return entries
|
| 1069 |
+
|
| 1070 |
+
def _syllable_to_pinyin(
    self,
    phones: List[str],
    language: str,
    use_hiragana: bool
) -> Optional[str]:
    """
    Convert the phones of one Mandarin syllable into standard Hanyu Pinyin.

    The phones are parsed positionally as
    (initial) + (medial) + vowel + (coda); the vowel is mandatory.

    Args:
        phones: IPA phones of the syllable, possibly carrying tone marks.
        language: Language code, forwarded to is_consonant / is_vowel.
        use_hiragana: Unused here; kept for signature compatibility.

    Returns:
        The pinyin syllable, or None when the list is empty or has no vowel
        in the expected position.
    """
    if not phones:
        return None

    # Strip tone marks before classifying the phones.
    phones_base = [_strip_tone(p) for p in phones]

    idx = 0
    c = ''   # initial
    m = ''   # medial
    cd = ''  # coda

    # 1. Initial
    if idx < len(phones_base) and is_consonant(phones_base[idx], language):
        c = phones_base[idx]
        idx += 1

    # 2. Medial
    if idx < len(phones_base) and phones_base[idx] in CHINESE_MEDIALS:
        m = phones_base[idx]
        idx += 1

    # 3. Vowel (required)
    if idx < len(phones_base) and is_vowel(phones_base[idx], language):
        v = phones_base[idx]
        idx += 1
    else:
        return None

    # 4. Coda
    if idx < len(phones_base) and phones_base[idx] in CHINESE_CODAS:
        cd = phones_base[idx]
        idx += 1

    # An initial missing from the table falls back to '' and is then treated
    # like a zero initial; an unmapped vowel is kept verbatim.
    c_py = CHINESE_CONSONANT_TO_PINYIN.get(c, '')
    v_py = CHINESE_VOWEL_TO_PINYIN.get(v, v)

    # Build the final from medial + vowel + coda.
    final = ''

    if m == 'j':
        # i-row finals
        if cd == 'n':
            if v_py == 'a':
                final = 'ian'
            elif v_py == 'e':
                final = 'in'  # j + e + n = in (e.g. xin, yin)
            else:
                final = 'i' + v_py + 'n'
        elif cd == 'ŋ':
            if v_py == 'a':
                final = 'iang'
            elif v_py == 'o':
                final = 'iong'
            else:
                final = 'i' + v_py + 'ng'
        elif cd:
            final = 'i' + v_py + cd
        else:
            if v_py == 'a':
                final = 'ia'
            elif v_py == 'e':
                final = 'ie'
            elif v_py == 'ao':
                final = 'iao'
            elif v_py == 'ou':
                final = 'iu'
            else:
                final = 'i' + v_py

    elif m == 'w':
        # u-row finals
        if cd == 'n':
            if v_py == 'a':
                final = 'uan'
            elif v_py == 'e':
                final = 'un'  # w + e + n = un (e.g. shun)
            else:
                final = 'u' + v_py + 'n'
        elif cd == 'ŋ':
            if v_py == 'a':
                final = 'uang'
            elif v_py == 'e':
                final = 'ueng'
            else:
                final = 'u' + v_py + 'ng'
        elif cd:
            final = 'u' + v_py + cd
        else:
            if v_py == 'a':
                final = 'ua'
            elif v_py == 'o':
                final = 'uo'
            elif v_py == 'ei':
                final = 'ui'  # w + ei = ui (e.g. shui)
            elif v_py == 'ai':
                final = 'uai'
            else:
                final = 'u' + v_py

    elif m == 'ɥ':
        # ü-row finals ('v' stands for ü)
        if cd == 'n':
            if v_py == 'a':
                final = 'van'
            elif v_py == 'e':
                final = 'vn'
            else:
                final = 'v' + v_py + 'n'
        elif cd:
            final = 'v' + v_py + cd
        else:
            if v_py == 'e':
                final = 've'
            else:
                final = 'v' + v_py

    else:
        # No medial
        if cd == 'n':
            final = v_py + 'n'
        elif cd == 'ŋ':
            final = v_py + 'ng'
        elif cd:
            final = v_py + cd
        else:
            final = v_py

    # Zero initial: rewrite i-/u-/ü- finals with y/w/yu.
    if not c_py:
        if final in ('ueng', 'ong'):
            # Checked before the prefix tests: 'ong' does not start with
            # 'u', so nesting it under the u-branch made it unreachable.
            return 'weng'
        if final.startswith('i'):
            if final == 'i':
                return 'yi'
            if final in ('in', 'ing'):
                return 'y' + final
            if final == 'iu':
                # 'iu' abbreviates 'iou'; a plain prefix swap would give
                # 'yu', which is a different syllable (ü).
                return 'you'
            return 'y' + final[1:]
        if final.startswith('u'):
            if final == 'u':
                return 'wu'
            if final == 'ui':
                # 'ui' abbreviates 'uei'; a plain prefix swap would give
                # the invalid syllable 'wi'.
                return 'wei'
            if final == 'un':
                return 'wen'
            return 'w' + final[1:]
        if final.startswith('v'):
            if final == 'v':
                return 'yu'
            return 'yu' + final[1:]
        return final

    # With an initial: only n/l keep 'v' (ü); every other initial, including
    # j/q/x, writes ü as 'u'.
    if c_py not in ('n', 'l') and final.startswith('v'):
        return c_py + 'u' + final[1:]
    return c_py + final
|
| 1263 |
+
|
| 1264 |
+
def _extract_vc_pairs(
    self,
    words_tier,
    phones_tier,
    wav_name: str,
    wav_duration_ms: float,
    language: str,
    use_hiragana: bool,
    vc_offset_ratio: float,
    vc_overlap_ratio: float,
    vc_separator: str
) -> List[Dict]:
    """
    Extract vowel + consonant (VC) transition entries from the phones tier.

    A VC entry joins the final of one syllable with the initial of the next
    syllable in the parsed sequence. Final and initial identifiers come from
    the built-in tables returned by _load_presamp_mapping, so VC aliases
    always use the pinyin-based identifiers.

    Args:
        words_tier: Words tier (currently unused by this method).
        phones_tier: Phones tier; iterated for intervals exposing
            mark / minTime / maxTime.
        wav_name: Audio file name written into each entry.
        wav_duration_ms: Total audio duration in milliseconds.
        language: Language code; only Chinese is supported.
        use_hiragana: Ignored; VC aliases never use hiragana.
        vc_offset_ratio: Forwarded to _calculate_vc_params.
        vc_overlap_ratio: Forwarded to _calculate_vc_params.
        vc_separator: Separator placed between the final and the initial in
            the alias.

    Returns:
        List of VC entries (empty for non-Chinese input or when the mapping
        tables are unavailable).
    """
    entries = []

    if language not in ('chinese', 'zh', 'mandarin'):
        # CVVC is not supported for other languages yet.
        return entries

    vowel_map, consonant_map = self._load_presamp_mapping()
    if not vowel_map or not consonant_map:
        self._log("警告: 无法加载 presamp.ini 映射,跳过 VC 部生成")
        return entries

    intervals = list(phones_tier)

    # Pass 1: parse every syllable and record its final / initial identifiers
    # together with the timing needed for the VC entry.
    syllables = []
    i = 0

    while i < len(intervals):
        interval = intervals[i]
        phone = interval.mark.strip()

        if phone in SKIP_MARKS:
            i += 1
            continue

        # One syllable: (initial) + (medial) + vowel + (coda)
        syllable_phones = []
        syllable_start = interval.minTime * 1000
        syllable_end = interval.maxTime * 1000
        vowel_start = syllable_start
        vowel_end = syllable_end
        consonant_end = syllable_start
        has_consonant = False
        # Interval that `phone` belongs to; advances together with `phone`.
        current = interval

        # 1. Initial
        if is_consonant(phone, language):
            syllable_phones.append(phone)
            consonant_end = interval.maxTime * 1000
            has_consonant = True
            i += 1

            # A lone consonant (end of tier or followed by a skip mark)
            # cannot form a syllable.
            if i >= len(intervals):
                continue
            current = intervals[i]
            phone = current.mark.strip()
            if phone in SKIP_MARKS:
                continue
            syllable_end = current.maxTime * 1000
            vowel_start = current.minTime * 1000

        # 2. Medial (j, w, ɥ)
        if _strip_tone(phone) in CHINESE_MEDIALS:
            syllable_phones.append(phone)
            i += 1

            # A medial must be followed by a vowel.
            if i >= len(intervals):
                continue
            current = intervals[i]
            phone = current.mark.strip()
            if phone in SKIP_MARKS:
                continue
            syllable_end = current.maxTime * 1000

        # 3. Vowel (required)
        if not is_vowel(phone, language):
            i += 1
            continue

        syllable_phones.append(phone)
        # Use the vowel's own interval. The first interval of the syllable
        # is the initial whenever one exists, which would collapse the vowel
        # to zero length.
        vowel_end = current.maxTime * 1000
        syllable_end = vowel_end
        i += 1

        # 4. Optional coda
        if i < len(intervals):
            next_interval = intervals[i]
            next_phone = next_interval.mark.strip()

            if next_phone not in SKIP_MARKS:
                if _strip_tone(next_phone) in CHINESE_CODAS:
                    syllable_phones.append(next_phone)
                    syllable_end = next_interval.maxTime * 1000
                    vowel_end = next_interval.maxTime * 1000
                    i += 1

        # 5. Convert to pinyin and look up the final / initial identifiers.
        pinyin = self._syllable_to_pinyin(syllable_phones, language, False)
        if not pinyin:
            continue

        vowel_part = self._find_vowel_in_mapping(pinyin, vowel_map)
        consonant_part = (
            self._find_consonant_in_mapping(pinyin, consonant_map)
            if has_consonant else None
        )

        if vowel_part:
            syllables.append({
                'pinyin': pinyin,
                'vowel_part': vowel_part,
                'consonant_part': consonant_part,
                'vowel_start': vowel_start,
                'vowel_end': vowel_end,
                'consonant_end': consonant_end,
                'syllable_end': syllable_end
            })

    # Pass 2: final of syllable k + initial of syllable k+1.
    for idx in range(len(syllables) - 1):
        current_syl = syllables[idx]
        next_syl = syllables[idx + 1]

        next_consonant = next_syl.get('consonant_part')

        # Nothing to transition into when the next syllable has no initial.
        if not next_consonant:
            continue

        vc_alias = f"{current_syl['vowel_part']}{vc_separator}{next_consonant}"

        entry = self._calculate_vc_params(
            wav_name=wav_name,
            alias=vc_alias,
            vowel_start_ms=current_syl['vowel_start'],
            vowel_end_ms=current_syl['vowel_end'],
            # The VC segment stops where the next initial ends, not at the
            # end of the whole next syllable.
            consonant_end_ms=next_syl['consonant_end'],
            wav_duration_ms=wav_duration_ms,
            vc_offset_ratio=vc_offset_ratio,
            vc_overlap_ratio=vc_overlap_ratio
        )
        entries.append(entry)

    return entries
|
| 1452 |
|
| 1453 |
+
def _load_presamp_mapping(self) -> Tuple[Dict[str, str], Dict[str, str]]:
    """
    Build the built-in Chinese CVVC lookup tables.

    The data is embedded in this method (taken from the [VOWEL] and
    [CONSONANT] sections of a presamp.ini); nothing is read from disk.

    Returns:
        (vowel_map, consonant_map), each keyed by full pinyin syllable:
        vowel_map gives the final identifier, consonant_map the initial
        identifier.
    """
    # final identifier -> syllables sharing that final
    finals_table = {
        'a': ['a', 'ba', 'pa', 'ma', 'fa', 'da', 'ta', 'na', 'la', 'ga', 'ka', 'ha', 'zha', 'cha', 'sha', 'za', 'ca', 'sa', 'ya', 'lia', 'jia', 'qia', 'xia', 'wa', 'gua', 'kua', 'hua', 'zhua', 'shua', 'dia'],
        'ai': ['ai', 'bai', 'pai', 'mai', 'dai', 'tai', 'nai', 'lai', 'gai', 'kai', 'hai', 'zhai', 'chai', 'shai', 'zai', 'cai', 'sai', 'wai', 'guai', 'kuai', 'huai', 'zhuai', 'chuai', 'shuai'],
        'an': ['an', 'ban', 'pan', 'man', 'fan', 'dan', 'tan', 'nan', 'lan', 'gan', 'kan', 'han', 'zhan', 'chan', 'shan', 'ran', 'zan', 'can', 'san', 'wan', 'duan', 'tuan', 'nuan', 'luan', 'guan', 'kuan', 'huan', 'zhuan', 'chuan', 'shuan', 'ruan', 'zuan', 'cuan', 'suan'],
        'ang': ['ang', 'bang', 'pang', 'mang', 'fang', 'dang', 'tang', 'nang', 'lang', 'gang', 'kang', 'hang', 'zhang', 'chang', 'shang', 'rang', 'zang', 'cang', 'sang', 'yang', 'liang', 'jiang', 'qiang', 'xiang', 'wang', 'guang', 'kuang', 'huang', 'zhuang', 'chuang', 'shuang', 'niang'],
        'ao': ['ao', 'bao', 'pao', 'mao', 'dao', 'tao', 'nao', 'lao', 'gao', 'kao', 'hao', 'zhao', 'chao', 'shao', 'rao', 'zao', 'cao', 'sao', 'yao', 'biao', 'piao', 'miao', 'diao', 'tiao', 'niao', 'liao', 'jiao', 'qiao', 'xiao'],
        'e': ['e', 'me', 'de', 'te', 'ne', 'le', 'ge', 'ke', 'he', 'zhe', 'che', 'she', 're', 'ze', 'ce', 'se'],
        'e0': ['ye', 'bie', 'pie', 'mie', 'die', 'tie', 'nie', 'lie', 'jie', 'qie', 'xie', 'yue', 'nue', 'lue', 'jue', 'que', 'xue'],
        'ei': ['ei', 'bei', 'pei', 'mei', 'fei', 'dei', 'tei', 'nei', 'lei', 'gei', 'kei', 'hei', 'zhei', 'shei', 'zei', 'wei', 'dui', 'tui', 'gui', 'kui', 'hui', 'zhui', 'chui', 'shui', 'rui', 'zui', 'cui', 'sui'],
        'en': ['en', 'ben', 'pen', 'men', 'fen', 'nen', 'gen', 'ken', 'hen', 'zhen', 'chen', 'shen', 'ren', 'zen', 'cen', 'sen', 'wen', 'dun', 'tun', 'lun', 'gun', 'kun', 'hun', 'zhun', 'chun', 'shun', 'run', 'zun', 'cun', 'sun'],
        'en0': ['yan', 'bian', 'pian', 'mian', 'dian', 'tian', 'nian', 'lian', 'jian', 'qian', 'xian', 'yuan', 'juan', 'quan', 'xuan'],
        'eng': ['beng', 'peng', 'meng', 'feng', 'deng', 'teng', 'neng', 'leng', 'geng', 'keng', 'heng', 'weng', 'zheng', 'cheng', 'sheng', 'reng', 'zeng', 'ceng', 'seng'],
        'er': ['er'],
        'i': ['bi', 'pi', 'mi', 'di', 'ti', 'ni', 'li', 'ji', 'qi', 'xi', 'yi'],
        'in': ['yin', 'bin', 'pin', 'min', 'nin', 'lin', 'jin', 'qin', 'xin'],
        'ing': ['ying', 'bing', 'ping', 'ming', 'ding', 'ting', 'ning', 'ling', 'jing', 'qing', 'xing'],
        'i0': ['zi', 'ci', 'si'],
        'ir': ['zhi', 'chi', 'shi', 'ri'],
        'o': ['bo', 'po', 'mo', 'fo', 'wo', 'duo', 'tuo', 'nuo', 'luo', 'guo', 'kuo', 'huo', 'zhuo', 'chuo', 'shuo', 'ruo', 'zuo', 'cuo', 'suo'],
        'ong': ['dong', 'tong', 'nong', 'long', 'gong', 'kong', 'hong', 'zhong', 'chong', 'rong', 'zong', 'cong', 'song', 'yong', 'jiong', 'qiong', 'xiong'],
        'ou': ['ou', 'pou', 'mou', 'fou', 'dou', 'tou', 'lou', 'gou', 'kou', 'hou', 'zhou', 'chou', 'shou', 'rou', 'zou', 'cou', 'sou', 'you', 'miu', 'diu', 'niu', 'liu', 'jiu', 'qiu', 'xiu'],
        'u': ['bu', 'pu', 'mu', 'fu', 'du', 'tu', 'nu', 'lu', 'gu', 'ku', 'hu', 'zhu', 'chu', 'shu', 'ru', 'zu', 'cu', 'su', 'wu'],
        'v': ['yu', 'nv', 'lv', 'ju', 'qu', 'xu'],
        'vn': ['yun', 'jun', 'qun', 'xun'],
    }

    # initial identifier -> syllables sharing that initial
    initials_table = {
        'b': ['ba', 'bai', 'ban', 'bang', 'bao', 'biao', 'bie', 'bei', 'ben', 'bian', 'beng', 'bi', 'bin', 'bing', 'bo', 'bu'],
        'p': ['pa', 'pai', 'pan', 'pang', 'pao', 'piao', 'pie', 'pei', 'pen', 'pian', 'peng', 'pi', 'pin', 'ping', 'po', 'pou', 'pu'],
        'm': ['ma', 'mai', 'man', 'mang', 'mao', 'me', 'mei', 'men', 'meng', 'mo', 'mou', 'mu'],
        'f': ['fa', 'fan', 'fang', 'fei', 'fen', 'feng', 'fo', 'fou', 'fu'],
        'd': ['da', 'dia', 'dai', 'dan', 'duan', 'dang', 'dao', 'diao', 'de', 'die', 'dei', 'dui', 'dun', 'dian', 'deng', 'di', 'ding', 'duo', 'dong', 'dou', 'diu', 'du'],
        't': ['ta', 'tai', 'tan', 'tuan', 'tang', 'tao', 'tiao', 'te', 'tie', 'tei', 'tui', 'tun', 'tian', 'teng', 'ti', 'ting', 'tuo', 'tong', 'tou', 'tu'],
        'n': ['na', 'nai', 'nan', 'nuan', 'nang', 'nao', 'ne', 'nue', 'nei', 'nen', 'neng', 'nuo', 'nong', 'nu', 'nv'],
        'l': ['la', 'lai', 'lan', 'luan', 'lang', 'lao', 'le', 'lue', 'lei', 'lun', 'leng', 'luo', 'long', 'lou', 'lu', 'lv'],
        'g': ['ga', 'gua', 'gai', 'guai', 'gan', 'guan', 'gang', 'guang', 'gao', 'ge', 'gei', 'gui', 'gen', 'gun', 'geng', 'guo', 'gong', 'gou', 'gu'],
        'k': ['ka', 'kua', 'kai', 'kuai', 'kan', 'kuan', 'kang', 'kuang', 'kao', 'ke', 'kei', 'kui', 'ken', 'kun', 'keng', 'kuo', 'kong', 'kou', 'ku'],
        'h': ['ha', 'hai', 'han', 'hang', 'hao', 'he', 'hei', 'hen', 'heng', 'hong', 'hou'],
        'zh': ['zha', 'zhua', 'zhai', 'zhuai', 'zhan', 'zhuan', 'zhang', 'zhuang', 'zhao', 'zhe', 'zhei', 'zhui', 'zhen', 'zhun', 'zheng', 'zhi', 'zhuo', 'zhong', 'zhou', 'zhu'],
        'ch': ['cha', 'chai', 'chuai', 'chan', 'chuan', 'chang', 'chuang', 'chao', 'che', 'chui', 'chen', 'chun', 'cheng', 'chi', 'chuo', 'chong', 'chou', 'chu'],
        'sh': ['sha', 'shai', 'shan', 'shang', 'shao', 'she', 'shei', 'shen', 'sheng', 'shi', 'shou'],
        'z': ['za', 'zai', 'zan', 'zuan', 'zang', 'zao', 'ze', 'zei', 'zui', 'zen', 'zun', 'zeng', 'zi', 'zuo', 'zong', 'zou', 'zu'],
        'c': ['ca', 'cai', 'can', 'cuan', 'cang', 'cao', 'ce', 'cui', 'cen', 'cun', 'ceng', 'ci', 'cuo', 'cong', 'cou', 'cu'],
        's': ['sa', 'sai', 'san', 'sang', 'sao', 'se', 'sen', 'seng', 'si', 'song', 'sou'],
        'y': ['ya', 'yang', 'yao', 'ye', 'yan', 'yi', 'yin', 'ying', 'yong', 'you'],
        'ly': ['lia', 'liang', 'liao', 'lie', 'lian', 'li', 'lin', 'ling', 'liu'],
        'j': ['jia', 'jiang', 'jiao', 'jie', 'jue', 'jian', 'juan', 'ji', 'jin', 'jing', 'jiong', 'jiu', 'ju', 'jun'],
        'q': ['qia', 'qiang', 'qiao', 'qie', 'que', 'qian', 'quan', 'qi', 'qin', 'qing', 'qiong', 'qiu', 'qu', 'qun'],
        'xy': ['xia', 'xiang', 'xiao', 'xie', 'xian', 'xi', 'xin', 'xing', 'xiong', 'xiu'],
        'w': ['wa', 'wai', 'wan', 'wang', 'wei', 'wen', 'weng', 'wo', 'wu'],
        'hw': ['hua', 'huai', 'huan', 'huang', 'hui', 'hun', 'huo', 'hu'],
        'shw': ['shua', 'shuai', 'shuan', 'shuang', 'shui', 'shun', 'shuo', 'shu'],
        'r': ['ran', 'ruan', 'rang', 'rao', 're', 'rui', 'ren', 'run', 'reng', 'ri', 'ruo', 'rong', 'rou', 'ru'],
        'sw': ['suan', 'sui', 'sun', 'suo', 'su'],
        'ny': ['niang', 'niao', 'nie', 'nian', 'ni', 'nin', 'ning', 'niu'],
        'my': ['miao', 'mie', 'mian', 'mi', 'min', 'ming', 'miu'],
        'v': ['yu', 'yue', 'yuan', 'yun'],
        'xw': ['xue', 'xuan', 'xu', 'xun'],
    }

    # Invert both tables into syllable -> identifier. If a syllable were
    # listed twice, the later group wins, exactly as with sequential
    # assignment.
    vowel_map = {
        syllable: final_id
        for final_id, group in finals_table.items()
        for syllable in group
    }
    consonant_map = {
        syllable: initial_id
        for initial_id, group in initials_table.items()
        for syllable in group
    }

    self._log(f"加载内置 CVVC 映射: {len(vowel_map)} 个韵母映射, {len(consonant_map)} 个声母映射")
    return vowel_map, consonant_map
|
| 1539 |
+
|
| 1540 |
+
def _find_vowel_in_mapping(self, pinyin: str, vowel_map: Dict[str, str]) -> Optional[str]:
|
| 1541 |
+
"""
|
| 1542 |
+
在韵母映射中查找拼音对应的韵母标识
|
| 1543 |
+
|
| 1544 |
+
参数:
|
| 1545 |
+
pinyin: 完整拼音
|
| 1546 |
+
vowel_map: 韵母映射字典
|
| 1547 |
+
|
| 1548 |
+
返回:
|
| 1549 |
+
韵母标识,如果未找到则返回 None
|
| 1550 |
+
"""
|
| 1551 |
+
return vowel_map.get(pinyin)
|
| 1552 |
+
|
| 1553 |
+
def _find_consonant_in_mapping(self, pinyin: str, consonant_map: Dict[str, str]) -> Optional[str]:
|
| 1554 |
+
"""
|
| 1555 |
+
在声母映射中查找拼音对应的声母标识
|
| 1556 |
+
|
| 1557 |
+
参数:
|
| 1558 |
+
pinyin: 完整拼音
|
| 1559 |
+
consonant_map: 声母映射字典
|
| 1560 |
+
|
| 1561 |
+
返回:
|
| 1562 |
+
声母标识,如果未找到则返回 None
|
| 1563 |
+
"""
|
| 1564 |
+
return consonant_map.get(pinyin)
|
| 1565 |
+
|
| 1566 |
def _calculate_oto_params(
|
| 1567 |
self,
|
| 1568 |
wav_name: str,
|
|
|
|
| 1602 |
"segment_duration": segment_duration, # 用于排序
|
| 1603 |
}
|
| 1604 |
|
| 1605 |
+
def _calculate_vc_params(
|
| 1606 |
+
self,
|
| 1607 |
+
wav_name: str,
|
| 1608 |
+
alias: str,
|
| 1609 |
+
vowel_start_ms: float,
|
| 1610 |
+
vowel_end_ms: float,
|
| 1611 |
+
consonant_end_ms: float,
|
| 1612 |
+
wav_duration_ms: float,
|
| 1613 |
+
vc_offset_ratio: float,
|
| 1614 |
+
vc_overlap_ratio: float
|
| 1615 |
+
) -> Dict:
|
| 1616 |
+
"""
|
| 1617 |
+
计算 VC 部的 oto.ini 参数
|
| 1618 |
+
|
| 1619 |
+
VC 部从元音后半段开始,到辅音结束
|
| 1620 |
+
|
| 1621 |
+
参数:
|
| 1622 |
+
wav_name: 音频文件名
|
| 1623 |
+
alias: VC 别名
|
| 1624 |
+
vowel_start_ms: 元音开始时间
|
| 1625 |
+
vowel_end_ms: 元音结束时间(即辅音开始时间)
|
| 1626 |
+
consonant_end_ms: 辅音结束时间
|
| 1627 |
+
wav_duration_ms: 音频总时长
|
| 1628 |
+
vc_offset_ratio: VC 偏移比例
|
| 1629 |
+
vc_overlap_ratio: VC overlap 比例
|
| 1630 |
+
|
| 1631 |
+
返回:
|
| 1632 |
+
oto 参数字典
|
| 1633 |
+
"""
|
| 1634 |
+
vowel_duration = vowel_end_ms - vowel_start_ms
|
| 1635 |
+
|
| 1636 |
+
# offset: 元音后半段位置
|
| 1637 |
+
offset = vowel_end_ms - vowel_duration * vc_offset_ratio
|
| 1638 |
+
|
| 1639 |
+
# 总时长(从 offset 到辅音结束)
|
| 1640 |
+
segment_duration = consonant_end_ms - offset
|
| 1641 |
+
|
| 1642 |
+
# preutterance: 从 offset 到辅音开始(即元音结束)的距离
|
| 1643 |
+
preutterance = vowel_end_ms - offset
|
| 1644 |
+
|
| 1645 |
+
# consonant: 固定区域,较短
|
| 1646 |
+
consonant = min(30, segment_duration * 0.3)
|
| 1647 |
+
|
| 1648 |
+
# overlap: 较大,平滑过渡
|
| 1649 |
+
overlap = preutterance * vc_overlap_ratio
|
| 1650 |
+
|
| 1651 |
+
# cutoff: 负值,表示总时长
|
| 1652 |
+
cutoff = -segment_duration
|
| 1653 |
+
|
| 1654 |
+
return {
|
| 1655 |
+
"wav_name": wav_name,
|
| 1656 |
+
"alias": alias,
|
| 1657 |
+
"offset": round(offset, 1),
|
| 1658 |
+
"consonant": round(consonant, 1),
|
| 1659 |
+
"cutoff": round(cutoff, 1),
|
| 1660 |
+
"preutterance": round(preutterance, 1),
|
| 1661 |
+
"overlap": round(overlap, 1),
|
| 1662 |
+
"segment_duration": segment_duration,
|
| 1663 |
+
"is_vc": True # 标记为 VC 部
|
| 1664 |
+
}
|
| 1665 |
+
|
| 1666 |
def _filter_by_alias(
|
| 1667 |
self,
|
| 1668 |
entries: List[Dict],
|
|
|
|
| 2310 |
|
| 2311 |
# 获取有效的元音列表(用于验证组合)
|
| 2312 |
if language in ('chinese', 'zh', 'mandarin'):
|
| 2313 |
+
valid_vowels = {'a', 'o', 'e', 'i', 'u', 'v',
|
| 2314 |
+
'ai', 'ei', 'ao', 'ou',
|
| 2315 |
+
'an', 'en', 'ang', 'eng', 'ong',
|
| 2316 |
+
'ia', 'ie', 'iao', 'iu', 'ian', 'in', 'iang', 'ing', 'iong',
|
| 2317 |
+
'ua', 'uo', 'uai', 'ui', 'uan', 'un', 'uang', 'ueng',
|
| 2318 |
+
've', 'van', 'vn', 'er'}
|
| 2319 |
else:
|
| 2320 |
valid_vowels = {'a', 'i', 'u', 'e', 'o'}
|
| 2321 |
|
|
|
|
| 2432 |
all_consonants = ['b', 'p', 'm', 'f', 'd', 't', 'n', 'l', 'g', 'k', 'h',
|
| 2433 |
'j', 'q', 'x', 'zh', 'ch', 'sh', 'r', 'z', 'c', 's', 'y', 'w']
|
| 2434 |
|
| 2435 |
+
# 中文所有可能的韵母(包含所有标准韵母)
|
| 2436 |
+
all_vowels = ['a', 'o', 'e', 'i', 'u', 'v',
|
| 2437 |
+
'ai', 'ei', 'ao', 'ou',
|
| 2438 |
+
'an', 'en', 'ang', 'eng', 'ong',
|
| 2439 |
+
'ia', 'ie', 'iao', 'iu', 'ian', 'in', 'iang', 'ing', 'iong',
|
| 2440 |
+
'ua', 'uo', 'uai', 'ui', 'uan', 'un', 'uang', 'ueng',
|
| 2441 |
+
've', 'van', 'vn', 'er']
|
| 2442 |
|
| 2443 |
fuzzy_count = 0
|
| 2444 |
|
src/gui_cloud.py
CHANGED
|
@@ -60,18 +60,10 @@ def safe_gradio_handler(func):
|
|
| 60 |
Gradio 处理函数的安全包装器
|
| 61 |
|
| 62 |
捕获所有异常并返回友好的错误信息,避免 Gradio 显示默认的"错误"状态
|
| 63 |
-
同时确保异常时释放并发计数,防止计数滞留
|
| 64 |
"""
|
| 65 |
import functools
|
| 66 |
import traceback
|
| 67 |
|
| 68 |
-
# 需要管理并发计数的函数列表
|
| 69 |
-
CONCURRENCY_MANAGED_FUNCS = {
|
| 70 |
-
'process_make_voicebank',
|
| 71 |
-
'process_export_voicebank',
|
| 72 |
-
'process_mfa_realign'
|
| 73 |
-
}
|
| 74 |
-
|
| 75 |
@functools.wraps(func)
|
| 76 |
def wrapper(*args, **kwargs):
|
| 77 |
try:
|
|
@@ -81,15 +73,11 @@ def safe_gradio_handler(func):
|
|
| 81 |
error_trace = traceback.format_exc()
|
| 82 |
logger.error(f"处理函数 {func.__name__} 发生异常:\n{error_trace}")
|
| 83 |
|
| 84 |
-
# 如果是并发管理的函数,确保释放并发计数
|
| 85 |
-
# 注意:函数内部可能已经调用了 decrement_concurrency(),
|
| 86 |
-
# 但如果异常发生在 increment 之后、decrement 之前,这里需要补救
|
| 87 |
-
# decrement_concurrency() 内部有 max(0, ...) 保护,不会变成负数
|
| 88 |
-
if func.__name__ in CONCURRENCY_MANAGED_FUNCS:
|
| 89 |
-
decrement_concurrency()
|
| 90 |
-
logger.info(f"异常处理:已释放 {func.__name__} 的并发计数")
|
| 91 |
-
|
| 92 |
# 根据函数返回值数量返回错误信息
|
|
|
|
|
|
|
|
|
|
|
|
|
| 93 |
error_msg = f"❌ 系统错误: {str(e)}"
|
| 94 |
error_detail = f"异常类型: {type(e).__name__}\n详情: {str(e)}"
|
| 95 |
|
|
@@ -143,144 +131,6 @@ def create_temp_workspace() -> str:
|
|
| 143 |
return workspace
|
| 144 |
|
| 145 |
|
| 146 |
-
def cleanup_gradio_cache(max_age_hours: float = 1.0):
|
| 147 |
-
"""
|
| 148 |
-
清理 Gradio 临时文件缓存
|
| 149 |
-
|
| 150 |
-
参数:
|
| 151 |
-
max_age_hours: 文件最大保留时间(小时),超过此时间的文件将被删除
|
| 152 |
-
"""
|
| 153 |
-
import time
|
| 154 |
-
|
| 155 |
-
gradio_tmp_dir = os.path.join(tempfile.gettempdir(), "gradio")
|
| 156 |
-
if not os.path.exists(gradio_tmp_dir):
|
| 157 |
-
return
|
| 158 |
-
|
| 159 |
-
current_time = time.time()
|
| 160 |
-
max_age_seconds = max_age_hours * 3600
|
| 161 |
-
cleaned_count = 0
|
| 162 |
-
cleaned_size = 0
|
| 163 |
-
|
| 164 |
-
try:
|
| 165 |
-
for root, dirs, files in os.walk(gradio_tmp_dir, topdown=False):
|
| 166 |
-
for name in files:
|
| 167 |
-
file_path = os.path.join(root, name)
|
| 168 |
-
try:
|
| 169 |
-
file_age = current_time - os.path.getmtime(file_path)
|
| 170 |
-
if file_age > max_age_seconds:
|
| 171 |
-
file_size = os.path.getsize(file_path)
|
| 172 |
-
os.remove(file_path)
|
| 173 |
-
cleaned_count += 1
|
| 174 |
-
cleaned_size += file_size
|
| 175 |
-
except Exception:
|
| 176 |
-
pass
|
| 177 |
-
|
| 178 |
-
# 删除空目录
|
| 179 |
-
for name in dirs:
|
| 180 |
-
dir_path = os.path.join(root, name)
|
| 181 |
-
try:
|
| 182 |
-
if not os.listdir(dir_path):
|
| 183 |
-
os.rmdir(dir_path)
|
| 184 |
-
except Exception:
|
| 185 |
-
pass
|
| 186 |
-
|
| 187 |
-
if cleaned_count > 0:
|
| 188 |
-
size_mb = cleaned_size / (1024 * 1024)
|
| 189 |
-
logger.info(f"Gradio 缓存清理: 删除 {cleaned_count} 个文件, 释放 {size_mb:.1f} MB")
|
| 190 |
-
except Exception as e:
|
| 191 |
-
logger.warning(f"Gradio 缓存清理失败: {e}")
|
| 192 |
-
|
| 193 |
-
|
| 194 |
-
def cleanup_old_jinriki_workspaces(max_age_hours: float = 2.0):
|
| 195 |
-
"""
|
| 196 |
-
清理旧的 jinriki 工作空间
|
| 197 |
-
|
| 198 |
-
参数:
|
| 199 |
-
max_age_hours: 工作空间最大保留时间(小时)
|
| 200 |
-
"""
|
| 201 |
-
import time
|
| 202 |
-
|
| 203 |
-
current_time = time.time()
|
| 204 |
-
max_age_seconds = max_age_hours * 3600
|
| 205 |
-
cleaned_count = 0
|
| 206 |
-
|
| 207 |
-
try:
|
| 208 |
-
for item in os.listdir(CloudConfig.TEMP_BASE):
|
| 209 |
-
if item.startswith("jinriki_"):
|
| 210 |
-
workspace_path = os.path.join(CloudConfig.TEMP_BASE, item)
|
| 211 |
-
if os.path.isdir(workspace_path):
|
| 212 |
-
try:
|
| 213 |
-
dir_age = current_time - os.path.getmtime(workspace_path)
|
| 214 |
-
if dir_age > max_age_seconds:
|
| 215 |
-
shutil.rmtree(workspace_path)
|
| 216 |
-
cleaned_count += 1
|
| 217 |
-
except Exception:
|
| 218 |
-
pass
|
| 219 |
-
|
| 220 |
-
if cleaned_count > 0:
|
| 221 |
-
logger.info(f"工作空间清理: 删除 {cleaned_count} 个旧工作空间")
|
| 222 |
-
except Exception as e:
|
| 223 |
-
logger.warning(f"工作空间清理失败: {e}")
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
def start_periodic_cleanup(interval_minutes: int = 15):
|
| 227 |
-
"""
|
| 228 |
-
启动定期清理任务
|
| 229 |
-
|
| 230 |
-
参数:
|
| 231 |
-
interval_minutes: 清理间隔(分钟),默认15分钟
|
| 232 |
-
"""
|
| 233 |
-
import time
|
| 234 |
-
|
| 235 |
-
def cleanup_task():
|
| 236 |
-
while True:
|
| 237 |
-
try:
|
| 238 |
-
time.sleep(interval_minutes * 60)
|
| 239 |
-
logger.info("执行定期清理...")
|
| 240 |
-
cleanup_gradio_cache(max_age_hours=0.5) # 30分钟以上的缓存
|
| 241 |
-
cleanup_old_jinriki_workspaces(max_age_hours=1.0) # 1小时以上的工作空间
|
| 242 |
-
except Exception as e:
|
| 243 |
-
logger.error(f"定期清理任务异常: {e}")
|
| 244 |
-
|
| 245 |
-
cleanup_thread = threading.Thread(target=cleanup_task, daemon=True)
|
| 246 |
-
cleanup_thread.start()
|
| 247 |
-
logger.info(f"定期清理任务已启动,间隔 {interval_minutes} 分钟")
|
| 248 |
-
|
| 249 |
-
|
| 250 |
-
def check_disk_space(min_mb: int = 100) -> Tuple[bool, str]:
|
| 251 |
-
"""
|
| 252 |
-
检查磁盘空间是否充足
|
| 253 |
-
|
| 254 |
-
参数:
|
| 255 |
-
min_mb: 最小可用空间(MB)
|
| 256 |
-
|
| 257 |
-
返回:
|
| 258 |
-
(是否充足, 消息)
|
| 259 |
-
"""
|
| 260 |
-
try:
|
| 261 |
-
import shutil
|
| 262 |
-
total, used, free = shutil.disk_usage("/tmp")
|
| 263 |
-
free_mb = free / (1024 * 1024)
|
| 264 |
-
|
| 265 |
-
if free_mb < min_mb:
|
| 266 |
-
# 尝试清理
|
| 267 |
-
logger.warning(f"磁盘空间不足 ({free_mb:.0f} MB),尝试清理...")
|
| 268 |
-
cleanup_gradio_cache(max_age_hours=0) # 清理所有缓存
|
| 269 |
-
cleanup_old_jinriki_workspaces(max_age_hours=0) # 清理所有工作空间
|
| 270 |
-
|
| 271 |
-
# 重新检查
|
| 272 |
-
total, used, free = shutil.disk_usage("/tmp")
|
| 273 |
-
free_mb = free / (1024 * 1024)
|
| 274 |
-
|
| 275 |
-
if free_mb < min_mb:
|
| 276 |
-
return False, f"磁盘空间不足,仅剩 {free_mb:.0f} MB,请稍后重试"
|
| 277 |
-
|
| 278 |
-
return True, f"可用空间: {free_mb:.0f} MB"
|
| 279 |
-
except Exception as e:
|
| 280 |
-
logger.warning(f"检查磁盘空间失败: {e}")
|
| 281 |
-
return True, "无法检查磁盘空间" # 无法检查时允许继续
|
| 282 |
-
|
| 283 |
-
|
| 284 |
def cleanup_workspace(workspace: str):
|
| 285 |
"""清理工作空间"""
|
| 286 |
if workspace and os.path.exists(workspace):
|
|
@@ -351,47 +201,34 @@ def get_audio_duration(file_path: str) -> Optional[float]:
|
|
| 351 |
|
| 352 |
返回: 时长秒数,失败返回 None
|
| 353 |
"""
|
| 354 |
-
import subprocess
|
| 355 |
-
|
| 356 |
-
try:
|
| 357 |
-
# 优先使用 ffprobe(更轻量,不需要解码整个文件)
|
| 358 |
-
cmd = [
|
| 359 |
-
'ffprobe', '-v', 'error',
|
| 360 |
-
'-show_entries', 'format=duration',
|
| 361 |
-
'-of', 'default=noprint_wrappers=1:nokey=1',
|
| 362 |
-
file_path
|
| 363 |
-
]
|
| 364 |
-
result = subprocess.run(cmd, capture_output=True, text=True, timeout=10)
|
| 365 |
-
|
| 366 |
-
if result.returncode == 0 and result.stdout.strip():
|
| 367 |
-
duration = float(result.stdout.strip())
|
| 368 |
-
if duration > 0:
|
| 369 |
-
return duration
|
| 370 |
-
|
| 371 |
-
# ffprobe 失败时的错误信息
|
| 372 |
-
if result.stderr:
|
| 373 |
-
logger.debug(f"ffprobe 错误: {result.stderr.strip()}")
|
| 374 |
-
|
| 375 |
-
except subprocess.TimeoutExpired:
|
| 376 |
-
logger.warning(f"ffprobe 超时: {file_path}")
|
| 377 |
-
except (ValueError, Exception) as e:
|
| 378 |
-
logger.debug(f"ffprobe 获取时长失败: {e}")
|
| 379 |
-
|
| 380 |
-
# 回退:对于 WAV 文件,使用 wave 模块
|
| 381 |
try:
|
| 382 |
import wave
|
| 383 |
import contextlib
|
| 384 |
|
|
|
|
| 385 |
if file_path.lower().endswith('.wav'):
|
| 386 |
with contextlib.closing(wave.open(file_path, 'r')) as f:
|
| 387 |
frames = f.getnframes()
|
| 388 |
rate = f.getframerate()
|
| 389 |
return frames / float(rate)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 390 |
except Exception as e:
|
| 391 |
-
logger.
|
| 392 |
-
|
| 393 |
-
logger.warning(f"无法获取音频时长: {os.path.basename(file_path)}")
|
| 394 |
-
return None
|
| 395 |
|
| 396 |
|
| 397 |
# 云端音频时长限制(秒)
|
|
@@ -407,24 +244,15 @@ def validate_audio_upload(files) -> Tuple[bool, str, List[str]]:
|
|
| 407 |
if not files:
|
| 408 |
return False, "请上传音频文件", []
|
| 409 |
|
| 410 |
-
# 调试:记录 Gradio 传入的文件对象类型
|
| 411 |
-
logger.info(f"Gradio 文件对象类型: {type(files)}, 数量: {len(files) if hasattr(files, '__len__') else 'N/A'}")
|
| 412 |
-
|
| 413 |
valid_files = []
|
| 414 |
-
for
|
| 415 |
-
|
| 416 |
-
|
| 417 |
-
# Gradio 6.x 可能直接传入字符串路径
|
| 418 |
-
if isinstance(f, str):
|
| 419 |
-
path = f
|
| 420 |
-
elif hasattr(f, 'name'):
|
| 421 |
path = f.name
|
| 422 |
else:
|
| 423 |
path = str(f)
|
| 424 |
|
| 425 |
if path.lower().endswith(CloudConfig.AUDIO_EXTENSIONS):
|
| 426 |
valid_files.append(path)
|
| 427 |
-
logger.info(f"文件[{i}] 有效路径: {path}")
|
| 428 |
|
| 429 |
if not valid_files:
|
| 430 |
return False, f"未找到有效音频文件,支持格式: {', '.join(CloudConfig.AUDIO_EXTENSIONS)}", []
|
|
@@ -493,12 +321,6 @@ def process_make_voicebank(
|
|
| 493 |
logs.append(msg)
|
| 494 |
logger.info(msg)
|
| 495 |
|
| 496 |
-
# 检查磁盘空间
|
| 497 |
-
space_ok, space_msg = check_disk_space(min_mb=200)
|
| 498 |
-
if not space_ok:
|
| 499 |
-
decrement_concurrency()
|
| 500 |
-
return f"❌ {space_msg}", "", None, None
|
| 501 |
-
|
| 502 |
try:
|
| 503 |
# 导入依赖(放在 try 块内以捕获导入错误)
|
| 504 |
from src.pipeline import PipelineConfig, VoiceBankPipeline
|
|
@@ -541,73 +363,22 @@ def process_make_voicebank(
|
|
| 541 |
os.makedirs(input_dir, exist_ok=True)
|
| 542 |
os.makedirs(bank_dir, exist_ok=True)
|
| 543 |
|
| 544 |
-
# 复制音频文件到输入目录
|
| 545 |
progress(0.05, desc="复制音频文件...")
|
| 546 |
copied_count = 0
|
| 547 |
-
|
| 548 |
-
|
| 549 |
-
for idx, src_path in enumerate(file_paths):
|
| 550 |
-
original_name = os.path.basename(src_path)
|
| 551 |
-
|
| 552 |
-
# 检查源文件
|
| 553 |
if not os.path.exists(src_path):
|
| 554 |
-
|
| 555 |
-
continue
|
| 556 |
-
|
| 557 |
-
src_size = os.path.getsize(src_path)
|
| 558 |
-
if src_size == 0:
|
| 559 |
-
copy_errors.append(f"{original_name}: 文件为空")
|
| 560 |
-
continue
|
| 561 |
-
|
| 562 |
-
# 验证文件是否为有效音频(检查文件头)
|
| 563 |
-
try:
|
| 564 |
-
with open(src_path, 'rb') as f:
|
| 565 |
-
header = f.read(12)
|
| 566 |
-
|
| 567 |
-
# 检查常见音频格式的文件头
|
| 568 |
-
is_valid_audio = False
|
| 569 |
-
if header[:4] == b'RIFF' and header[8:12] == b'WAVE': # WAV
|
| 570 |
-
is_valid_audio = True
|
| 571 |
-
elif header[:4] == b'OggS': # OGG
|
| 572 |
-
is_valid_audio = True
|
| 573 |
-
elif header[:3] == b'ID3' or header[:2] == b'\xff\xfb': # MP3
|
| 574 |
-
is_valid_audio = True
|
| 575 |
-
elif header[:4] == b'fLaC': # FLAC
|
| 576 |
-
is_valid_audio = True
|
| 577 |
-
elif header[4:8] == b'ftyp': # M4A/MP4
|
| 578 |
-
is_valid_audio = True
|
| 579 |
-
|
| 580 |
-
if not is_valid_audio:
|
| 581 |
-
copy_errors.append(f"{original_name}: 无效的音频文件格式 (header: {header[:8].hex()})")
|
| 582 |
-
continue
|
| 583 |
-
|
| 584 |
-
except Exception as e:
|
| 585 |
-
copy_errors.append(f"{original_name}: 无法读取文件头 ({e})")
|
| 586 |
continue
|
| 587 |
-
|
| 588 |
try:
|
| 589 |
-
|
| 590 |
-
_, ext = os.path.splitext(original_name)
|
| 591 |
-
safe_name = f"audio_{idx:04d}{ext.lower()}"
|
| 592 |
-
dst_path = os.path.join(input_dir, safe_name)
|
| 593 |
-
|
| 594 |
shutil.copy2(src_path, dst_path)
|
| 595 |
-
|
| 596 |
-
# 验证复制结果
|
| 597 |
-
if os.path.getsize(dst_path) == src_size:
|
| 598 |
-
copied_count += 1
|
| 599 |
-
log(f" {original_name} ({src_size} bytes) -> {safe_name}")
|
| 600 |
-
else:
|
| 601 |
-
copy_errors.append(f"{original_name}: 复制不完整")
|
| 602 |
except Exception as e:
|
| 603 |
-
|
| 604 |
-
|
| 605 |
-
if copy_errors:
|
| 606 |
-
for err in copy_errors:
|
| 607 |
-
log(f"⚠️ {err}")
|
| 608 |
|
| 609 |
if copied_count == 0:
|
| 610 |
-
decrement_concurrency()
|
| 611 |
return "❌ 无法访问上传的文件,请重新上传", "\n".join(logs), None, None
|
| 612 |
|
| 613 |
log(f"📋 已复制 {copied_count}/{len(file_paths)} 个文件到工作目录")
|
|
@@ -826,12 +597,6 @@ def process_export_voicebank(
|
|
| 826 |
logs.append(msg)
|
| 827 |
logger.info(msg)
|
| 828 |
|
| 829 |
-
# 检查磁盘空间
|
| 830 |
-
space_ok, space_msg = check_disk_space(min_mb=100)
|
| 831 |
-
if not space_ok:
|
| 832 |
-
decrement_concurrency()
|
| 833 |
-
return f"❌ {space_msg}", "", None
|
| 834 |
-
|
| 835 |
# 验证输入
|
| 836 |
valid, msg, source_name = validate_voicebank_zip(zip_file)
|
| 837 |
if not valid:
|
|
@@ -1054,12 +819,6 @@ def process_mfa_realign(
|
|
| 1054 |
logs.append(msg)
|
| 1055 |
logger.info(msg)
|
| 1056 |
|
| 1057 |
-
# 检查磁盘空间
|
| 1058 |
-
space_ok, space_msg = check_disk_space(min_mb=100)
|
| 1059 |
-
if not space_ok:
|
| 1060 |
-
decrement_concurrency()
|
| 1061 |
-
return f"❌ {space_msg}", "", None
|
| 1062 |
-
|
| 1063 |
# 验证输入
|
| 1064 |
if not zip_file:
|
| 1065 |
decrement_concurrency()
|
|
@@ -1949,14 +1708,6 @@ def create_cloud_ui():
|
|
| 1949 |
|
| 1950 |
def main():
|
| 1951 |
"""云端入口"""
|
| 1952 |
-
# 启动时执行一次清理
|
| 1953 |
-
logger.info("启动时执行缓存清理...")
|
| 1954 |
-
cleanup_gradio_cache(max_age_hours=0.5) # 清理超过30分钟的缓存
|
| 1955 |
-
cleanup_old_jinriki_workspaces(max_age_hours=1.0) # 清理超过1小时的工作空间
|
| 1956 |
-
|
| 1957 |
-
# 启动定期清理任务
|
| 1958 |
-
start_periodic_cleanup(interval_minutes=30)
|
| 1959 |
-
|
| 1960 |
app = create_cloud_ui()
|
| 1961 |
# 启用队列,魔搭CPU按需分配,无需设置并发上限
|
| 1962 |
app.queue()
|
|
|
|
| 60 |
Gradio 处理函数的安全包装器
|
| 61 |
|
| 62 |
捕获所有异常并返回友好的错误信息,避免 Gradio 显示默认的"错误"状态
|
|
|
|
| 63 |
"""
|
| 64 |
import functools
|
| 65 |
import traceback
|
| 66 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 67 |
@functools.wraps(func)
|
| 68 |
def wrapper(*args, **kwargs):
|
| 69 |
try:
|
|
|
|
| 73 |
error_trace = traceback.format_exc()
|
| 74 |
logger.error(f"处理函数 {func.__name__} 发生异常:\n{error_trace}")
|
| 75 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 76 |
# 根据函数返回值数量返回错误信息
|
| 77 |
+
# 检查函数的类型注解来确定返回值数量
|
| 78 |
+
annotations = getattr(func, '__annotations__', {})
|
| 79 |
+
return_type = annotations.get('return', None)
|
| 80 |
+
|
| 81 |
error_msg = f"❌ 系统错误: {str(e)}"
|
| 82 |
error_detail = f"异常类型: {type(e).__name__}\n详情: {str(e)}"
|
| 83 |
|
|
|
|
| 131 |
return workspace
|
| 132 |
|
| 133 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 134 |
def cleanup_workspace(workspace: str):
|
| 135 |
"""清理工作空间"""
|
| 136 |
if workspace and os.path.exists(workspace):
|
|
|
|
| 201 |
|
| 202 |
返回: 时长秒数,失败返回 None
|
| 203 |
"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 204 |
try:
|
| 205 |
import wave
|
| 206 |
import contextlib
|
| 207 |
|
| 208 |
+
# 对于 WAV 文件,使用 wave 模块快速获取时长
|
| 209 |
if file_path.lower().endswith('.wav'):
|
| 210 |
with contextlib.closing(wave.open(file_path, 'r')) as f:
|
| 211 |
frames = f.getnframes()
|
| 212 |
rate = f.getframerate()
|
| 213 |
return frames / float(rate)
|
| 214 |
+
|
| 215 |
+
# 对于其他格式,使用 pydub(如果可用)
|
| 216 |
+
try:
|
| 217 |
+
from pydub import AudioSegment
|
| 218 |
+
audio = AudioSegment.from_file(file_path)
|
| 219 |
+
return len(audio) / 1000.0 # 毫秒转秒
|
| 220 |
+
except ImportError:
|
| 221 |
+
# pydub 不可用,尝试使用 librosa
|
| 222 |
+
try:
|
| 223 |
+
import librosa
|
| 224 |
+
duration = librosa.get_duration(path=file_path)
|
| 225 |
+
return duration
|
| 226 |
+
except ImportError:
|
| 227 |
+
logger.warning(f"无法获取音频时长,缺少 pydub 或 librosa: {file_path}")
|
| 228 |
+
return None
|
| 229 |
except Exception as e:
|
| 230 |
+
logger.warning(f"获取音频时长失败 {file_path}: {e}")
|
| 231 |
+
return None
|
|
|
|
|
|
|
| 232 |
|
| 233 |
|
| 234 |
# 云端音频时长限制(秒)
|
|
|
|
| 244 |
if not files:
|
| 245 |
return False, "请上传音频文件", []
|
| 246 |
|
|
|
|
|
|
|
|
|
|
| 247 |
valid_files = []
|
| 248 |
+
for f in files:
|
| 249 |
+
if hasattr(f, 'name'):
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 250 |
path = f.name
|
| 251 |
else:
|
| 252 |
path = str(f)
|
| 253 |
|
| 254 |
if path.lower().endswith(CloudConfig.AUDIO_EXTENSIONS):
|
| 255 |
valid_files.append(path)
|
|
|
|
| 256 |
|
| 257 |
if not valid_files:
|
| 258 |
return False, f"未找到有效音频文件,支持格式: {', '.join(CloudConfig.AUDIO_EXTENSIONS)}", []
|
|
|
|
| 321 |
logs.append(msg)
|
| 322 |
logger.info(msg)
|
| 323 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 324 |
try:
|
| 325 |
# 导入依赖(放在 try 块内以捕获导入错误)
|
| 326 |
from src.pipeline import PipelineConfig, VoiceBankPipeline
|
|
|
|
| 363 |
os.makedirs(input_dir, exist_ok=True)
|
| 364 |
os.makedirs(bank_dir, exist_ok=True)
|
| 365 |
|
| 366 |
+
# 复制音频文件到输入目录
|
| 367 |
progress(0.05, desc="复制音频文件...")
|
| 368 |
copied_count = 0
|
| 369 |
+
for src_path in file_paths:
|
| 370 |
+
# 检查源文件是否存在
|
|
|
|
|
|
|
|
|
|
|
|
|
| 371 |
if not os.path.exists(src_path):
|
| 372 |
+
log(f"⚠️ 文件不存在或已被清理: {src_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 373 |
continue
|
|
|
|
| 374 |
try:
|
| 375 |
+
dst_path = os.path.join(input_dir, os.path.basename(src_path))
|
|
|
|
|
|
|
|
|
|
|
|
|
| 376 |
shutil.copy2(src_path, dst_path)
|
| 377 |
+
copied_count += 1
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 378 |
except Exception as e:
|
| 379 |
+
log(f"⚠️ 复制文件失败 {os.path.basename(src_path)}: {e}")
|
|
|
|
|
|
|
|
|
|
|
|
|
| 380 |
|
| 381 |
if copied_count == 0:
|
|
|
|
| 382 |
return "❌ 无法访问上传的文件,请重新上传", "\n".join(logs), None, None
|
| 383 |
|
| 384 |
log(f"📋 已复制 {copied_count}/{len(file_paths)} 个文件到工作目录")
|
|
|
|
| 597 |
logs.append(msg)
|
| 598 |
logger.info(msg)
|
| 599 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 600 |
# 验证输入
|
| 601 |
valid, msg, source_name = validate_voicebank_zip(zip_file)
|
| 602 |
if not valid:
|
|
|
|
| 819 |
logs.append(msg)
|
| 820 |
logger.info(msg)
|
| 821 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 822 |
# 验证输入
|
| 823 |
if not zip_file:
|
| 824 |
decrement_concurrency()
|
|
|
|
| 1708 |
|
| 1709 |
def main():
|
| 1710 |
"""云端入口"""
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1711 |
app = create_cloud_ui()
|
| 1712 |
# 启用队列,魔搭CPU按需分配,无需设置并发上限
|
| 1713 |
app.queue()
|
src/mfa_model_downloader.py
CHANGED
|
@@ -3,7 +3,6 @@
|
|
| 3 |
MFA 模型下载模块
|
| 4 |
支持下载中文和日文的声学模型及字典
|
| 5 |
包含 SHA256 哈希校验,确保文件完整性
|
| 6 |
-
支持 GitHub 代理镜像(云端环境)
|
| 7 |
"""
|
| 8 |
|
| 9 |
import os
|
|
@@ -12,7 +11,7 @@ import logging
|
|
| 12 |
import urllib.request
|
| 13 |
import urllib.error
|
| 14 |
from pathlib import Path
|
| 15 |
-
from typing import Optional, Callable
|
| 16 |
|
| 17 |
logger = logging.getLogger(__name__)
|
| 18 |
|
|
@@ -20,14 +19,6 @@ logger = logging.getLogger(__name__)
|
|
| 20 |
GITHUB_RELEASE_BASE = "https://github.com/MontrealCorpusTools/mfa-models/releases/download"
|
| 21 |
GITHUB_RAW_BASE = "https://raw.githubusercontent.com/MontrealCorpusTools/mfa-models/main"
|
| 22 |
|
| 23 |
-
# GitHub 代理镜像列表(云端环境使用)
|
| 24 |
-
# 格式: 代理前缀 + 原始 GitHub URL
|
| 25 |
-
GITHUB_PROXIES = [
|
| 26 |
-
"https://ghfast.top/",
|
| 27 |
-
"https://gh-proxy.com/",
|
| 28 |
-
"", # 最后尝试直连
|
| 29 |
-
]
|
| 30 |
-
|
| 31 |
# 支持的语言配置
|
| 32 |
# 格式: {语言代码: {名称, 声学模型信息, 字典信息}}
|
| 33 |
# sha256: 官方文件的 SHA256 哈希值(清理空行后),用于校验文件完整性
|
|
@@ -168,36 +159,16 @@ def _verify_file_integrity(
|
|
| 168 |
return True, "文件完整"
|
| 169 |
|
| 170 |
|
| 171 |
-
def _is_cloud_environment() -> bool:
|
| 172 |
-
"""检测是否在云端环境运行"""
|
| 173 |
-
return any([
|
| 174 |
-
os.environ.get("SPACE_ID"), # Hugging Face Spaces
|
| 175 |
-
os.environ.get("MODELSCOPE_SPACE"), # 魔塔社区
|
| 176 |
-
os.environ.get("GRADIO_SERVER_NAME"), # 通用 Gradio 云端
|
| 177 |
-
Path("/home/studio_service").exists(), # 魔搭创空间特征目录
|
| 178 |
-
])
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
def _get_proxy_urls(original_url: str) -> List[str]:
|
| 182 |
-
"""
|
| 183 |
-
获取带代理的 URL 列表
|
| 184 |
-
云端环境返回多个代理 URL,本地环境只返回原始 URL
|
| 185 |
-
"""
|
| 186 |
-
if _is_cloud_environment():
|
| 187 |
-
return [f"{proxy}{original_url}" for proxy in GITHUB_PROXIES]
|
| 188 |
-
return [original_url]
|
| 189 |
-
|
| 190 |
-
|
| 191 |
def _download_file(
|
| 192 |
url: str,
|
| 193 |
dest_path: str,
|
| 194 |
progress_callback: Optional[Callable[[str], None]] = None
|
| 195 |
) -> bool:
|
| 196 |
"""
|
| 197 |
-
下载文件
|
| 198 |
|
| 199 |
参数:
|
| 200 |
-
url: 下载地址
|
| 201 |
dest_path: 保存路径
|
| 202 |
progress_callback: 进度回调
|
| 203 |
|
|
@@ -209,73 +180,65 @@ def _download_file(
|
|
| 209 |
if progress_callback:
|
| 210 |
progress_callback(msg)
|
| 211 |
|
| 212 |
-
|
| 213 |
-
|
| 214 |
-
|
| 215 |
-
|
| 216 |
-
|
| 217 |
-
|
| 218 |
-
|
| 219 |
-
|
| 220 |
-
|
| 221 |
-
|
| 222 |
-
|
| 223 |
-
|
| 224 |
-
|
| 225 |
-
|
| 226 |
-
|
| 227 |
-
|
| 228 |
-
|
| 229 |
-
total_size = response.headers.get("Content-Length")
|
| 230 |
-
if total_size:
|
| 231 |
-
total_size = int(total_size)
|
| 232 |
-
log(f"文件大小: {total_size / 1024 / 1024:.1f} MB")
|
| 233 |
-
|
| 234 |
-
# 分块下载
|
| 235 |
-
block_size = 8192
|
| 236 |
-
downloaded = 0
|
| 237 |
-
|
| 238 |
-
with open(temp_path, "wb") as f:
|
| 239 |
-
while True:
|
| 240 |
-
chunk = response.read(block_size)
|
| 241 |
-
if not chunk:
|
| 242 |
-
break
|
| 243 |
-
f.write(chunk)
|
| 244 |
-
downloaded += len(chunk)
|
| 245 |
-
|
| 246 |
-
if total_size and downloaded % (block_size * 100) == 0:
|
| 247 |
-
percent = downloaded / total_size * 100
|
| 248 |
-
log(f"下载进度: {percent:.1f}%")
|
| 249 |
-
|
| 250 |
-
# 下载完成,重命名
|
| 251 |
-
if os.path.exists(dest_path):
|
| 252 |
-
os.remove(dest_path)
|
| 253 |
-
os.rename(temp_path, dest_path)
|
| 254 |
|
| 255 |
-
|
| 256 |
-
|
|
|
|
| 257 |
|
| 258 |
-
|
| 259 |
-
|
| 260 |
-
|
| 261 |
-
|
| 262 |
-
|
| 263 |
-
|
| 264 |
-
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
|
| 268 |
-
|
| 269 |
-
os.remove(temp_path)
|
| 270 |
-
except:
|
| 271 |
-
pass
|
| 272 |
|
| 273 |
-
#
|
| 274 |
-
if
|
| 275 |
-
|
| 276 |
-
|
| 277 |
-
|
| 278 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 279 |
|
| 280 |
|
| 281 |
|
|
|
|
| 3 |
MFA 模型下载模块
|
| 4 |
支持下载中文和日文的声学模型及字典
|
| 5 |
包含 SHA256 哈希校验,确保文件完整性
|
|
|
|
| 6 |
"""
|
| 7 |
|
| 8 |
import os
|
|
|
|
| 11 |
import urllib.request
|
| 12 |
import urllib.error
|
| 13 |
from pathlib import Path
|
| 14 |
+
from typing import Optional, Callable
|
| 15 |
|
| 16 |
logger = logging.getLogger(__name__)
|
| 17 |
|
|
|
|
| 19 |
GITHUB_RELEASE_BASE = "https://github.com/MontrealCorpusTools/mfa-models/releases/download"
|
| 20 |
GITHUB_RAW_BASE = "https://raw.githubusercontent.com/MontrealCorpusTools/mfa-models/main"
|
| 21 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 22 |
# 支持的语言配置
|
| 23 |
# 格式: {语言代码: {名称, 声学模型信息, 字典信息}}
|
| 24 |
# sha256: 官方文件的 SHA256 哈希值(清理空行后),用于校验文件完整性
|
|
|
|
| 159 |
return True, "文件完整"
|
| 160 |
|
| 161 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 162 |
def _download_file(
|
| 163 |
url: str,
|
| 164 |
dest_path: str,
|
| 165 |
progress_callback: Optional[Callable[[str], None]] = None
|
| 166 |
) -> bool:
|
| 167 |
"""
|
| 168 |
+
下载文件
|
| 169 |
|
| 170 |
参数:
|
| 171 |
+
url: 下载地址
|
| 172 |
dest_path: 保存路径
|
| 173 |
progress_callback: 进度回调
|
| 174 |
|
|
|
|
| 180 |
if progress_callback:
|
| 181 |
progress_callback(msg)
|
| 182 |
|
| 183 |
+
try:
|
| 184 |
+
log(f"正在下载: {url}")
|
| 185 |
+
|
| 186 |
+
# 创建目录
|
| 187 |
+
os.makedirs(os.path.dirname(dest_path), exist_ok=True)
|
| 188 |
+
|
| 189 |
+
# 下载到临时文件,完成后再重命名
|
| 190 |
+
temp_path = dest_path + ".downloading"
|
| 191 |
+
|
| 192 |
+
# 下载文件
|
| 193 |
+
req = urllib.request.Request(url, headers={"User-Agent": "Mozilla/5.0"})
|
| 194 |
+
|
| 195 |
+
with urllib.request.urlopen(req, timeout=120) as response:
|
| 196 |
+
total_size = response.headers.get("Content-Length")
|
| 197 |
+
if total_size:
|
| 198 |
+
total_size = int(total_size)
|
| 199 |
+
log(f"文件大小: {total_size / 1024 / 1024:.1f} MB")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 200 |
|
| 201 |
+
# 分块下载
|
| 202 |
+
block_size = 8192
|
| 203 |
+
downloaded = 0
|
| 204 |
|
| 205 |
+
with open(temp_path, "wb") as f:
|
| 206 |
+
while True:
|
| 207 |
+
chunk = response.read(block_size)
|
| 208 |
+
if not chunk:
|
| 209 |
+
break
|
| 210 |
+
f.write(chunk)
|
| 211 |
+
downloaded += len(chunk)
|
| 212 |
+
|
| 213 |
+
if total_size and downloaded % (block_size * 100) == 0:
|
| 214 |
+
percent = downloaded / total_size * 100
|
| 215 |
+
log(f"下载进度: {percent:.1f}%")
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
+
# 下载完成,重命名
|
| 218 |
+
if os.path.exists(dest_path):
|
| 219 |
+
os.remove(dest_path)
|
| 220 |
+
os.rename(temp_path, dest_path)
|
| 221 |
+
|
| 222 |
+
log(f"下载完成: {dest_path}")
|
| 223 |
+
return True
|
| 224 |
+
|
| 225 |
+
except urllib.error.HTTPError as e:
|
| 226 |
+
log(f"HTTP 错误: {e.code} - {e.reason}")
|
| 227 |
+
return False
|
| 228 |
+
except urllib.error.URLError as e:
|
| 229 |
+
log(f"网络错误: {e.reason}")
|
| 230 |
+
return False
|
| 231 |
+
except Exception as e:
|
| 232 |
+
log(f"下载失败: {e}")
|
| 233 |
+
return False
|
| 234 |
+
finally:
|
| 235 |
+
# 清理临时文件
|
| 236 |
+
temp_path = dest_path + ".downloading"
|
| 237 |
+
if os.path.exists(temp_path):
|
| 238 |
+
try:
|
| 239 |
+
os.remove(temp_path)
|
| 240 |
+
except:
|
| 241 |
+
pass
|
| 242 |
|
| 243 |
|
| 244 |
|
src/mfa_runner.py
CHANGED
|
@@ -102,18 +102,6 @@ def _build_mfa_env(mfa_root: Optional[Path] = None) -> dict:
|
|
| 102 |
str(MFA_ENGINE_DIR / "bin"),
|
| 103 |
]
|
| 104 |
env["PATH"] = ";".join(mfa_paths) + ";" + env.get("PATH", "")
|
| 105 |
-
|
| 106 |
-
# Windows: 设置 MFA_ROOT_DIR 到纯 ASCII 路径
|
| 107 |
-
# 解决用户名包含中文时 OpenFST 无法写入文件的问题
|
| 108 |
-
# 优先使用项目目录下的 mfa_temp,确保路径为纯 ASCII
|
| 109 |
-
if mfa_root:
|
| 110 |
-
env["MFA_ROOT_DIR"] = str(mfa_root)
|
| 111 |
-
else:
|
| 112 |
-
# 默认使用项目目录下的 mfa_data 作为 MFA 数据目录
|
| 113 |
-
mfa_data_dir = BASE_DIR / "mfa_data"
|
| 114 |
-
mfa_data_dir.mkdir(parents=True, exist_ok=True)
|
| 115 |
-
env["MFA_ROOT_DIR"] = str(mfa_data_dir)
|
| 116 |
-
logger.info(f"设置 MFA_ROOT_DIR: {env['MFA_ROOT_DIR']}")
|
| 117 |
else:
|
| 118 |
# Linux: 设置会话独立的 MFA_ROOT_DIR(解决并发数据库冲突)
|
| 119 |
if mfa_root:
|
|
|
|
| 102 |
str(MFA_ENGINE_DIR / "bin"),
|
| 103 |
]
|
| 104 |
env["PATH"] = ";".join(mfa_paths) + ";" + env.get("PATH", "")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 105 |
else:
|
| 106 |
# Linux: 设置会话独立的 MFA_ROOT_DIR(解决并发数据库冲突)
|
| 107 |
if mfa_root:
|
src/pipeline.py
CHANGED
|
@@ -346,24 +346,12 @@ class VoiceBankPipeline:
|
|
| 346 |
import subprocess
|
| 347 |
import numpy as np
|
| 348 |
|
| 349 |
-
# 确保路径是绝对路径,避免编码问题
|
| 350 |
-
audio_path = os.path.abspath(audio_path)
|
| 351 |
-
|
| 352 |
-
# 检查文件是否存在
|
| 353 |
-
if not os.path.exists(audio_path):
|
| 354 |
-
raise RuntimeError(f"音频文件不存在: {audio_path}")
|
| 355 |
-
|
| 356 |
# 使用 ffprobe 获取采样率
|
| 357 |
probe_cmd = [
|
| 358 |
'ffprobe', '-v', 'quiet', '-print_format', 'json',
|
| 359 |
'-show_streams', audio_path
|
| 360 |
]
|
| 361 |
-
probe_result = subprocess.run(
|
| 362 |
-
probe_cmd,
|
| 363 |
-
capture_output=True,
|
| 364 |
-
encoding='utf-8',
|
| 365 |
-
errors='replace'
|
| 366 |
-
)
|
| 367 |
|
| 368 |
sr = 44100 # 默认采样率
|
| 369 |
if probe_result.returncode == 0:
|
|
@@ -384,18 +372,13 @@ class VoiceBankPipeline:
|
|
| 384 |
'-acodec', 'pcm_s16le',
|
| 385 |
'-ac', '1', # 单声道
|
| 386 |
'-ar', str(sr), # 保持原采样率
|
| 387 |
-
'-v', '
|
| 388 |
'-'
|
| 389 |
]
|
| 390 |
result = subprocess.run(cmd, capture_output=True)
|
| 391 |
|
| 392 |
if result.returncode != 0:
|
| 393 |
-
|
| 394 |
-
stderr_msg = result.stderr.decode('utf-8', errors='replace').strip()
|
| 395 |
-
raise RuntimeError(f"ffmpeg 读取音频失败: {audio_path}\n错误: {stderr_msg}")
|
| 396 |
-
|
| 397 |
-
if len(result.stdout) == 0:
|
| 398 |
-
raise RuntimeError(f"ffmpeg 输出为空,可能是文件损坏或格式不支持: {audio_path}")
|
| 399 |
|
| 400 |
# 转换为 numpy 数组
|
| 401 |
audio = np.frombuffer(result.stdout, dtype=np.int16).astype(np.float32) / 32768.0
|
|
|
|
| 346 |
import subprocess
|
| 347 |
import numpy as np
|
| 348 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 349 |
# 使用 ffprobe 获取采样率
|
| 350 |
probe_cmd = [
|
| 351 |
'ffprobe', '-v', 'quiet', '-print_format', 'json',
|
| 352 |
'-show_streams', audio_path
|
| 353 |
]
|
| 354 |
+
probe_result = subprocess.run(probe_cmd, capture_output=True, text=True)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 355 |
|
| 356 |
sr = 44100 # 默认采样率
|
| 357 |
if probe_result.returncode == 0:
|
|
|
|
| 372 |
'-acodec', 'pcm_s16le',
|
| 373 |
'-ac', '1', # 单声道
|
| 374 |
'-ar', str(sr), # 保持原采样率
|
| 375 |
+
'-v', 'quiet',
|
| 376 |
'-'
|
| 377 |
]
|
| 378 |
result = subprocess.run(cmd, capture_output=True)
|
| 379 |
|
| 380 |
if result.returncode != 0:
|
| 381 |
+
raise RuntimeError(f"ffmpeg 读取音频失败: {audio_path}")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 382 |
|
| 383 |
# 转换为 numpy 数组
|
| 384 |
audio = np.frombuffer(result.stdout, dtype=np.int16).astype(np.float32) / 32768.0
|
tests/.gitkeep
DELETED
|
File without changes
|
tests/test_mfa_model_downloader.py
DELETED
|
@@ -1,182 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
|
| 2 |
-
"""
|
| 3 |
-
MFA 模型下载模块单元测试
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import sys
|
| 8 |
-
import unittest
|
| 9 |
-
from unittest.mock import patch, MagicMock
|
| 10 |
-
from pathlib import Path
|
| 11 |
-
|
| 12 |
-
# 添加项目根目录到路径
|
| 13 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 14 |
-
|
| 15 |
-
from src.mfa_model_downloader import (
|
| 16 |
-
get_available_languages,
|
| 17 |
-
LANGUAGE_MODELS,
|
| 18 |
-
GITHUB_RELEASE_BASE,
|
| 19 |
-
download_acoustic_model,
|
| 20 |
-
download_dictionary,
|
| 21 |
-
download_language_models,
|
| 22 |
-
)
|
| 23 |
-
|
| 24 |
-
|
| 25 |
-
class TestGetAvailableLanguages(unittest.TestCase):
|
| 26 |
-
"""测试获取可用语言列表"""
|
| 27 |
-
|
| 28 |
-
def test_returns_dict(self):
|
| 29 |
-
"""返回值应为字典"""
|
| 30 |
-
result = get_available_languages()
|
| 31 |
-
self.assertIsInstance(result, dict)
|
| 32 |
-
|
| 33 |
-
def test_contains_mandarin(self):
|
| 34 |
-
"""应包含中文"""
|
| 35 |
-
result = get_available_languages()
|
| 36 |
-
self.assertIn("mandarin", result)
|
| 37 |
-
self.assertEqual(result["mandarin"], "中文 (普通话)")
|
| 38 |
-
|
| 39 |
-
def test_contains_japanese(self):
|
| 40 |
-
"""应包含日文"""
|
| 41 |
-
result = get_available_languages()
|
| 42 |
-
self.assertIn("japanese", result)
|
| 43 |
-
self.assertEqual(result["japanese"], "日文")
|
| 44 |
-
|
| 45 |
-
|
| 46 |
-
class TestLanguageModelsConfig(unittest.TestCase):
|
| 47 |
-
"""测试语言模型配置"""
|
| 48 |
-
|
| 49 |
-
def test_mandarin_config_complete(self):
|
| 50 |
-
"""中文配置应完整"""
|
| 51 |
-
config = LANGUAGE_MODELS["mandarin"]
|
| 52 |
-
self.assertIn("name", config)
|
| 53 |
-
self.assertIn("acoustic", config)
|
| 54 |
-
self.assertIn("dictionary", config)
|
| 55 |
-
|
| 56 |
-
# 声学模型配置
|
| 57 |
-
acoustic = config["acoustic"]
|
| 58 |
-
self.assertIn("tag", acoustic)
|
| 59 |
-
self.assertIn("filename", acoustic)
|
| 60 |
-
self.assertTrue(acoustic["filename"].endswith(".zip"))
|
| 61 |
-
|
| 62 |
-
# 字典配置
|
| 63 |
-
dictionary = config["dictionary"]
|
| 64 |
-
self.assertIn("tag", dictionary)
|
| 65 |
-
self.assertIn("filename", dictionary)
|
| 66 |
-
self.assertTrue(dictionary["filename"].endswith(".dict"))
|
| 67 |
-
|
| 68 |
-
def test_japanese_config_complete(self):
|
| 69 |
-
"""日文配置应完整"""
|
| 70 |
-
config = LANGUAGE_MODELS["japanese"]
|
| 71 |
-
self.assertIn("name", config)
|
| 72 |
-
self.assertIn("acoustic", config)
|
| 73 |
-
self.assertIn("dictionary", config)
|
| 74 |
-
|
| 75 |
-
def test_acoustic_url_format(self):
|
| 76 |
-
"""声学模型 URL 格式应正确"""
|
| 77 |
-
for lang, config in LANGUAGE_MODELS.items():
|
| 78 |
-
acoustic = config["acoustic"]
|
| 79 |
-
url = f"{GITHUB_RELEASE_BASE}/{acoustic['tag']}/{acoustic['filename']}"
|
| 80 |
-
self.assertTrue(url.startswith("https://github.com/"))
|
| 81 |
-
self.assertIn("mfa-models", url)
|
| 82 |
-
|
| 83 |
-
def test_dictionary_url_format(self):
|
| 84 |
-
"""字典 URL 格式应正确"""
|
| 85 |
-
for lang, config in LANGUAGE_MODELS.items():
|
| 86 |
-
dictionary = config["dictionary"]
|
| 87 |
-
url = f"{GITHUB_RELEASE_BASE}/{dictionary['tag']}/{dictionary['filename']}"
|
| 88 |
-
self.assertTrue(url.startswith("https://github.com/"))
|
| 89 |
-
self.assertIn("dictionary-", url)
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
class TestDownloadAcousticModel(unittest.TestCase):
|
| 93 |
-
"""测试声学模型下载"""
|
| 94 |
-
|
| 95 |
-
def test_invalid_language(self):
|
| 96 |
-
"""不支持的语言应返回失败"""
|
| 97 |
-
success, result = download_acoustic_model("invalid_lang", "/tmp")
|
| 98 |
-
self.assertFalse(success)
|
| 99 |
-
self.assertIn("不支持的语言", result)
|
| 100 |
-
|
| 101 |
-
@patch('src.mfa_model_downloader._download_file')
|
| 102 |
-
def test_download_called_with_correct_url(self, mock_download):
|
| 103 |
-
"""应使用正确的 URL 下载"""
|
| 104 |
-
mock_download.return_value = True
|
| 105 |
-
|
| 106 |
-
with patch('os.path.exists', return_value=False):
|
| 107 |
-
download_acoustic_model("mandarin", "/tmp/models")
|
| 108 |
-
|
| 109 |
-
# 验证调用参数
|
| 110 |
-
call_args = mock_download.call_args
|
| 111 |
-
url = call_args[0][0]
|
| 112 |
-
self.assertIn("mandarin_mfa.zip", url)
|
| 113 |
-
self.assertIn("acoustic-mandarin_mfa", url)
|
| 114 |
-
|
| 115 |
-
@patch('os.path.exists')
|
| 116 |
-
def test_skip_if_exists(self, mock_exists):
|
| 117 |
-
"""文件已存在时应跳过下载"""
|
| 118 |
-
mock_exists.return_value = True
|
| 119 |
-
|
| 120 |
-
success, result = download_acoustic_model("mandarin", "/tmp/models")
|
| 121 |
-
self.assertTrue(success)
|
| 122 |
-
self.assertIn("mandarin_mfa.zip", result)
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
class TestDownloadDictionary(unittest.TestCase):
|
| 126 |
-
"""测试字典下载"""
|
| 127 |
-
|
| 128 |
-
def test_invalid_language(self):
|
| 129 |
-
"""不支持的语言应返回失败"""
|
| 130 |
-
success, result = download_dictionary("invalid_lang", "/tmp")
|
| 131 |
-
self.assertFalse(success)
|
| 132 |
-
self.assertIn("不支持的语言", result)
|
| 133 |
-
|
| 134 |
-
@patch('src.mfa_model_downloader._download_file')
|
| 135 |
-
def test_download_called_with_correct_url(self, mock_download):
|
| 136 |
-
"""应使用正确的 URL 下载"""
|
| 137 |
-
mock_download.return_value = True
|
| 138 |
-
|
| 139 |
-
with patch('os.path.exists', return_value=False):
|
| 140 |
-
download_dictionary("japanese", "/tmp/models")
|
| 141 |
-
|
| 142 |
-
call_args = mock_download.call_args
|
| 143 |
-
url = call_args[0][0]
|
| 144 |
-
self.assertIn("github.com", url)
|
| 145 |
-
self.assertIn("dictionary-japanese", url)
|
| 146 |
-
|
| 147 |
-
|
| 148 |
-
class TestDownloadLanguageModels(unittest.TestCase):
|
| 149 |
-
"""测试完整语言模型下载"""
|
| 150 |
-
|
| 151 |
-
def test_invalid_language(self):
|
| 152 |
-
"""不支持的语言应返回失败"""
|
| 153 |
-
success, acoustic, dict_path = download_language_models("invalid", "/tmp")
|
| 154 |
-
self.assertFalse(success)
|
| 155 |
-
|
| 156 |
-
@patch('src.mfa_model_downloader.download_dictionary')
|
| 157 |
-
@patch('src.mfa_model_downloader.download_acoustic_model')
|
| 158 |
-
def test_downloads_both_models(self, mock_acoustic, mock_dict):
|
| 159 |
-
"""应同时下载声学模型和字典"""
|
| 160 |
-
mock_acoustic.return_value = (True, "/tmp/acoustic.zip")
|
| 161 |
-
mock_dict.return_value = (True, "/tmp/dict.dict")
|
| 162 |
-
|
| 163 |
-
success, acoustic, dict_path = download_language_models("mandarin", "/tmp")
|
| 164 |
-
|
| 165 |
-
self.assertTrue(success)
|
| 166 |
-
mock_acoustic.assert_called_once()
|
| 167 |
-
mock_dict.assert_called_once()
|
| 168 |
-
|
| 169 |
-
@patch('src.mfa_model_downloader.download_dictionary')
|
| 170 |
-
@patch('src.mfa_model_downloader.download_acoustic_model')
|
| 171 |
-
def test_stops_on_acoustic_failure(self, mock_acoustic, mock_dict):
|
| 172 |
-
"""声学模型下载失败时应停止"""
|
| 173 |
-
mock_acoustic.return_value = (False, "下载失败")
|
| 174 |
-
|
| 175 |
-
success, _, _ = download_language_models("mandarin", "/tmp")
|
| 176 |
-
|
| 177 |
-
self.assertFalse(success)
|
| 178 |
-
mock_dict.assert_not_called()
|
| 179 |
-
|
| 180 |
-
|
| 181 |
-
if __name__ == "__main__":
|
| 182 |
-
unittest.main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_mfa_runner.py
DELETED
|
@@ -1,243 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
|
| 2 |
-
"""
|
| 3 |
-
MFA 运行模块单元测试
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import sys
|
| 8 |
-
import unittest
|
| 9 |
-
from unittest.mock import patch, MagicMock
|
| 10 |
-
from pathlib import Path
|
| 11 |
-
|
| 12 |
-
# 添加项目根目录到路径
|
| 13 |
-
sys.path.insert(0, str(Path(__file__).parent.parent))
|
| 14 |
-
|
| 15 |
-
from src.mfa_runner import (
|
| 16 |
-
check_mfa_available,
|
| 17 |
-
_build_mfa_env,
|
| 18 |
-
run_mfa_alignment,
|
| 19 |
-
run_mfa_validate,
|
| 20 |
-
BASE_DIR,
|
| 21 |
-
MFA_ENGINE_DIR,
|
| 22 |
-
MFA_PYTHON,
|
| 23 |
-
)
|
| 24 |
-
|
| 25 |
-
|
| 26 |
-
class TestCheckMfaAvailable(unittest.TestCase):
|
| 27 |
-
"""测试 MFA 环境检查"""
|
| 28 |
-
|
| 29 |
-
@patch('src.mfa_runner.MFA_ENGINE_DIR')
|
| 30 |
-
def test_returns_false_when_dir_not_exists(self, mock_dir):
|
| 31 |
-
"""目录不存在时应返回 False"""
|
| 32 |
-
mock_path = MagicMock()
|
| 33 |
-
mock_path.exists.return_value = False
|
| 34 |
-
|
| 35 |
-
with patch.object(Path, 'exists', return_value=False):
|
| 36 |
-
# 由于模块级变量,需要重新导入或直接测试逻辑
|
| 37 |
-
pass
|
| 38 |
-
|
| 39 |
-
def test_path_constants_defined(self):
|
| 40 |
-
"""路径常量应正确定义"""
|
| 41 |
-
self.assertIsInstance(BASE_DIR, Path)
|
| 42 |
-
self.assertIsInstance(MFA_ENGINE_DIR, Path)
|
| 43 |
-
self.assertIsInstance(MFA_PYTHON, Path)
|
| 44 |
-
|
| 45 |
-
# 验证路径结构
|
| 46 |
-
self.assertTrue(str(MFA_ENGINE_DIR).endswith("mfa_engine"))
|
| 47 |
-
self.assertTrue(str(MFA_PYTHON).endswith("python.exe"))
|
| 48 |
-
|
| 49 |
-
|
| 50 |
-
class TestBuildMfaEnv(unittest.TestCase):
|
| 51 |
-
"""测试 MFA 环境变量构建"""
|
| 52 |
-
|
| 53 |
-
def test_returns_dict(self):
|
| 54 |
-
"""应返回字典"""
|
| 55 |
-
env = _build_mfa_env()
|
| 56 |
-
self.assertIsInstance(env, dict)
|
| 57 |
-
|
| 58 |
-
def test_path_contains_mfa_dirs(self):
|
| 59 |
-
"""PATH 应包含 MFA 相关目录"""
|
| 60 |
-
env = _build_mfa_env()
|
| 61 |
-
path = env.get("PATH", "")
|
| 62 |
-
|
| 63 |
-
self.assertIn("mfa_engine", path)
|
| 64 |
-
self.assertIn("Library", path)
|
| 65 |
-
|
| 66 |
-
def test_preserves_original_path(self):
|
| 67 |
-
"""应保留原始 PATH"""
|
| 68 |
-
original_path = os.environ.get("PATH", "")
|
| 69 |
-
env = _build_mfa_env()
|
| 70 |
-
|
| 71 |
-
# 原始 PATH 应在新 PATH 中
|
| 72 |
-
self.assertIn(original_path.split(";")[0], env["PATH"])
|
| 73 |
-
|
| 74 |
-
|
| 75 |
-
class TestRunMfaAlignment(unittest.TestCase):
|
| 76 |
-
"""测试 MFA 对齐功能"""
|
| 77 |
-
|
| 78 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 79 |
-
def test_fails_when_mfa_unavailable(self, mock_check):
|
| 80 |
-
"""MFA 不可用时应返回失败"""
|
| 81 |
-
mock_check.return_value = False
|
| 82 |
-
|
| 83 |
-
success, msg = run_mfa_alignment("/input", "/output")
|
| 84 |
-
|
| 85 |
-
self.assertFalse(success)
|
| 86 |
-
self.assertIn("不可用", msg)
|
| 87 |
-
|
| 88 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 89 |
-
@patch('os.path.isdir')
|
| 90 |
-
def test_fails_when_corpus_not_exists(self, mock_isdir, mock_check):
|
| 91 |
-
"""输入目录不存在时应返回失败"""
|
| 92 |
-
mock_check.return_value = True
|
| 93 |
-
mock_isdir.return_value = False
|
| 94 |
-
|
| 95 |
-
success, msg = run_mfa_alignment("/nonexistent", "/output")
|
| 96 |
-
|
| 97 |
-
self.assertFalse(success)
|
| 98 |
-
self.assertIn("不存在", msg)
|
| 99 |
-
|
| 100 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 101 |
-
@patch('os.path.isdir')
|
| 102 |
-
@patch('os.path.isfile')
|
| 103 |
-
def test_fails_when_dict_not_exists(self, mock_isfile, mock_isdir, mock_check):
|
| 104 |
-
"""字典文件不存在时应返回失败"""
|
| 105 |
-
mock_check.return_value = True
|
| 106 |
-
mock_isdir.return_value = True
|
| 107 |
-
mock_isfile.return_value = False
|
| 108 |
-
|
| 109 |
-
success, msg = run_mfa_alignment(
|
| 110 |
-
"/input", "/output",
|
| 111 |
-
dict_path="/nonexistent.dict"
|
| 112 |
-
)
|
| 113 |
-
|
| 114 |
-
self.assertFalse(success)
|
| 115 |
-
self.assertIn("不存在", msg)
|
| 116 |
-
|
| 117 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 118 |
-
@patch('os.path.isdir')
|
| 119 |
-
@patch('os.path.isfile')
|
| 120 |
-
@patch('os.makedirs')
|
| 121 |
-
@patch('subprocess.run')
|
| 122 |
-
def test_calls_subprocess_with_correct_args(
|
| 123 |
-
self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
|
| 124 |
-
):
|
| 125 |
-
"""应使用正确的参数调用 subprocess"""
|
| 126 |
-
mock_check.return_value = True
|
| 127 |
-
mock_isdir.return_value = True
|
| 128 |
-
mock_isfile.return_value = True
|
| 129 |
-
mock_run.return_value = MagicMock(returncode=0, stdout="", stderr="")
|
| 130 |
-
|
| 131 |
-
run_mfa_alignment(
|
| 132 |
-
"/input", "/output",
|
| 133 |
-
dict_path="/dict.dict",
|
| 134 |
-
model_path="/model.zip",
|
| 135 |
-
single_speaker=True,
|
| 136 |
-
clean=True
|
| 137 |
-
)
|
| 138 |
-
|
| 139 |
-
# 验证 subprocess.run 被调用
|
| 140 |
-
mock_run.assert_called_once()
|
| 141 |
-
|
| 142 |
-
# 验证命令参数
|
| 143 |
-
call_args = mock_run.call_args
|
| 144 |
-
cmd = call_args[0][0]
|
| 145 |
-
|
| 146 |
-
self.assertIn("align", cmd)
|
| 147 |
-
self.assertIn("/input", cmd)
|
| 148 |
-
self.assertIn("/dict.dict", cmd)
|
| 149 |
-
self.assertIn("/model.zip", cmd)
|
| 150 |
-
self.assertIn("/output", cmd)
|
| 151 |
-
self.assertIn("--single_speaker", cmd)
|
| 152 |
-
self.assertIn("--clean", cmd)
|
| 153 |
-
|
| 154 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 155 |
-
@patch('os.path.isdir')
|
| 156 |
-
@patch('os.path.isfile')
|
| 157 |
-
@patch('os.makedirs')
|
| 158 |
-
@patch('subprocess.run')
|
| 159 |
-
def test_returns_success_on_zero_returncode(
|
| 160 |
-
self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
|
| 161 |
-
):
|
| 162 |
-
"""返回码为 0 时应返回成功"""
|
| 163 |
-
mock_check.return_value = True
|
| 164 |
-
mock_isdir.return_value = True
|
| 165 |
-
mock_isfile.return_value = True
|
| 166 |
-
mock_run.return_value = MagicMock(returncode=0, stdout="完成", stderr="")
|
| 167 |
-
|
| 168 |
-
success, msg = run_mfa_alignment(
|
| 169 |
-
"/input", "/output",
|
| 170 |
-
dict_path="/dict.dict",
|
| 171 |
-
model_path="/model.zip"
|
| 172 |
-
)
|
| 173 |
-
|
| 174 |
-
self.assertTrue(success)
|
| 175 |
-
|
| 176 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 177 |
-
@patch('os.path.isdir')
|
| 178 |
-
@patch('os.path.isfile')
|
| 179 |
-
@patch('os.makedirs')
|
| 180 |
-
@patch('subprocess.run')
|
| 181 |
-
def test_returns_failure_on_nonzero_returncode(
|
| 182 |
-
self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
|
| 183 |
-
):
|
| 184 |
-
"""返回码非 0 时应返回失败"""
|
| 185 |
-
mock_check.return_value = True
|
| 186 |
-
mock_isdir.return_value = True
|
| 187 |
-
mock_isfile.return_value = True
|
| 188 |
-
mock_run.return_value = MagicMock(returncode=1, stdout="", stderr="错误")
|
| 189 |
-
|
| 190 |
-
success, msg = run_mfa_alignment(
|
| 191 |
-
"/input", "/output",
|
| 192 |
-
dict_path="/dict.dict",
|
| 193 |
-
model_path="/model.zip"
|
| 194 |
-
)
|
| 195 |
-
|
| 196 |
-
self.assertFalse(success)
|
| 197 |
-
|
| 198 |
-
|
| 199 |
-
class TestRunMfaValidate(unittest.TestCase):
|
| 200 |
-
"""测试 MFA 验证功能"""
|
| 201 |
-
|
| 202 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 203 |
-
def test_fails_when_mfa_unavailable(self, mock_check):
|
| 204 |
-
"""MFA 不可用时应返回失败"""
|
| 205 |
-
mock_check.return_value = False
|
| 206 |
-
|
| 207 |
-
success, msg = run_mfa_validate("/corpus")
|
| 208 |
-
|
| 209 |
-
self.assertFalse(success)
|
| 210 |
-
self.assertIn("不可用", msg)
|
| 211 |
-
|
| 212 |
-
|
| 213 |
-
class TestProgressCallback(unittest.TestCase):
|
| 214 |
-
"""测试进度回调"""
|
| 215 |
-
|
| 216 |
-
@patch('src.mfa_runner.check_mfa_available')
|
| 217 |
-
@patch('os.path.isdir')
|
| 218 |
-
@patch('os.path.isfile')
|
| 219 |
-
@patch('os.makedirs')
|
| 220 |
-
@patch('subprocess.run')
|
| 221 |
-
def test_callback_called_on_success(
|
| 222 |
-
self, mock_run, mock_makedirs, mock_isfile, mock_isdir, mock_check
|
| 223 |
-
):
|
| 224 |
-
"""成功时应调用回调"""
|
| 225 |
-
mock_check.return_value = True
|
| 226 |
-
mock_isdir.return_value = True
|
| 227 |
-
mock_isfile.return_value = True
|
| 228 |
-
mock_run.return_value = MagicMock(returncode=0, stdout="完成", stderr="")
|
| 229 |
-
callback = MagicMock()
|
| 230 |
-
|
| 231 |
-
run_mfa_alignment(
|
| 232 |
-
"/input", "/output",
|
| 233 |
-
dict_path="/dict.dict",
|
| 234 |
-
model_path="/model.zip",
|
| 235 |
-
progress_callback=callback
|
| 236 |
-
)
|
| 237 |
-
|
| 238 |
-
# 回调应被调用(至少一次)
|
| 239 |
-
self.assertTrue(callback.called)
|
| 240 |
-
|
| 241 |
-
|
| 242 |
-
if __name__ == "__main__":
|
| 243 |
-
unittest.main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
tests/test_silero_vad_downloader.py
DELETED
|
@@ -1,65 +0,0 @@
|
|
| 1 |
-
# -*- coding: utf-8 -*-
|
| 2 |
-
"""
|
| 3 |
-
Silero VAD 下载模块测试
|
| 4 |
-
"""
|
| 5 |
-
|
| 6 |
-
import os
|
| 7 |
-
import tempfile
|
| 8 |
-
import unittest
|
| 9 |
-
from unittest.mock import patch, MagicMock
|
| 10 |
-
|
| 11 |
-
from src.silero_vad_downloader import (
|
| 12 |
-
get_vad_model_path,
|
| 13 |
-
is_vad_model_downloaded,
|
| 14 |
-
download_silero_vad,
|
| 15 |
-
ensure_vad_model,
|
| 16 |
-
SILERO_VAD_CONFIG
|
| 17 |
-
)
|
| 18 |
-
|
| 19 |
-
|
| 20 |
-
class TestSileroVadDownloader(unittest.TestCase):
|
| 21 |
-
"""Silero VAD 下载器测试类"""
|
| 22 |
-
|
| 23 |
-
def test_get_vad_model_path(self):
|
| 24 |
-
"""测试获取模型路径"""
|
| 25 |
-
models_dir = "/test/models"
|
| 26 |
-
expected = os.path.join(models_dir, "silero_vad", "silero_vad.onnx")
|
| 27 |
-
self.assertEqual(get_vad_model_path(models_dir), expected)
|
| 28 |
-
|
| 29 |
-
def test_is_vad_model_downloaded_false(self):
|
| 30 |
-
"""测试模型未下载时返回 False"""
|
| 31 |
-
with tempfile.TemporaryDirectory() as tmpdir:
|
| 32 |
-
self.assertFalse(is_vad_model_downloaded(tmpdir))
|
| 33 |
-
|
| 34 |
-
def test_is_vad_model_downloaded_true(self):
|
| 35 |
-
"""测试模型已下载时返回 True"""
|
| 36 |
-
with tempfile.TemporaryDirectory() as tmpdir:
|
| 37 |
-
vad_dir = os.path.join(tmpdir, "silero_vad")
|
| 38 |
-
os.makedirs(vad_dir)
|
| 39 |
-
model_path = os.path.join(vad_dir, "silero_vad.onnx")
|
| 40 |
-
with open(model_path, "w") as f:
|
| 41 |
-
f.write("dummy")
|
| 42 |
-
self.assertTrue(is_vad_model_downloaded(tmpdir))
|
| 43 |
-
|
| 44 |
-
def test_download_silero_vad_already_exists(self):
|
| 45 |
-
"""测试模型已存在时跳过下载"""
|
| 46 |
-
with tempfile.TemporaryDirectory() as tmpdir:
|
| 47 |
-
vad_dir = os.path.join(tmpdir, "silero_vad")
|
| 48 |
-
os.makedirs(vad_dir)
|
| 49 |
-
model_path = os.path.join(vad_dir, "silero_vad.onnx")
|
| 50 |
-
with open(model_path, "w") as f:
|
| 51 |
-
f.write("dummy")
|
| 52 |
-
|
| 53 |
-
success, result = download_silero_vad(tmpdir)
|
| 54 |
-
self.assertTrue(success)
|
| 55 |
-
self.assertEqual(result, model_path)
|
| 56 |
-
|
| 57 |
-
def test_config_values(self):
|
| 58 |
-
"""测试配置值正确性"""
|
| 59 |
-
self.assertEqual(SILERO_VAD_CONFIG["onnx_filename"], "silero_vad.onnx")
|
| 60 |
-
self.assertEqual(SILERO_VAD_CONFIG["jit_filename"], "silero_vad.jit")
|
| 61 |
-
self.assertIn("snakers4/silero-vad", SILERO_VAD_CONFIG["repo"])
|
| 62 |
-
|
| 63 |
-
|
| 64 |
-
if __name__ == "__main__":
|
| 65 |
-
unittest.main()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
目前便携版仅支持Windows!.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
没有条件可以使用:https://www.modelscope.cn/studios/TNOTqwq/JinrikiHelper/
|