JinrikiHelper / src /textgrid2bank.py
TNOT's picture
完成了单音频或选择文件夹 → Silero VAD 切片 → Whisper 转录 → 生成 .lab → MFA 强制对齐 → 简单导出,已可用
9361148
# -*- coding: utf-8 -*-
"""
TextGrid 转音频库模块
从 TextGrid 对齐结果中提取分词片段
"""
import os
import glob
import logging
from typing import Optional, Callable, Dict, Tuple
logger = logging.getLogger(__name__)
def textgrid_to_bank(
    wav_dir: str,
    textgrid_dir: str,
    output_dir: str,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Tuple[bool, str, Dict[str, int]]:
    """
    Convert TextGrid alignment results into a per-word audio bank.

    For every ``*.TextGrid`` file in ``textgrid_dir`` the WAV file with the
    same stem is looked up in ``wav_dir``. Each labelled interval of the
    first tier is cut out of the audio and written to
    ``<output_dir>/<word>/<n>.wav``, where ``<word>`` is the part of the
    label before the first ``':'`` and ``<n>`` is the lowest unused number
    in that directory.

    Args:
        wav_dir: Directory containing the source WAV files.
        textgrid_dir: Directory containing the TextGrid files.
        output_dir: Directory the clips are written to (created if missing).
        progress_callback: Optional callable that receives each progress
            message in addition to the module logger.

    Returns:
        A ``(success, message, word_counts)`` tuple. ``word_counts`` maps
        each word to the number of clips written for it during this call.
        On failure ``success`` is False, ``message`` describes the error
        and ``word_counts`` is empty.
    """
    import textgrid
    import audiofile

    def log(msg: str):
        logger.info(msg)
        if progress_callback:
            progress_callback(msg)

    try:
        os.makedirs(output_dir, exist_ok=True)

        # glob returns files in arbitrary order; sort so that clip numbering
        # is reproducible from run to run.
        tg_files = sorted(glob.glob(os.path.join(textgrid_dir, '*.TextGrid')))
        if not tg_files:
            return False, "未找到TextGrid文件", {}

        total_files = len(tg_files)
        log(f"处理 {total_files} 个TextGrid文件")

        word_counts: Dict[str, int] = {}
        # Next candidate clip number per word, so the search for a free
        # file name does not restart from 1 for every single clip.
        next_index: Dict[str, int] = {}

        for idx, tg_path in enumerate(tg_files, start=1):
            # Swap only the real extension; str.replace would also rewrite
            # '.TextGrid' occurring elsewhere in the file name.
            stem = os.path.splitext(os.path.basename(tg_path))[0]
            basename = stem + '.wav'
            wav_path = os.path.join(wav_dir, basename)

            if not os.path.exists(wav_path):
                log(f"警告: 找不到 {basename}")
                continue

            tg = textgrid.TextGrid.fromFile(tg_path)
            audio, sr = audiofile.read(wav_path)
            # audiofile returns (samples,) for mono and (channels, samples)
            # for multi-channel audio, so time is always the last axis.
            num_samples = audio.shape[-1]

            # NOTE(review): the first tier is assumed to be the word tier.
            for word in tg[0]:
                mark = word.mark.strip()
                if mark in ('SP', 'AP', ''):
                    continue

                word_text = mark.split(':')[0].strip()
                if not word_text:
                    # A label such as ':x' would otherwise dump clips
                    # straight into output_dir under an empty key.
                    continue

                start_sample = max(0, int(word.minTime * sr))
                end_sample = min(num_samples, int(word.maxTime * sr))
                if end_sample <= start_sample:
                    # Interval lies outside the audio or collapsed to zero
                    # samples; there is nothing to write.
                    continue

                word_dir = os.path.join(output_dir, word_text)
                os.makedirs(word_dir, exist_ok=True)

                # Find the next free clip number, resuming from the last one
                # used for this word and skipping files from earlier runs.
                index = next_index.get(word_text, 1)
                while os.path.exists(os.path.join(word_dir, f'{index}.wav')):
                    index += 1
                next_index[word_text] = index + 1

                # Cut along the time axis (works for mono and multi-channel).
                segment = audio[..., start_sample:end_sample]
                output_path = os.path.join(word_dir, f'{index}.wav')
                audiofile.write(output_path, segment, sr)

                word_counts[word_text] = word_counts.get(word_text, 0) + 1

            log(f"进度: {idx}/{total_files} - {basename}")

        total = sum(word_counts.values())
        log(f"提取完成: {len(word_counts)} 个词条,共 {total} 个片段")
        return True, f"提取完成: {len(word_counts)} 个词条", word_counts

    except Exception as e:
        # Boundary handler: callers expect a result tuple, not an exception.
        logger.exception("TextGrid转换失败: %s", e)
        return False, str(e), {}
# Script entry point kept for backward compatibility with standalone usage.
if __name__ == "__main__":
    import argparse
    import sys

    # The historical hard-coded paths remain the defaults, so running the
    # script without arguments behaves as before.
    parser = argparse.ArgumentParser(
        description="Extract per-word audio clips from TextGrid alignments."
    )
    parser.add_argument(
        'wav_dir',
        nargs='?',
        default=r'E:\Workspace\umamusume-voice-text-extractor\extracted',
        help="directory containing the source WAV files",
    )
    parser.add_argument(
        'textgrid_dir',
        nargs='?',
        default=r'E:\SVS\DiffSinger\MakeDiffSinger\temp\revised',
        help="directory containing the TextGrid files",
    )
    parser.add_argument(
        'output_dir',
        nargs='?',
        default='bank',
        help="directory the clips are written to",
    )
    args = parser.parse_args()

    success, msg, _stats = textgrid_to_bank(
        wav_dir=args.wav_dir,
        textgrid_dir=args.textgrid_dir,
        output_dir=args.output_dir,
        progress_callback=print
    )
    print(f"结果: {msg}")
    # Report failure to the calling shell instead of always exiting with 0.
    sys.exit(0 if success else 1)