JinrikiHelper / src /bank_sort.py
TNOT's picture
完成了单音频或选择文件夹 → Silero VAD 切片 → Whisper 转录 → 生成 .lab → MFA 强制对齐 → 简单导出,已可用
9361148
# -*- coding: utf-8 -*-
"""
音频库排序模块
按时长排序并导出音频片段
"""
import os
import glob
import shutil
import logging
from typing import Optional, Callable, Tuple, Dict, List
logger = logging.getLogger(__name__)
def sort_and_export_bank(
    bank_dir: str,
    output_dir: str,
    max_per_word: int = 100,
    progress_callback: Optional[Callable[[str], None]] = None
) -> Tuple[bool, str, Dict[str, int]]:
    """Export the longest clips of every word in an audio bank.

    ``bank_dir`` is scanned recursively for ``*.wav`` files. The first path
    component below ``bank_dir`` is used as the word, so WAV files lying
    directly in ``bank_dir`` are ignored. For each word the clips are ordered
    by the value returned by ``audiofile.duration`` (largest first) and the
    leading ``max_per_word`` of them are copied into ``output_dir`` as
    ``{word}_{rank}.wav`` with ``rank`` starting at 0.

    Args:
        bank_dir: Root directory of the audio bank (``word/clip.wav`` layout).
        output_dir: Directory receiving the exported copies; created if it
            does not exist.
        max_per_word: Maximum number of clips exported per word. Negative
            values are treated as 0.
        progress_callback: Optional callable that receives every progress
            message in addition to the module logger.

    Returns:
        A ``(success, message, counts)`` tuple. ``counts`` maps each word to
        the number of files exported for it and is empty on failure. Any
        exception raised while processing is logged and reported as
        ``(False, str(error), {})`` instead of propagating.
    """
    def log(msg: str) -> None:
        logger.info(msg)
        if progress_callback:
            progress_callback(msg)

    try:
        os.makedirs(output_dir, exist_ok=True)

        # Escape bank_dir so that glob metacharacters in the directory name
        # ("[", "]", "*", "?") are matched literally. Sorting makes the rank
        # of equal-duration clips independent of filesystem listing order.
        pattern = os.path.join(glob.escape(bank_dir), '**', '*.wav')
        wav_files = sorted(glob.glob(pattern, recursive=True))
        if not wav_files:
            return False, "未找到WAV文件", {}

        # Imported lazily and inside the try block: a missing dependency is
        # reported through the return tuple like every other failure, and an
        # empty bank does not need the package at all.
        import audiofile

        log(f"扫描到 {len(wav_files)} 个WAV文件")

        # word -> [(path, duration), ...]
        stats: Dict[str, List[Tuple[str, float]]] = {}
        for path in wav_files:
            parts = os.path.relpath(path, bank_dir).split(os.sep)
            if len(parts) < 2:
                # File sits directly in bank_dir and belongs to no word.
                continue
            # Register the word before reading so that a word whose clips are
            # all unreadable still appears in the result with a count of 0.
            clips = stats.setdefault(parts[0], [])
            try:
                clips.append((path, audiofile.duration(path)))
            except Exception as e:
                # Best effort: one unreadable clip must not abort the export.
                log(f"警告: 无法读取 {path}: {e}")

        log(f"统计到 {len(stats)} 个词条")

        # A negative limit would otherwise slice from the end of the list
        # (dropping the shortest clips) instead of exporting nothing.
        # None is passed through so it keeps meaning "no limit".
        limit = None if max_per_word is None else max(0, max_per_word)

        export_counts: Dict[str, int] = {}
        for word, clips in stats.items():
            # Longest first; the sort is stable, so ties keep path order.
            ranked = sorted(clips, key=lambda clip: clip[1], reverse=True)
            count = 0
            for idx, (src_path, _) in enumerate(ranked[:limit]):
                dst_path = os.path.join(output_dir, f'{word}_{idx}.wav')
                shutil.copyfile(src_path, dst_path)
                count += 1
            export_counts[word] = count
            log(f"处理词条: {word} ({count} 个文件)")

        total = sum(export_counts.values())
        summary = f"导出完成: {len(export_counts)} 个词条,{total} 个文件"
        log(summary)
        return True, summary, export_counts

    except Exception as e:
        logger.exception("排序导出失败: %s", e)
        return False, str(e), {}
# Script entry point, kept for backward compatibility: exports the "bank"
# directory into "bank_export", echoing progress messages to stdout.
if __name__ == "__main__":
    _ok, message, _counts = sort_and_export_bank(
        'bank',
        'bank_export',
        max_per_word=100,
        progress_callback=print,
    )
    print(f"结果: {message}")