# -*- coding: utf-8 -*- """ 人力V助手 (JinrikiHelper) GUI 基于 CustomTkinter 构建 作者:TNOT """ import customtkinter as ctk from tkinter import filedialog, messagebox import threading import logging import os import sys import json # 配置日志 logging.basicConfig( level=logging.INFO, format='%(asctime)s - %(name)s - %(levelname)s - %(message)s', handlers=[logging.StreamHandler(sys.stdout)] ) logger = logging.getLogger(__name__) # 设置外观 ctk.set_appearance_mode("System") ctk.set_default_color_theme("blue") class ConfigManager: """配置管理器""" CONFIG_FILE = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "config.json" ) WHISPER_MODELS = { "whisper-small": {"name": "openai/whisper-small", "desc": "小型模型 (~500MB)", "size": "~500MB"}, "whisper-medium": {"name": "openai/whisper-medium", "desc": "中型模型 (~1.5GB)", "size": "~1.5GB"} } def __init__(self): self._default_models_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "models" ) self.config = self._load() def _load(self) -> dict: """加载配置""" default = { "whisper_model": "whisper-small", "models_dir": self._default_models_dir, "mfa_dir": os.path.join(self._default_models_dir, "mfa"), "bank_dir": os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "bank" ), "show_log": False # 默认关闭日志 } if os.path.exists(self.CONFIG_FILE): try: with open(self.CONFIG_FILE, 'r', encoding='utf-8') as f: default.update(json.load(f)) except Exception as e: logger.warning(f"加载配置失败: {e}") return default def save(self): """保存配置""" try: with open(self.CONFIG_FILE, 'w', encoding='utf-8') as f: json.dump(self.config, f, ensure_ascii=False, indent=2) except Exception as e: logger.error(f"保存配置失败: {e}") def get(self, key: str, default=None): return self.config.get(key, default) def set(self, key: str, value): self.config[key] = value self.save() class ModelDownloadFrame(ctk.CTkFrame): """模型配置功能面板""" def __init__(self, master, log_callback, config: ConfigManager): super().__init__(master) self.log_callback = log_callback self.config = config self.whisper_pipe = None self._download_thread = None self._setup_ui() def _setup_ui(self): row = 0 # 便携版提示 ctk.CTkLabel( self, text="💡 便携版已附带除 whisper-medium 以外的所有模型", font=ctk.CTkFont(size=12), text_color="#4a9a6a" ).grid(row=row, column=0, columnspan=3, padx=10, pady=(10, 15), sticky="w") row += 1 # Whisper 模型区域 ctk.CTkLabel(self, text="Whisper 语音识别模型", font=ctk.CTkFont(size=14, weight="bold")).grid( row=row, column=0, columnspan=3, padx=10, pady=(10, 5), sticky="w" ) row += 1 ctk.CTkLabel(self, text="模型版本:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.whisper_model_var = ctk.StringVar(value=self.config.get("whisper_model")) ctk.CTkComboBox( self, values=list(ConfigManager.WHISPER_MODELS.keys()), variable=self.whisper_model_var, width=200, command=self._on_model_change ).grid(row=row, column=1, padx=5, pady=5, sticky="w") self.model_desc_label = ctk.CTkLabel(self, text=self._get_model_desc(), text_color="gray") self.model_desc_label.grid(row=row, column=2, padx=10, pady=5, sticky="w") row += 1 ctk.CTkLabel(self, text="模型目录:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.models_dir_var = ctk.StringVar(value=self.config.get("models_dir")) ctk.CTkEntry(self, textvariable=self.models_dir_var, width=320).grid(row=row, column=1, padx=5, pady=5, sticky="w") ctk.CTkButton(self, text="浏览", width=60, command=self._browse_models_dir).grid(row=row, column=2, padx=5, pady=5, sticky="w") row += 1 # Whisper 状态(初始隐藏) self.whisper_status = ctk.CTkLabel(self, text="", text_color="gray") self.whisper_status.grid(row=row, column=1, padx=5, pady=5, sticky="w") self.whisper_btn = ctk.CTkButton(self, text="下载 / 加载模型", command=self._download_whisper, width=140) self.whisper_btn.grid(row=row, column=2, padx=5, pady=5, sticky="w") row += 1 self.progress_label = ctk.CTkLabel(self, text="", text_color="gray") self.progress_label.grid(row=row, column=0, columnspan=3, padx=10, pady=5, sticky="w") row += 1 # Silero VAD 区域 ctk.CTkLabel(self, text="Silero VAD 模型", font=ctk.CTkFont(size=14, weight="bold")).grid( row=row, column=0, columnspan=3, padx=10, pady=(20, 5), sticky="w" ) row += 1 ctk.CTkLabel(self, text="用于语音活动检测和音频切片", text_color="gray").grid( row=row, column=0, columnspan=3, padx=10, pady=(0, 10), sticky="w" ) row += 1 # VAD 状态(初始隐藏) self.vad_status = ctk.CTkLabel(self, text="", text_color="gray") self.vad_status.grid(row=row, column=1, padx=5, pady=5, sticky="w") self.vad_btn = ctk.CTkButton(self, text="下载模型", command=self._download_vad, width=140) self.vad_btn.grid(row=row, column=2, padx=5, pady=5, sticky="w") row += 1 # MFA 模型区域 ctk.CTkLabel(self, text="MFA 声学模型", font=ctk.CTkFont(size=14, weight="bold")).grid( row=row, column=0, columnspan=3, padx=10, pady=(20, 5), sticky="w" ) row += 1 ctk.CTkLabel(self, text="Montreal Forced Aligner 模型,用于语音对齐", text_color="gray").grid( row=row, column=0, columnspan=3, padx=10, pady=(0, 10), sticky="w" ) row += 1 ctk.CTkLabel(self, text="模型目录:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.mfa_dir_var = ctk.StringVar(value=self.config.get("mfa_dir")) ctk.CTkEntry(self, textvariable=self.mfa_dir_var, width=320).grid(row=row, column=1, padx=5, pady=5, sticky="w") ctk.CTkButton(self, text="浏览", width=60, command=self._browse_mfa_dir).grid(row=row, column=2, padx=5, pady=5) row += 1 ctk.CTkLabel(self, text="选择语言:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.mfa_lang_var = ctk.StringVar(value="mandarin") ctk.CTkComboBox( self, values=["mandarin", "japanese"], variable=self.mfa_lang_var, width=200, command=self._on_mfa_lang_change ).grid(row=row, column=1, padx=5, pady=5, sticky="w") self.mfa_lang_desc = ctk.CTkLabel(self, text="中文 (普通话)", text_color="gray") self.mfa_lang_desc.grid(row=row, column=2, padx=5, pady=5, sticky="w") row += 1 # MFA 状态(初始隐藏) self.mfa_status = ctk.CTkLabel(self, text="", text_color="gray") self.mfa_status.grid(row=row, column=1, padx=5, pady=5, sticky="w") self.mfa_download_btn = ctk.CTkButton(self, text="下载模型", command=self._download_mfa_models, width=140) self.mfa_download_btn.grid(row=row, column=2, padx=5, pady=5, sticky="w") def _get_model_desc(self): info = ConfigManager.WHISPER_MODELS.get(self.whisper_model_var.get(), {}) return info.get('desc', '') def _on_model_change(self, choice): self.model_desc_label.configure(text=self._get_model_desc()) self.config.set("whisper_model", choice) # 切换模型时清空状态显示,重置 pipeline self.whisper_status.configure(text="") self.whisper_pipe = None def _browse_models_dir(self): path = filedialog.askdirectory(title="选择模型下载目录") if path: self.models_dir_var.set(path) self.config.set("models_dir", path) def _browse_mfa_dir(self): path = filedialog.askdirectory(title="选择 MFA 模型目录") if path: self.mfa_dir_var.set(path) self.config.set("mfa_dir", path) def _on_mfa_lang_change(self, choice): from src.mfa_model_downloader import get_available_languages self.mfa_lang_desc.configure(text=get_available_languages().get(choice, "")) def _download_vad(self): if self._download_thread and self._download_thread.is_alive(): return self.vad_btn.configure(state="disabled") self.vad_status.configure(text="⏳ 下载中...", text_color="gray") self._download_thread = threading.Thread(target=self._do_download_vad, daemon=True) self._download_thread.start() def _do_download_vad(self): from src.silero_vad_downloader import download_silero_vad self.log_callback("开始下载 Silero VAD 模型...") success, result = download_silero_vad(self.config.get("models_dir"), self.log_callback) if success: self.after(0, lambda: self.vad_status.configure(text="✅ 已下载", text_color="green")) self.log_callback(f"VAD 模型已保存: {result}") else: self.after(0, lambda: self.vad_status.configure(text="❌ 下载失败", text_color="red")) self.after(0, lambda: self.vad_btn.configure(state="normal")) def _download_mfa_models(self): if self._download_thread and self._download_thread.is_alive(): return self.mfa_download_btn.configure(state="disabled") self.mfa_status.configure(text="⏳ 下载中...", text_color="gray") self._download_thread = threading.Thread(target=self._do_download_mfa, daemon=True) self._download_thread.start() def _do_download_mfa(self): from src.mfa_model_downloader import download_language_models language = self.mfa_lang_var.get() output_dir = self.mfa_dir_var.get() os.makedirs(output_dir, exist_ok=True) self.log_callback(f"开始下载 MFA 模型: {language}") success, acoustic_path, dict_path = download_language_models( language=language, output_dir=output_dir, progress_callback=self.log_callback ) if success: self.after(0, lambda: self.mfa_status.configure(text="✅ 已下载", text_color="green")) self.log_callback(f"声学模型: {acoustic_path}") self.log_callback(f"字典文件: {dict_path}") else: self.after(0, lambda: self.mfa_status.configure(text="❌ 下载失败", text_color="red")) self.after(0, lambda: self.mfa_download_btn.configure(state="normal")) def _download_whisper(self): if self._download_thread and self._download_thread.is_alive(): return self.whisper_btn.configure(state="disabled") self.whisper_status.configure(text="⏳ 加载中...", text_color="gray") self._download_thread = threading.Thread(target=self._do_download_whisper, daemon=True) self._download_thread.start() def _do_download_whisper(self): try: self._update_progress("正在加载 transformers 库...") from transformers import pipeline import torch model_key = self.whisper_model_var.get() model_name = ConfigManager.WHISPER_MODELS[model_key]["name"] cache_dir = os.path.join(self.models_dir_var.get(), "whisper") os.makedirs(cache_dir, exist_ok=True) self._update_progress(f"正在下载/加载 {model_key}...") self.log_callback(f"开始加载 Whisper 模型: {model_name}") os.environ["HF_HOME"] = cache_dir os.environ["TRANSFORMERS_CACHE"] = cache_dir self.whisper_pipe = pipeline( "automatic-speech-recognition", model=model_name, torch_dtype=torch.float16 if torch.cuda.is_available() else torch.float32, device_map="auto", model_kwargs={"cache_dir": cache_dir} ) self._update_progress("") self.after(0, lambda: self.whisper_status.configure(text="✅ 已就绪", text_color="green")) self.after(0, lambda: self.whisper_btn.configure(state="normal", text="重新加载")) self.log_callback("Whisper 模型加载完成") except Exception as e: self._update_progress("") self.after(0, lambda: self.whisper_status.configure(text="❌ 加载失败", text_color="red")) self.after(0, lambda: self.whisper_btn.configure(state="normal")) self.log_callback(f"Whisper 模型加载失败: {e}") logger.error(f"Whisper 模型加载失败: {e}", exc_info=True) def _update_progress(self, text): self.after(0, lambda: self.progress_label.configure(text=text)) def get_whisper_pipeline(self): return self.whisper_pipe def get_models_dir(self): return self.models_dir_var.get() def get_mfa_dir(self): return self.mfa_dir_var.get() def get_whisper_model_name(self): return ConfigManager.WHISPER_MODELS[self.whisper_model_var.get()]["name"] class MakeVoiceBankFrame(ctk.CTkFrame): """制作音源页面 - 简化工作流""" def __init__(self, master, log_callback, config: ConfigManager, model_frame: ModelDownloadFrame): super().__init__(master) self.log_callback = log_callback self.config = config self.model_frame = model_frame self._is_running = False self._setup_ui() self._check_mfa_status() def _setup_ui(self): self.scroll_frame = ctk.CTkScrollableFrame(self) self.scroll_frame.pack(fill="both", expand=True, padx=5, pady=5) row = 0 # ========== 基本设置 ========== ctk.CTkLabel( self.scroll_frame, text="基本设置", font=ctk.CTkFont(size=16, weight="bold") ).grid(row=row, column=0, columnspan=3, padx=10, pady=(10, 15), sticky="w") row += 1 # 音源名称 ctk.CTkLabel(self.scroll_frame, text="音源名称:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.source_name_var = ctk.StringVar(value="my_voice") ctk.CTkEntry(self.scroll_frame, textvariable=self.source_name_var, width=200).grid( row=row, column=1, padx=5, pady=5, sticky="w" ) ctk.CTkLabel(self.scroll_frame, text="输出到 bank/[音源名称]/", text_color="gray").grid( row=row, column=2, padx=5, pady=5, sticky="w" ) row += 1 # 输入音频 ctk.CTkLabel(self.scroll_frame, text="输入音频:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.input_audio_var = ctk.StringVar() ctk.CTkEntry(self.scroll_frame, textvariable=self.input_audio_var, width=300).grid( row=row, column=1, padx=5, pady=5 ) btn_frame = ctk.CTkFrame(self.scroll_frame, fg_color="transparent") btn_frame.grid(row=row, column=2, padx=5, pady=5) ctk.CTkButton(btn_frame, text="文件", width=50, command=self._browse_input_file, fg_color="#5a6a7a", hover_color="#4a5a6a").pack(side="left", padx=2) ctk.CTkButton(btn_frame, text="文件夹", width=60, command=self._browse_input_dir, fg_color="#5a6a7a", hover_color="#4a5a6a").pack(side="left", padx=2) row += 1 # 输出目录 ctk.CTkLabel(self.scroll_frame, text="输出目录:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.output_dir_var = ctk.StringVar(value=self.config.get("bank_dir", "bank")) ctk.CTkEntry(self.scroll_frame, textvariable=self.output_dir_var, width=300).grid( row=row, column=1, padx=5, pady=5 ) ctk.CTkButton(self.scroll_frame, text="浏览", width=60, command=self._browse_output_dir, fg_color="#5a6a7a", hover_color="#4a5a6a").grid( row=row, column=2, padx=5, pady=5, sticky="w" ) row += 1 # 分隔线 ctk.CTkFrame(self.scroll_frame, height=2, fg_color="gray50").grid( row=row, column=0, columnspan=3, padx=10, pady=15, sticky="ew" ) row += 1 # ========== 模型选择 ========== ctk.CTkLabel( self.scroll_frame, text="模型选择", font=ctk.CTkFont(size=16, weight="bold") ).grid(row=row, column=0, columnspan=3, padx=10, pady=(10, 15), sticky="w") row += 1 # Whisper模型 ctk.CTkLabel(self.scroll_frame, text="Whisper模型:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.whisper_combo = ctk.CTkComboBox( self.scroll_frame, values=["(扫描中...)"], width=250 ) self.whisper_combo.grid(row=row, column=1, padx=5, pady=5, sticky="w") ctk.CTkButton(self.scroll_frame, text="刷新", width=60, command=self._refresh_whisper_models, fg_color="#5a6a7a", hover_color="#4a5a6a").grid( row=row, column=2, padx=5, pady=5, sticky="w" ) row += 1 # MFA字典 ctk.CTkLabel(self.scroll_frame, text="MFA字典:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.dict_combo = ctk.CTkComboBox(self.scroll_frame, values=["(扫描中...)"], width=250) self.dict_combo.grid(row=row, column=1, padx=5, pady=5, sticky="w") row += 1 # MFA声学模型 ctk.CTkLabel(self.scroll_frame, text="MFA声学模型:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.acoustic_combo = ctk.CTkComboBox(self.scroll_frame, values=["(扫描中...)"], width=250) self.acoustic_combo.grid(row=row, column=1, padx=5, pady=5, sticky="w") ctk.CTkButton(self.scroll_frame, text="刷新", width=60, command=self._refresh_mfa_models, fg_color="#5a6a7a", hover_color="#4a5a6a").grid( row=row, column=2, padx=5, pady=5, sticky="w" ) row += 1 # 语言 ctk.CTkLabel(self.scroll_frame, text="转录语言:").grid(row=row, column=0, padx=10, pady=5, sticky="w") self.language_var = ctk.StringVar(value="chinese") ctk.CTkComboBox( self.scroll_frame, values=["chinese", "japanese"], variable=self.language_var, width=150, command=self._on_language_change ).grid(row=row, column=1, padx=5, pady=5, sticky="w") row += 1 # 分隔线 ctk.CTkFrame(self.scroll_frame, height=2, fg_color="gray50").grid( row=row, column=0, columnspan=3, padx=10, pady=15, sticky="ew" ) row += 1 # ========== MFA状态 ========== self.mfa_status_label = ctk.CTkLabel( self.scroll_frame, text="⏳ 检查 MFA 环境...", font=ctk.CTkFont(size=12) ) self.mfa_status_label.grid(row=row, column=0, columnspan=3, padx=10, pady=5, sticky="w") row += 1 # 分隔线 ctk.CTkFrame(self.scroll_frame, height=2, fg_color="gray50").grid( row=row, column=0, columnspan=3, padx=10, pady=15, sticky="ew" ) row += 1 # ========== 执行按钮 ========== ctk.CTkLabel( self.scroll_frame, text="执行流程", font=ctk.CTkFont(size=16, weight="bold") ).grid(row=row, column=0, columnspan=3, padx=10, pady=(10, 15), sticky="w") row += 1 # 按钮容器 - 优化排版 btn_container = ctk.CTkFrame(self.scroll_frame, fg_color="transparent") btn_container.grid(row=row, column=0, columnspan=3, padx=10, pady=10, sticky="ew") # 分步执行按钮 - 降饱和颜色 self.step0_btn = ctk.CTkButton( btn_container, text="步骤0: 切片+转录", command=self._run_step0, width=150, height=36, fg_color="#5c7a9a", hover_color="#4a6888" ) self.step0_btn.pack(side="left", padx=8) self.step1_btn = ctk.CTkButton( btn_container, text="步骤1: MFA对齐", command=self._run_step1, width=150, height=36, fg_color="#6a9a7a", hover_color="#588868" ) self.step1_btn.pack(side="left", padx=8) row += 1 # 一键执行 - 降饱和 self.full_btn = ctk.CTkButton( self.scroll_frame, text="▶ 一键执行全部流程", command=self._run_full, width=320, height=40, fg_color="#8a6a8a", hover_color="#785878", font=ctk.CTkFont(size=14, weight="bold") ) self.full_btn.grid(row=row, column=0, columnspan=3, pady=15) row += 1 # 进度提示 self.progress_label = ctk.CTkLabel(self.scroll_frame, text="", text_color="gray") self.progress_label.grid(row=row, column=0, columnspan=3, padx=10, pady=5, sticky="w") # 初始化模型列表 self.after(500, self._refresh_all_models) def _check_mfa_status(self): from src.mfa_runner import check_mfa_available if check_mfa_available(): self.mfa_status_label.configure(text="✅ MFA 环境已就绪", text_color="green") else: self.mfa_status_label.configure(text="❌ MFA 环境不可用,请检查 tools/mfa_engine", text_color="red") def _refresh_all_models(self): self._refresh_whisper_models() self._refresh_mfa_models() def _refresh_whisper_models(self): from src.pipeline import scan_whisper_models models_dir = self.model_frame.get_models_dir() models = scan_whisper_models(models_dir) all_models = list(ConfigManager.WHISPER_MODELS.values()) preset_names = [m["name"] for m in all_models] for m in models: if m not in preset_names: preset_names.append(m) if preset_names: self.whisper_combo.configure(values=preset_names) self.whisper_combo.set(preset_names[0]) else: self.whisper_combo.configure(values=["openai/whisper-small"]) self.whisper_combo.set("openai/whisper-small") def _refresh_mfa_models(self): from src.pipeline import scan_mfa_models mfa_dir = self.model_frame.get_mfa_dir() models = scan_mfa_models(os.path.dirname(mfa_dir)) if models["dictionary"]: self.dict_combo.configure(values=models["dictionary"]) self.dict_combo.set(models["dictionary"][0]) else: self.dict_combo.configure(values=["(未找到字典文件)"]) self.dict_combo.set("(未找到字典文件)") if models["acoustic"]: self.acoustic_combo.configure(values=models["acoustic"]) self.acoustic_combo.set(models["acoustic"][0]) else: self.acoustic_combo.configure(values=["(未找到声学模型)"]) self.acoustic_combo.set("(未找到声学模型)") # 根据当前语言自动选择对应模型 self._auto_select_mfa_models() def _on_language_change(self, choice): """语言选择变化时自动选择对应的 MFA 模型和字典""" self._auto_select_mfa_models() def _auto_select_mfa_models(self): """根据当前语言自动选择对应的 MFA 模型和字典""" language = self.language_var.get() # 语言到 MFA 模型关键字的映射 lang_to_mfa = { "chinese": "mandarin", "japanese": "japanese", "english": "english" } mfa_keyword = lang_to_mfa.get(language, "") if not mfa_keyword: return # 自动选择字典 dict_values = self.dict_combo.cget("values") if dict_values and not dict_values[0].startswith("("): for dict_file in dict_values: if mfa_keyword in dict_file.lower(): self.dict_combo.set(dict_file) break # 自动选择声学模型 acoustic_values = self.acoustic_combo.cget("values") if acoustic_values and not acoustic_values[0].startswith("("): for acoustic_file in acoustic_values: if mfa_keyword in acoustic_file.lower(): self.acoustic_combo.set(acoustic_file) break def _browse_input_file(self): path = filedialog.askopenfilename( title="选择音频文件", filetypes=[("音频文件", "*.wav *.mp3 *.flac *.ogg *.m4a"), ("所有文件", "*.*")] ) if path: self.input_audio_var.set(path) def _browse_input_dir(self): path = filedialog.askdirectory(title="选择音频文件夹") if path: self.input_audio_var.set(path) def _browse_output_dir(self): path = filedialog.askdirectory(title="选择输出目录") if path: self.output_dir_var.set(path) self.config.set("bank_dir", path) def _get_pipeline_config(self): """获取流水线配置""" from src.pipeline import PipelineConfig mfa_dir = self.model_frame.get_mfa_dir() dict_file = self.dict_combo.get() acoustic_file = self.acoustic_combo.get() dict_path = None if dict_file and not dict_file.startswith("("): dict_path = os.path.join(mfa_dir, dict_file) acoustic_path = None if acoustic_file and not acoustic_file.startswith("("): acoustic_path = os.path.join(mfa_dir, acoustic_file) return PipelineConfig( source_name=self.source_name_var.get(), input_path=self.input_audio_var.get(), output_base_dir=self.output_dir_var.get(), models_dir=self.model_frame.get_models_dir(), whisper_model=self.whisper_combo.get(), mfa_dict_path=dict_path, mfa_model_path=acoustic_path, language=self.language_var.get() ) def _set_buttons_state(self, state: str): """设置所有按钮状态""" for btn in [self.step0_btn, self.step1_btn, self.full_btn]: btn.configure(state=state) def _run_step0(self): if self._is_running: return if not self._validate_input(): return self._is_running = True self._set_buttons_state("disabled") threading.Thread(target=self._do_step0, daemon=True).start() def _do_step0(self): from src.pipeline import VoiceBankPipeline config = self._get_pipeline_config() pipeline = VoiceBankPipeline(config, self.log_callback) self.log_callback("=" * 50) self.log_callback("【步骤0】音频预处理 (VAD切片 + Whisper转录)") success, msg, _ = pipeline.step0_preprocess() if success: self.log_callback(f"✅ {msg}") else: self.log_callback(f"❌ {msg}") self.log_callback("=" * 50) self.after(0, lambda: self._set_buttons_state("normal")) self._is_running = False def _run_step1(self): if self._is_running: return if not self._validate_source_name(): return self._is_running = True self._set_buttons_state("disabled") threading.Thread(target=self._do_step1, daemon=True).start() def _do_step1(self): from src.pipeline import VoiceBankPipeline config = self._get_pipeline_config() pipeline = VoiceBankPipeline(config, self.log_callback) self.log_callback("=" * 50) self.log_callback("【步骤1】MFA语音对齐") success, msg = pipeline.step1_mfa_align() if success: self.log_callback(f"✅ {msg}") else: self.log_callback(f"❌ {msg}") self.log_callback("=" * 50) self.after(0, lambda: self._set_buttons_state("normal")) self._is_running = False def _run_full(self): if self._is_running: return if not self._validate_input(): return self._is_running = True self._set_buttons_state("disabled") threading.Thread(target=self._do_full, daemon=True).start() def _do_full(self): from src.pipeline import VoiceBankPipeline config = self._get_pipeline_config() pipeline = VoiceBankPipeline(config, self.log_callback) success, msg = pipeline.run_make_pipeline() if not success: self.log_callback(f"❌ 流程中断: {msg}") self.after(0, lambda: self._set_buttons_state("normal")) self._is_running = False def _validate_input(self) -> bool: """验证输入""" if not self.source_name_var.get().strip(): messagebox.showerror("错误", "请输入音源名称") return False if not self.input_audio_var.get().strip(): messagebox.showerror("错误", "请选择输入音频") return False if not self.output_dir_var.get().strip(): messagebox.showerror("错误", "请选择输出目录") return False return True def _validate_source_name(self) -> bool: """验证音源名称""" if not self.source_name_var.get().strip(): messagebox.showerror("错误", "请输入音源名称") return False return True class ExportSettingsDialog(ctk.CTkToplevel): """导出设置弹窗""" def __init__(self, master, plugin, voice_bank: str, bank_dir: str, log_callback): super().__init__(master) self.plugin = plugin self.voice_bank = voice_bank self.bank_dir = bank_dir self.log_callback = log_callback self._option_widgets = {} self._is_running = False self.title(f"导出设置 - {plugin.name}") self.geometry("500x400") self.resizable(True, True) self.transient(master) self.grab_set() self._setup_ui() self._center_window() def _center_window(self): """居中显示""" self.update_idletasks() w = self.winfo_width() h = self.winfo_height() x = (self.winfo_screenwidth() - w) // 2 y = (self.winfo_screenheight() - h) // 2 self.geometry(f"{w}x{h}+{x}+{y}") def _setup_ui(self): from src.export_plugins import OptionType # 标题 header = ctk.CTkFrame(self) header.pack(fill="x", padx=10, pady=10) ctk.CTkLabel( header, text=self.plugin.name, font=ctk.CTkFont(size=16, weight="bold") ).pack(anchor="w") ctk.CTkLabel( header, text=self.plugin.description, text_color="gray" ).pack(anchor="w") ctk.CTkLabel( header, text=f"音源: {self.voice_bank}", text_color="gray" ).pack(anchor="w") # 选项区域(可滚动) self.options_frame = ctk.CTkScrollableFrame(self) self.options_frame.pack(fill="both", expand=True, padx=10, pady=5) # 动态生成选项控件 for opt in self.plugin.get_options(): self._create_option_widget(opt) # 底部按钮 btn_frame = ctk.CTkFrame(self, fg_color="transparent") btn_frame.pack(fill="x", padx=10, pady=10) self.cancel_btn = ctk.CTkButton( btn_frame, text="取消", width=80, fg_color="gray", command=self.destroy ) self.cancel_btn.pack(side="left", padx=5) self.reset_btn = ctk.CTkButton( btn_frame, text="恢复默认", width=100, fg_color="#607D8B", command=self._reset_defaults ) self.reset_btn.pack(side="left", padx=5) self.export_btn = ctk.CTkButton( btn_frame, text="导出", width=100, fg_color="#6a9a7a", hover_color="#588868", command=self._do_export ) self.export_btn.pack(side="right", padx=5) def _create_option_widget(self, opt): """创建选项控件""" from src.export_plugins import OptionType frame = ctk.CTkFrame(self.options_frame, fg_color="transparent") frame.pack(fill="x", pady=5) if opt.option_type == OptionType.LABEL: ctk.CTkLabel(frame, text=opt.label, text_color="gray").pack(anchor="w") return ctk.CTkLabel(frame, text=opt.label).pack(anchor="w") if opt.option_type == OptionType.TEXT: var = ctk.StringVar(value=str(opt.default or "")) widget = ctk.CTkEntry(frame, textvariable=var, width=300) widget.pack(anchor="w", pady=2) self._option_widgets[opt.key] = ("text", var) elif opt.option_type == OptionType.NUMBER: var = ctk.StringVar(value=str(opt.default or 0)) widget = ctk.CTkEntry(frame, textvariable=var, width=150) widget.pack(anchor="w", pady=2) self._option_widgets[opt.key] = ("number", var, opt.min_value, opt.max_value) elif opt.option_type == OptionType.SWITCH: var = ctk.BooleanVar(value=bool(opt.default)) widget = ctk.CTkSwitch(frame, text="", variable=var) widget.pack(anchor="w", pady=2) self._option_widgets[opt.key] = ("switch", var) elif opt.option_type == OptionType.COMBO: var = ctk.StringVar(value=str(opt.default or "")) widget = ctk.CTkComboBox(frame, values=opt.choices, variable=var, width=200) widget.pack(anchor="w", pady=2) self._option_widgets[opt.key] = ("combo", var) elif opt.option_type == OptionType.FILE: var = ctk.StringVar(value=str(opt.default or "")) entry_frame = ctk.CTkFrame(frame, fg_color="transparent") entry_frame.pack(anchor="w", pady=2) entry = ctk.CTkEntry(entry_frame, textvariable=var, width=250) entry.pack(side="left") btn = ctk.CTkButton( entry_frame, text="浏览", width=60, command=lambda v=var, ft=opt.file_types: self._browse_file(v, ft) ) btn.pack(side="left", padx=5) self._option_widgets[opt.key] = ("file", var) elif opt.option_type == OptionType.FOLDER: var = ctk.StringVar(value=str(opt.default or "")) entry_frame = ctk.CTkFrame(frame, fg_color="transparent") entry_frame.pack(anchor="w", pady=2) entry = ctk.CTkEntry(entry_frame, textvariable=var, width=250) entry.pack(side="left") btn = ctk.CTkButton( entry_frame, text="浏览", width=60, command=lambda v=var: self._browse_folder(v) ) btn.pack(side="left", padx=5) self._option_widgets[opt.key] = ("folder", var) if opt.description: ctk.CTkLabel( frame, text=opt.description, text_color="gray", font=ctk.CTkFont(size=11) ).pack(anchor="w") def _browse_file(self, var, file_types): ft = file_types if file_types else [("所有文件", "*.*")] path = filedialog.askopenfilename(filetypes=ft) if path: var.set(path) def _browse_folder(self, var): path = filedialog.askdirectory() if path: var.set(path) def _get_options_values(self) -> dict: values = {} for key, widget_info in self._option_widgets.items(): widget_type = widget_info[0] var = widget_info[1] if widget_type == "number": try: val = float(var.get()) min_val = widget_info[2] max_val = widget_info[3] if min_val is not None: val = max(min_val, val) if max_val is not None: val = min(max_val, val) values[key] = int(val) if val == int(val) else val except ValueError: values[key] = 0 elif widget_type == "switch": values[key] = var.get() else: values[key] = var.get() return values def _reset_defaults(self): for opt in self.plugin.get_options(): if opt.key in self._option_widgets: widget_info = self._option_widgets[opt.key] var = widget_info[1] if widget_info[0] == "switch": var.set(bool(opt.default)) else: var.set(str(opt.default or "")) def _do_export(self): if self._is_running: return self._is_running = True self._set_buttons_state("disabled") options = self._get_options_values() threading.Thread(target=self._run_export, args=(options,), daemon=True).start() def _run_export(self, options: dict): self.log_callback("=" * 50) self.log_callback(f"【{self.plugin.name}】音源: {self.voice_bank}") self.plugin.set_progress_callback(self.log_callback) success, msg = self.plugin.export(self.voice_bank, self.bank_dir, options) if success: self.log_callback(f"✅ {msg}") else: self.log_callback(f"❌ {msg}") self.log_callback("=" * 50) self.after(0, self._on_export_complete) def _on_export_complete(self): self._is_running = False self._set_buttons_state("normal") messagebox.showinfo("完成", "导出完成") def _set_buttons_state(self, state: str): self.cancel_btn.configure(state=state) self.reset_btn.configure(state=state) self.export_btn.configure(state=state) class ExportVoiceBankFrame(ctk.CTkFrame): """导出音源页面""" def __init__(self, master, log_callback, config: ConfigManager): super().__init__(master) self.log_callback = log_callback self.config = config self._plugins = {} self._load_plugins() self._setup_ui() self.after(500, self._refresh_voice_banks) def _load_plugins(self): from src.export_plugins import load_plugins plugins_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "export_plugins" ) self._plugins = load_plugins(plugins_dir) def _setup_ui(self): # 音源选择区域 ctk.CTkLabel( self, text="选择音源", font=ctk.CTkFont(size=16, weight="bold") ).grid(row=0, column=0, columnspan=3, padx=10, pady=(10, 5), sticky="w") ctk.CTkLabel(self, text="音源:").grid(row=1, column=0, padx=10, pady=5, sticky="w") self.voice_bank_var = ctk.StringVar() self.voice_bank_combo = ctk.CTkComboBox( self, values=["(扫描中...)"], variable=self.voice_bank_var, width=250, command=self._on_voice_bank_change ) self.voice_bank_combo.grid(row=1, column=1, padx=5, pady=5, sticky="w") ctk.CTkButton(self, text="刷新", width=60, command=self._refresh_voice_banks, fg_color="#5a6a7a", hover_color="#4a5a6a").grid( row=1, column=2, padx=5, pady=5, sticky="w" ) # 音源信息 self.info_label = ctk.CTkLabel(self, text="", text_color="gray") self.info_label.grid(row=2, column=0, columnspan=3, padx=10, pady=5, sticky="w") # 分隔线 ctk.CTkFrame(self, height=2, fg_color="gray50").grid( row=3, column=0, columnspan=3, padx=10, pady=15, sticky="ew" ) # 导出方式区域 ctk.CTkLabel( self, text="导出方式", font=ctk.CTkFont(size=16, weight="bold") ).grid(row=4, column=0, columnspan=3, padx=10, pady=(10, 5), sticky="w") # 插件列表(可滚动) self.plugins_frame = ctk.CTkScrollableFrame(self, height=250) self.plugins_frame.grid(row=5, column=0, columnspan=3, padx=10, pady=10, sticky="nsew") # 动态生成插件卡片 self._create_plugin_cards() # 底部按钮区域 btn_frame = ctk.CTkFrame(self, fg_color="transparent") btn_frame.grid(row=6, column=0, columnspan=3, padx=10, pady=(5, 10), sticky="ew") ctk.CTkButton( btn_frame, text="📂 打开导出文件夹", width=140, command=self._open_export_folder, fg_color="#5a6a7a", hover_color="#4a5a6a" ).pack(side="left", padx=5) ctk.CTkButton( btn_frame, text="🔌 前往导出插件仓库", width=160, command=self._open_plugin_repo, fg_color="#5a6a7a", hover_color="#4a5a6a" ).pack(side="left", padx=5) # 配置行列权重 self.grid_columnconfigure(1, weight=1) self.grid_rowconfigure(5, weight=1) def _create_plugin_cards(self): """创建插件卡片 - 整个卡片可点击""" for idx, (name, plugin) in enumerate(self._plugins.items()): # 卡片容器 - 作为按钮 card = ctk.CTkFrame( self.plugins_frame, fg_color=("#e8e8e8", "#2a2a2a"), corner_radius=8 ) card.pack(fill="x", pady=6, padx=4) card.bind("", lambda e, c=card: c.configure(fg_color=("#d8d8d8", "#3a3a3a"))) card.bind("", lambda e, c=card: c.configure(fg_color=("#e8e8e8", "#2a2a2a"))) card.bind("", lambda e, p=plugin: self._open_plugin_settings(p)) # 内容容器 content = ctk.CTkFrame(card, fg_color="transparent") content.pack(fill="x", padx=12, pady=10) content.bind("", lambda e, p=plugin: self._open_plugin_settings(p)) # 插件名称 - 白色,较大,左中部 name_label = ctk.CTkLabel( content, text=name, font=ctk.CTkFont(size=15, weight="bold"), text_color=("#1a1a1a", "#ffffff") ) name_label.pack(anchor="w") name_label.bind("", lambda e, p=plugin: self._open_plugin_settings(p)) # 描述 desc_label = ctk.CTkLabel( content, text=plugin.description, text_color="gray", font=ctk.CTkFont(size=12) ) desc_label.pack(anchor="w", pady=(2, 0)) desc_label.bind("", lambda e, p=plugin: self._open_plugin_settings(p)) # 作者和版本 if plugin.author: meta_label = ctk.CTkLabel( content, text=f"作者: {plugin.author} | 版本: {plugin.version}", text_color="gray", font=ctk.CTkFont(size=10) ) meta_label.pack(anchor="w", pady=(2, 0)) meta_label.bind("", lambda e, p=plugin: self._open_plugin_settings(p)) def _open_plugin_settings(self, plugin): """打开插件设置弹窗""" voice_bank = self.voice_bank_var.get() if not voice_bank or voice_bank.startswith("("): messagebox.showerror("错误", "请先选择有效的音源") return bank_dir = self.config.get("bank_dir", "bank") ExportSettingsDialog(self, plugin, voice_bank, bank_dir, self.log_callback) def _open_export_folder(self): """打开导出文件夹""" import subprocess export_dir = os.path.join( os.path.dirname(os.path.dirname(os.path.abspath(__file__))), "export" ) if not os.path.exists(export_dir): os.makedirs(export_dir, exist_ok=True) # Windows 使用 explorer 打开文件夹 if sys.platform == "win32": os.startfile(export_dir) elif sys.platform == "darwin": subprocess.run(["open", export_dir]) else: subprocess.run(["xdg-open", export_dir]) def _open_plugin_repo(self): """打开导出插件仓库""" import webbrowser webbrowser.open("https://github.com/TNOTawa/JinrikiHelper-Plugin") def _refresh_voice_banks(self): """刷新音源列表""" bank_dir = self.config.get("bank_dir", "bank") voice_banks = [] if os.path.exists(bank_dir): for name in os.listdir(bank_dir): source_dir = os.path.join(bank_dir, name) if os.path.isdir(source_dir) and not name.startswith('.'): slices_dir = os.path.join(source_dir, "slices") textgrid_dir = os.path.join(source_dir, "textgrid") if os.path.exists(slices_dir) or os.path.exists(textgrid_dir): voice_banks.append(name) if voice_banks: self.voice_bank_combo.configure(values=voice_banks) self.voice_bank_combo.set(voice_banks[0]) self._on_voice_bank_change(voice_banks[0]) else: self.voice_bank_combo.configure(values=["(未找到音源)"]) self.voice_bank_combo.set("(未找到音源)") self.info_label.configure(text="") def _on_voice_bank_change(self, choice): """音源选择变化""" if choice.startswith("("): self.info_label.configure(text="") return bank_dir = self.config.get("bank_dir", "bank") source_dir = os.path.join(bank_dir, choice) slices_dir = os.path.join(source_dir, "slices") textgrid_dir = os.path.join(source_dir, "textgrid") slices_count = 0 textgrid_count = 0 if os.path.exists(slices_dir): slices_count = len([f for f in os.listdir(slices_dir) if f.endswith('.wav')]) if os.path.exists(textgrid_dir): textgrid_count = len([f for f in os.listdir(textgrid_dir) if f.endswith('.TextGrid')]) self.info_label.configure( text=f"切片: {slices_count} 个 | TextGrid: {textgrid_count} 个" ) class SettingsFrame(ctk.CTkFrame): """设置页面""" def __init__(self, master, config: ConfigManager, on_log_toggle): super().__init__(master) self.config = config self.on_log_toggle = on_log_toggle self._setup_ui() def _setup_ui(self): # 标题 ctk.CTkLabel( self, text="应用设置", font=ctk.CTkFont(size=18, weight="bold") ).pack(anchor="w", padx=15, pady=(15, 20)) # 日志设置区域 log_frame = ctk.CTkFrame(self, fg_color="transparent") log_frame.pack(fill="x", padx=15, pady=10) ctk.CTkLabel( log_frame, text="界面设置", font=ctk.CTkFont(size=14, weight="bold") ).pack(anchor="w", pady=(0, 10)) # 显示日志开关 log_switch_frame = ctk.CTkFrame(log_frame, fg_color="transparent") log_switch_frame.pack(fill="x", pady=5) ctk.CTkLabel(log_switch_frame, text="显示日志输出面板").pack(side="left") self.show_log_var = ctk.BooleanVar(value=self.config.get("show_log", False)) self.log_switch = ctk.CTkSwitch( log_switch_frame, text="", variable=self.show_log_var, command=self._on_log_switch_change ) self.log_switch.pack(side="right") ctk.CTkLabel( log_frame, text="开启后将在主界面底部显示日志输出区域", text_color="gray", font=ctk.CTkFont(size=11) ).pack(anchor="w", pady=(2, 0)) # 分隔线 ctk.CTkFrame(self, height=1, fg_color="gray50").pack(fill="x", padx=15, pady=20) # 关于区域 about_frame = ctk.CTkFrame(self, fg_color="transparent") about_frame.pack(fill="x", padx=15, pady=10) ctk.CTkLabel( about_frame, text="关于", font=ctk.CTkFont(size=14, weight="bold") ).pack(anchor="w", pady=(0, 10)) ctk.CTkLabel( about_frame, text="人力V助手 (JinrikiHelper)", font=ctk.CTkFont(size=12) ).pack(anchor="w") ctk.CTkLabel( about_frame, text="作者:TNOT | 开源协议:MIT", text_color="gray", font=ctk.CTkFont(size=11) ).pack(anchor="w", pady=(2, 0)) ctk.CTkLabel( about_frame, text="本工具集成 Montreal Forced Aligner (MIT License)", text_color="gray", font=ctk.CTkFont(size=11) ).pack(anchor="w", pady=(2, 0)) def _on_log_switch_change(self): """日志开关变化""" show_log = self.show_log_var.get() self.config.set("show_log", show_log) self.on_log_toggle(show_log) class App(ctk.CTk): """主应用窗口""" def __init__(self): super().__init__() self.title("人力V助手 (JinrikiHelper)") self.geometry("750x720") self.minsize(700, 620) self.config = ConfigManager() self._setup_ui() logger.info("应用启动") # 启动后检测 MFA 引擎 self.after(100, self._check_mfa_engine) def _check_mfa_engine(self): """检测 MFA 引擎是否存在,缺失则弹窗提醒""" from src.mfa_runner import check_mfa_available if not check_mfa_available(): messagebox.showwarning( "MFA 引擎缺失", "未检测到 MFA 引擎 (tools/mfa_engine)。\n\n" "MFA 引擎需要单独下载,请查阅 README.md 获取下载地址和安装说明。" ) def _setup_ui(self): self.tabview = ctk.CTkTabview(self) self.tabview.pack(fill="both", expand=True, padx=10, pady=10) tab1 = self.tabview.add("模型下载") tab2 = self.tabview.add("制作音源") tab3 = self.tabview.add("导出音源") tab4 = self.tabview.add("设置") self.download_frame = ModelDownloadFrame(tab1, self._log, self.config) self.download_frame.pack(fill="both", expand=True, padx=5, pady=5) self.make_frame = MakeVoiceBankFrame(tab2, self._log, self.config, self.download_frame) self.make_frame.pack(fill="both", expand=True, padx=5, pady=5) self.export_frame = ExportVoiceBankFrame(tab3, self._log, self.config) self.export_frame.pack(fill="both", expand=True, padx=5, pady=5) self.settings_frame = SettingsFrame(tab4, self.config, self._toggle_log_panel) self.settings_frame.pack(fill="both", expand=True, padx=5, pady=5) # 日志区域 - 默认隐藏 self.log_frame = ctk.CTkFrame(self) ctk.CTkLabel(self.log_frame, text="日志输出:").pack(anchor="w", padx=5, pady=2) self.log_text = ctk.CTkTextbox(self.log_frame, height=100) self.log_text.pack(fill="x", padx=5, pady=5) # 根据配置决定是否显示日志 if self.config.get("show_log", False): self.log_frame.pack(fill="x", padx=10, pady=(0, 10)) def _toggle_log_panel(self, show: bool): """切换日志面板显示""" if show: self.log_frame.pack(fill="x", padx=10, pady=(0, 10)) else: self.log_frame.pack_forget() def _log(self, message): self.log_text.insert("end", f"{message}\n") self.log_text.see("end") def main(): app = App() app.mainloop() if __name__ == "__main__": main()