# -*- coding: utf-8 -*-
r"""Inspect a local AniFileBERT background training run.

Prints, in order: a JSON process-status record for the PID stored in the PID
file, the tail of the stdout and stderr logs, and a short summary of every
metrics JSON file passed with ``--metrics``.

Examples:
  .\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache
  .\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log
"""

from __future__ import annotations

import argparse
from collections import deque
from collections.abc import Sequence
import json
import os
from pathlib import Path
import subprocess
import sys
from typing import Any

# Force UTF-8 on the console streams so log lines containing non-ASCII text
# can be printed; streams without ``reconfigure`` are left untouched.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
except AttributeError:
    pass

# Maximum number of failure entries copied into a metrics summary.
_MAX_FAILURES = 10


def parse_args(argv: Sequence[str] | None = None) -> argparse.Namespace:
    """Parse the command line.

    Args:
        argv: Argument list to parse; ``None`` (the default) makes argparse
            read ``sys.argv[1:]``.

    Returns:
        Namespace with ``name``, ``pid_file``, ``stdout``, ``stderr``,
        ``tail`` and ``metrics`` attributes.
    """
    parser = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")
    parser.add_argument("--name", help="Run name used for reports/.pid.txt and logs/.out.log/.err.log")
    parser.add_argument("--pid-file", help="PID file path")
    parser.add_argument("--stdout", help="stdout log path")
    parser.add_argument("--stderr", help="stderr log path")
    parser.add_argument("--tail", type=int, default=80, help="Log lines to show")
    parser.add_argument("--metrics", action="append", default=[], help="Metrics JSON path to summarize; can repeat")
    return parser.parse_args(argv)


def default_paths(name: str) -> tuple[Path, Path, Path]:
    """Return the ``(pid file, stdout log, stderr log)`` paths for run *name*."""
    return (
        Path("reports") / f"{name}.pid.txt",
        Path("logs") / f"{name}.out.log",
        Path("logs") / f"{name}.err.log",
    )


def read_pid(path: Path) -> int | None:
    """Read an integer PID from *path*.

    Returns:
        The PID, or ``None`` when the file is missing, unreadable (directory,
        permissions, ...), undecodable, or does not contain an integer.
    """
    try:
        # "utf-8-sig" reads plain ASCII unchanged and also drops a UTF-8 BOM.
        return int(path.read_text(encoding="utf-8-sig").strip())
    except (OSError, ValueError):
        # ValueError also covers UnicodeDecodeError.
        return None


def _windows_process_status(pid: int) -> dict[str, Any]:
    """Query PowerShell ``Get-Process`` for *pid* and wrap its JSON output."""
    cmd = [
        "powershell.exe",
        "-NoProfile",
        "-Command",
        f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
        "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
    ]
    proc = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8", errors="replace")
    text = proc.stdout.strip()
    if not text:
        # Empty output is treated as "no such process".
        return {"pid": pid, "running": False}
    try:
        data = json.loads(text)
    except json.JSONDecodeError:
        # Something was printed but it is not JSON; keep it for inspection.
        return {"pid": pid, "running": True, "raw": text}
    return {"pid": pid, "running": True, "process": data}


def _posix_process_status(pid: int) -> dict[str, Any]:
    """Probe *pid* with signal 0, which checks existence without signalling."""
    try:
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but we are not allowed to signal it.
        return {"pid": pid, "running": True}
    except (OSError, OverflowError):
        # No such process, or a value that cannot be a valid PID.
        return {"pid": pid, "running": False}
    return {"pid": pid, "running": True}


def process_status(pid: int | None) -> dict[str, Any]:
    """Report whether process *pid* is currently alive.

    Args:
        pid: Process id to probe, or ``None`` when no PID is known.

    Returns:
        A dict with ``"pid"`` and ``"running"`` keys. On Windows a running
        process additionally carries ``"process"`` (parsed ``Get-Process``
        output) or ``"raw"`` (the unparsed text).
    """
    # Non-positive ids address process groups in os.kill and never identify a
    # single training process, so they are reported as not running.
    if pid is None or pid <= 0:
        return {"pid": pid, "running": False}
    if os.name == "nt":
        return _windows_process_status(pid)
    return _posix_process_status(pid)


def tail_lines(path: Path, count: int) -> list[str]:
    """Return the last *count* lines of *path* without trailing newlines.

    Returns an empty list when *count* is not positive or when *path* is not
    a readable regular file.
    """
    # A plain ``lines[-count:]`` would return the whole file for count == 0.
    if count <= 0 or not path.is_file():
        return []
    try:
        with path.open("r", encoding="utf-8", errors="replace") as handle:
            # A bounded deque keeps only the last ``count`` lines in memory.
            recent = deque(handle, maxlen=count)
    except OSError:
        return []
    return [line.rstrip("\n") for line in recent]


def _failure_labels(mode: dict[str, Any], keys: tuple[str, ...]) -> list[Any]:
    """Return a label for each of the first ``_MAX_FAILURES`` failures of *mode*.

    For dict entries the label is the first truthy value among *keys*, falling
    back to the value of the last key; non-dict entries are passed through.
    """
    failures = mode.get("failures")
    if not isinstance(failures, list):
        return []
    labels: list[Any] = []
    for item in failures[:_MAX_FAILURES]:
        if not isinstance(item, dict):
            labels.append(item)
            continue
        label = None
        for key in keys:
            label = item.get(key)
            if label:
                break
        labels.append(label)
    return labels


def summarize_metrics(path: Path) -> dict[str, Any] | None:
    """Summarize the per-mode results stored in a metrics JSON file.

    Args:
        path: Metrics JSON file.

    Returns:
        ``None`` when *path* is not a file. Otherwise a dict holding
        ``"path"`` plus one entry per recognised mode under the top-level
        ``"modes"`` object. A file that cannot be read or parsed yields
        ``{"path": ..., "error": ...}`` instead of raising, so one bad file
        does not abort the whole status report.
    """
    if not path.is_file():
        return None
    summary: dict[str, Any] = {"path": str(path)}
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except (OSError, ValueError) as exc:
        # ValueError covers both JSONDecodeError and UnicodeDecodeError.
        summary["error"] = f"{type(exc).__name__}: {exc}"
        return summary

    modes = data.get("modes") if isinstance(data, dict) else None
    if not isinstance(modes, dict):
        return summary

    for mode_name, mode in modes.items():
        if not isinstance(mode, dict):
            continue
        if "full_correct" in mode:
            summary[mode_name] = {
                "full_correct": mode.get("full_correct"),
                "case_count": mode.get("case_count"),
                "full_accuracy": mode.get("full_accuracy"),
                "failures": _failure_labels(mode, ("id", "filename")),
            }
        elif "full_match_correct" in mode:
            summary[mode_name] = {
                "full_match_correct": mode.get("full_match_correct"),
                "full_match_total": mode.get("full_match_total"),
                "full_match_accuracy": mode.get("full_match_accuracy"),
                "failures": _failure_labels(mode, ("filename",)),
            }
    return summary


def main(argv: Sequence[str] | None = None) -> None:
    """Print process status, log tails and metrics summaries for one run.

    Args:
        argv: Argument list; ``None`` (the default) uses ``sys.argv[1:]``.
    """
    args = parse_args(argv)

    # ``None`` marks "not requested". A ``Path()`` placeholder would not work
    # here: it means "." and Path objects are always truthy.
    pid_file: Path | None = None
    stdout_path: Path | None = None
    stderr_path: Path | None = None
    if args.name:
        pid_file, stdout_path, stderr_path = default_paths(args.name)
    # Explicit paths override the ones derived from --name.
    if args.pid_file:
        pid_file = Path(args.pid_file)
    if args.stdout:
        stdout_path = Path(args.stdout)
    if args.stderr:
        stderr_path = Path(args.stderr)

    pid = read_pid(pid_file) if pid_file is not None else None
    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))

    for label, log_path in (("stdout", stdout_path), ("stderr", stderr_path)):
        if log_path is None:
            continue
        print(f"\n--- {label} tail: {log_path} ---")
        for line in tail_lines(log_path, args.tail):
            print(line)

    for metric in args.metrics:
        summary = summarize_metrics(Path(metric))
        if summary is not None:
            print(f"\n--- metrics: {metric} ---")
            print(json.dumps(summary, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()