File size: 5,417 Bytes

0df0bf9

# -*- coding: utf-8 -*-
r"""Inspect a local AniFileBERT background training run.

Examples:

    .\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache
    .\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log
"""

from __future__ import annotations

import argparse
import json
import os
from pathlib import Path
import subprocess
import sys
from typing import Any

# Switch both standard streams to UTF-8 and substitute characters that cannot
# be encoded, so printing arbitrary log content does not fail on encoding.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
except AttributeError:
    # The active stream object has no reconfigure() method; leave the
    # streams with whatever encoding they already have.
    pass


def _non_negative_int(value: str) -> int:
    """Parse *value* as an int for argparse, rejecting negative numbers."""
    try:
        number = int(value)
    except ValueError:
        raise argparse.ArgumentTypeError(f"invalid int value: {value!r}") from None
    if number < 0:
        raise argparse.ArgumentTypeError(f"must be >= 0, got {number}")
    return number


def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
    """Parse the command-line options of the training status tool.

    Args:
        argv: Argument list to parse. ``None`` (the default) makes argparse
            read ``sys.argv[1:]``, which is the behavior existing callers get.

    Returns:
        Namespace with ``name``, ``pid_file``, ``stdout``, ``stderr``,
        ``tail`` (non-negative int, default 80) and ``metrics`` (list of
        paths, empty when the option is not given).

    Raises:
        SystemExit: Raised by argparse on invalid arguments, including a
            negative ``--tail`` value.
    """
    parser = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")
    parser.add_argument("--name", help="Run name used for reports/<name>.pid.txt and logs/<name>.out.log/.err.log")
    parser.add_argument("--pid-file", help="PID file path")
    parser.add_argument("--stdout", help="stdout log path")
    parser.add_argument("--stderr", help="stderr log path")
    # A negative count would turn a "last N lines" slice into "skip the first
    # N lines", so it is rejected at the CLI boundary.
    parser.add_argument("--tail", type=_non_negative_int, default=80, help="Log lines to show")
    parser.add_argument("--metrics", action="append", default=[], help="Metrics JSON path to summarize; can repeat")
    return parser.parse_args(argv)


def default_paths(name: str) -> tuple[Path, Path, Path]:
    """Build the conventional file locations for the run called *name*.

    Returns:
        ``(pid_file, stdout_log, stderr_log)`` as relative paths:
        ``reports/<name>.pid.txt``, ``logs/<name>.out.log`` and
        ``logs/<name>.err.log``.
    """
    log_dir = Path("logs")
    pid_path = Path("reports", f"{name}.pid.txt")
    out_log = log_dir / f"{name}.out.log"
    err_log = log_dir / f"{name}.err.log"
    return pid_path, out_log, err_log


def read_pid(path: Path) -> int | None:
    """Read a process id from the text file at *path*.

    Args:
        path: Location of the PID file; it is decoded as ASCII.

    Returns:
        The integer stored in the file, or ``None`` when the file cannot be
        read (missing, a directory, not permitted), is not ASCII text, or
        does not contain an integer.
    """
    try:
        text = path.read_text(encoding="ascii")
    except (OSError, UnicodeDecodeError):
        # OSError covers a missing file as well as a directory or an
        # unreadable file; UnicodeDecodeError covers files saved in another
        # encoding. All of them mean "no usable PID", not a crash.
        return None
    try:
        return int(text.strip())
    except ValueError:
        return None


def process_status(pid: int | None) -> dict[str, Any]:
    """Report whether the process identified by *pid* currently exists.

    Args:
        pid: Process id to probe, or ``None`` when no PID is known.

    Returns:
        A dict with ``"pid"`` and ``"running"``. On Windows a running
        process additionally carries ``"process"`` (the decoded
        ``Get-Process`` JSON) or ``"raw"`` (PowerShell output that was not
        valid JSON).
    """
    if pid is None:
        return {"pid": None, "running": False}
    if pid <= 0:
        # Not a usable process id. On POSIX, kill() with pid 0 or a negative
        # pid addresses process groups, so the probe below would succeed and
        # falsely report a running process.
        return {"pid": pid, "running": False}
    if os.name == "nt":
        cmd = [
            "powershell.exe",
            "-NoProfile",
            "-Command",
            f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
            "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
        ]
        proc = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8", errors="replace")
        text = proc.stdout.strip()
        if not text:
            # No output: Get-Process found nothing for this id.
            return {"pid": pid, "running": False}
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            return {"pid": pid, "running": True, "raw": text}
        return {"pid": pid, "running": True, "process": data}
    try:
        # Signal 0 performs the existence/permission check without
        # delivering a signal.
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user.
        return {"pid": pid, "running": True}
    except (OSError, OverflowError):
        # No such process, or the value does not fit the platform's pid type.
        return {"pid": pid, "running": False}
    return {"pid": pid, "running": True}


def tail_lines(path: Path, count: int) -> list[str]:
    """Return the last *count* lines of the text file at *path*.

    Args:
        path: Log file to read; decoded as UTF-8 with undecodable bytes
            replaced.
        count: Maximum number of trailing lines to return.

    Returns:
        The trailing lines without their newline terminator. Empty when
        *count* is zero or negative, or when *path* is not a regular file.
    """
    from collections import deque

    # Guard explicitly: with a plain slice, lines[-0:] is the whole file and
    # a negative count would skip lines from the start instead.
    if count <= 0 or not path.is_file():
        return []
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        # A bounded deque keeps only the newest `count` lines while the file
        # is streamed, so a large log is never held in memory at once.
        recent = deque(handle, maxlen=count)
    return [line.rstrip("\n") for line in recent]


def summarize_metrics(path: Path) -> dict[str, Any] | None:
    """Condense a metrics JSON file into a short per-mode summary.

    Args:
        path: Metrics file, read as UTF-8 JSON.

    Returns:
        ``None`` when *path* is not a regular file. Otherwise a dict holding
        ``"path"`` plus one entry per item of the file's ``"modes"`` mapping
        that contains ``"full_correct"`` or ``"full_match_correct"``. Each
        entry copies that mode's three score fields and lists identifiers of
        at most the first ten failures. Modes with neither key are omitted.
    """
    if not path.is_file():
        return None

    payload = json.loads(path.read_text(encoding="utf-8"))
    report: dict[str, Any] = {"path": str(path)}
    if "modes" not in payload:
        return report

    for label, stats in payload["modes"].items():
        if "full_correct" in stats:
            fields = ("full_correct", "case_count", "full_accuracy")
            # This layout identifies a failure by id, falling back to filename.
            failed = [
                entry.get("id") or entry.get("filename")
                for entry in stats.get("failures", [])[:10]
            ]
        elif "full_match_correct" in stats:
            fields = ("full_match_correct", "full_match_total", "full_match_accuracy")
            failed = [entry.get("filename") for entry in stats.get("failures", [])[:10]]
        else:
            continue
        digest: dict[str, Any] = {field: stats.get(field) for field in fields}
        digest["failures"] = failed
        report[label] = digest
    return report


def main() -> None:
    """Print process status, log tails and metric summaries for a run.

    File locations come from ``--name`` (via ``default_paths``); each of
    ``--pid-file``, ``--stdout`` and ``--stderr`` overrides the matching
    location. A location supplied neither way is skipped: no PID file is
    read and no tail section is printed for it.
    """
    args = parse_args()

    # "Not provided" is None rather than an empty Path: Path objects are
    # always truthy and Path() means the current directory, so truthiness
    # checks on it would never skip anything.
    pid_file: Path | None = None
    stdout_path: Path | None = None
    stderr_path: Path | None = None
    if args.name:
        pid_file, stdout_path, stderr_path = default_paths(args.name)

    # Explicit options win over the locations derived from --name.
    if args.pid_file:
        pid_file = Path(args.pid_file)
    if args.stdout:
        stdout_path = Path(args.stdout)
    if args.stderr:
        stderr_path = Path(args.stderr)

    pid = read_pid(pid_file) if pid_file is not None else None
    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))

    for label, log_path in (("stdout", stdout_path), ("stderr", stderr_path)):
        if log_path is None:
            continue
        print(f"\n--- {label} tail: {log_path} ---")
        for line in tail_lines(log_path, args.tail):
            print(line)

    for metric in args.metrics:
        summary = summarize_metrics(Path(metric))
        # summarize_metrics returns None for a path that is not a file; such
        # entries produce no section.
        if summary is not None:
            print(f"\n--- metrics: {metric} ---")
            print(json.dumps(summary, ensure_ascii=False, indent=2))


# Run the CLI only when this module is executed as the main program, not
# when it is imported.
if __name__ == "__main__":
    main()