AniFileBERT / tools /training_status.py
ModerRAS's picture
Add local training status helper
0df0bf9
raw
history blame
5.42 kB
# -*- coding: utf-8 -*-
r"""Inspect a local AniFileBERT background training run.
Examples:
.\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache
.\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log
"""
from __future__ import annotations
import argparse
import json
import os
from pathlib import Path
import subprocess
import sys
from typing import Any
# Best-effort: switch both standard streams to UTF-8 so log text that the
# console encoding cannot represent is replaced rather than raising.
# Streams without ``reconfigure`` raise AttributeError, which is ignored; as
# soon as one stream lacks the method the remaining ones are left untouched.
try:
    for _stream in (sys.stdout, sys.stderr):
        _stream.reconfigure(encoding="utf-8", errors="replace")
except AttributeError:
    pass
def parse_args() -> argparse.Namespace:
    """Build the command-line parser and parse ``sys.argv``.

    Returns:
        Namespace with ``name``, ``pid_file``, ``stdout`` and ``stderr``
        (strings or ``None``), ``tail`` (int, default 80) and ``metrics``
        (list of strings, empty when the flag is never given).
    """
    cli = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")

    # Plain optional string flags, registered in the order they appear in --help.
    string_options = (
        ("--name", "Run name used for reports/<name>.pid.txt and logs/<name>.out.log/.err.log"),
        ("--pid-file", "PID file path"),
        ("--stdout", "stdout log path"),
        ("--stderr", "stderr log path"),
    )
    for flag, description in string_options:
        cli.add_argument(flag, help=description)

    cli.add_argument("--tail", type=int, default=80, help="Log lines to show")
    cli.add_argument(
        "--metrics",
        action="append",
        default=[],
        help="Metrics JSON path to summarize; can repeat",
    )
    return cli.parse_args()
def default_paths(name: str) -> tuple[Path, Path, Path]:
    """Derive the conventional file locations for the run called ``name``.

    Returns:
        ``(pid_file, stdout_log, stderr_log)`` as relative paths:
        ``reports/<name>.pid.txt``, ``logs/<name>.out.log`` and
        ``logs/<name>.err.log``.
    """
    log_dir = Path("logs")
    pid_path = Path("reports") / f"{name}.pid.txt"
    out_path = log_dir / f"{name}.out.log"
    err_path = log_dir / f"{name}.err.log"
    return pid_path, out_path, err_path
def read_pid(path: Path) -> int | None:
    """Read a process ID from a PID file.

    The file is decoded as ASCII and surrounding whitespace is ignored.

    Args:
        path: Location of the PID file.

    Returns:
        The parsed integer, or ``None`` when the file is missing, is not a
        readable regular file (for example a directory), is not ASCII, or
        does not contain an integer.
    """
    try:
        return int(path.read_text(encoding="ascii").strip())
    except (OSError, ValueError):
        # OSError covers a missing file, a directory and permission problems.
        # ValueError covers both a non-integer payload and UnicodeDecodeError
        # (a ValueError subclass) raised for non-ASCII content.
        return None
def process_status(pid: int | None) -> dict[str, Any]:
    """Report whether the process with the given PID is currently alive.

    Args:
        pid: Process ID to probe, or ``None`` when no PID is known.

    Returns:
        A dict that always has ``"pid"`` and ``"running"``. On Windows a
        running process also carries ``"process"`` (the decoded
        ``Get-Process`` JSON) or ``"raw"`` (the PowerShell output when it is
        not valid JSON).
    """
    if pid is None:
        return {"pid": None, "running": False}
    if pid <= 0:
        # A non-positive PID can only come from a bogus PID file. On POSIX it
        # would make kill() address a process group or every process rather
        # than a single one, and the probe would report "running".
        return {"pid": pid, "running": False}

    if os.name == "nt":
        cmd = [
            "powershell.exe",
            "-NoProfile",
            "-Command",
            f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
            "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
        ]
        proc = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8", errors="replace")
        text = proc.stdout.strip()
        if not text:
            return {"pid": pid, "running": False}
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            # Output exists but is not JSON; keep it for the caller to inspect.
            return {"pid": pid, "running": True, "raw": text}
        # NOTE(review): defensive check in case PowerShell emits ``null`` or an
        # object with a null Id when the lookup found nothing - confirm on
        # Windows whether this can actually happen.
        if data is None or (isinstance(data, dict) and data.get("Id") is None):
            return {"pid": pid, "running": False}
        return {"pid": pid, "running": True, "process": data}

    try:
        # Signal 0 performs the existence/permission check without delivering
        # a signal.
        os.kill(pid, 0)
    except PermissionError:
        # The process exists but belongs to another user.
        return {"pid": pid, "running": True}
    except (OSError, OverflowError):
        # No such process, or a PID too large for the platform's pid type.
        return {"pid": pid, "running": False}
    return {"pid": pid, "running": True}
def tail_lines(path: Path, count: int) -> list[str]:
    """Return up to the last ``count`` lines of a text file.

    The file is decoded as UTF-8 with undecodable bytes replaced, and the
    trailing newline of each line is stripped.

    Args:
        path: Log file to read.
        count: Maximum number of lines to return.

    Returns:
        The final lines in file order. The list is empty when ``path`` is not
        an existing regular file or when ``count`` is zero or negative.
    """
    from collections import deque

    # A slice such as lines[-0:] would yield the whole file, and a negative
    # count would drop leading lines instead; treat both as "show nothing".
    if count <= 0 or not path.is_file():
        return []
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        # A bounded deque keeps only the newest ``count`` lines, so the whole
        # log is never held in memory at once.
        recent = deque(handle, maxlen=count)
    return [line.rstrip("\n") for line in recent]
def summarize_metrics(path: Path) -> dict[str, Any] | None:
    """Condense a metrics JSON file into a short per-mode summary.

    Args:
        path: Metrics JSON file, read as UTF-8.

    Returns:
        ``None`` when ``path`` is not an existing regular file. Otherwise a
        dict with ``"path"`` plus one entry per item of the top-level
        ``"modes"`` mapping that contains either ``"full_correct"`` or
        ``"full_match_correct"``. Each entry holds that mode's three counters
        and the identifiers of at most ten failures. Modes with neither key
        are omitted.
    """
    if not path.is_file():
        return None

    payload = json.loads(path.read_text(encoding="utf-8"))
    report: dict[str, Any] = {"path": str(path)}
    if "modes" not in payload:
        return report

    for label, stats in payload["modes"].items():
        if "full_correct" in stats:
            fields = ("full_correct", "case_count", "full_accuracy")
            # This schema identifies a failure by "id", falling back to "filename".
            failed = [
                entry.get("id") or entry.get("filename")
                for entry in stats.get("failures", [])[:10]
            ]
        elif "full_match_correct" in stats:
            fields = ("full_match_correct", "full_match_total", "full_match_accuracy")
            failed = [entry.get("filename") for entry in stats.get("failures", [])[:10]]
        else:
            continue
        digest: dict[str, Any] = {field: stats.get(field) for field in fields}
        digest["failures"] = failed
        report[label] = digest
    return report
def main() -> None:
    """Print process status, log tails and metric summaries for one run.

    Paths derived from ``--name`` act as defaults; ``--pid-file``,
    ``--stdout`` and ``--stderr`` override them individually. A section whose
    path was never supplied is skipped instead of being probed.
    """
    args = parse_args()

    # None marks "not supplied". An empty Path() is Path(".") and is always
    # truthy, so it cannot serve as the "unset" sentinel.
    pid_file: Path | None = None
    stdout_path: Path | None = None
    stderr_path: Path | None = None
    if args.name:
        pid_file, stdout_path, stderr_path = default_paths(args.name)
    if args.pid_file:
        pid_file = Path(args.pid_file)
    if args.stdout:
        stdout_path = Path(args.stdout)
    if args.stderr:
        stderr_path = Path(args.stderr)

    pid = read_pid(pid_file) if pid_file is not None else None
    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))

    for label, log_path in (("stdout", stdout_path), ("stderr", stderr_path)):
        if log_path is None:
            continue
        print(f"\n--- {label} tail: {log_path} ---")
        for line in tail_lines(log_path, args.tail):
            print(line)

    for metric in args.metrics:
        summary = summarize_metrics(Path(metric))
        # Metrics files that do not exist are skipped silently.
        if summary is not None:
            print(f"\n--- metrics: {metric} ---")
            print(json.dumps(summary, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()