AniFileBERT / tools /training_status.py

Add local training status helper

0df0bf9 5 days ago

5.42 kB

	# -*- coding: utf-8 -*-
	r"""Inspect a local AniFileBERT background training run.

	Examples:

	.\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache
	.\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log
	"""

	from __future__ import annotations

	import argparse
	import json
	import os
	from pathlib import Path
	import subprocess
	import sys
	from typing import Any

	# Switch stdout and stderr to UTF-8, replacing characters that cannot be
	# encoded (presumably so log tails print on consoles with a legacy code page).
	# Each stream is handled on its own so that a stdout object lacking
	# ``reconfigure`` does not prevent stderr from being reconfigured.
	for _stream in (sys.stdout, sys.stderr):
	    try:
	        _stream.reconfigure(encoding="utf-8", errors="replace")
	    except AttributeError:
	        # The stream object does not offer ``reconfigure``; leave it unchanged.
	        pass
	del _stream


	def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
	    """Parse the command-line options of the training status helper.

	    Args:
	        argv: Argument strings to parse. ``None`` (the default) lets
	            ``argparse`` read ``sys.argv[1:]``, which is what a call
	            without arguments has always done.

	    Returns:
	        Namespace with ``name``, ``pid_file``, ``stdout`` and ``stderr``
	        (each ``None`` when omitted), ``tail`` (int, default 80) and
	        ``metrics`` (list of paths, empty when the flag is never given).
	    """
	    parser = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")
	    parser.add_argument("--name", help="Run name used for reports/<name>.pid.txt and logs/<name>.out.log/.err.log")
	    parser.add_argument("--pid-file", help="PID file path")
	    parser.add_argument("--stdout", help="stdout log path")
	    parser.add_argument("--stderr", help="stderr log path")
	    parser.add_argument("--tail", type=int, default=80, help="Log lines to show")
	    parser.add_argument("--metrics", action="append", default=[], help="Metrics JSON path to summarize; can repeat")
	    return parser.parse_args(argv)


	def default_paths(name: str) -> tuple[Path, Path, Path]:
	    """Return the conventional PID-file, stdout-log and stderr-log paths of a run.

	    The relative paths are ``reports/<name>.pid.txt``, ``logs/<name>.out.log``
	    and ``logs/<name>.err.log``, returned in that order.
	    """
	    reports_dir = Path("reports")
	    logs_dir = Path("logs")
	    pid_path = reports_dir / f"{name}.pid.txt"
	    out_log = logs_dir / f"{name}.out.log"
	    err_log = logs_dir / f"{name}.err.log"
	    return pid_path, out_log, err_log


	def read_pid(path: Path) -> int | None:
	    """Read a process ID from a PID file.

	    Args:
	        path: File expected to hold a single integer PID as ASCII text.

	    Returns:
	        The parsed PID, or ``None`` when the file does not exist or its
	        content is not an ASCII integer.
	    """
	    try:
	        text = path.read_text(encoding="ascii")
	    except FileNotFoundError:
	        return None
	    except UnicodeDecodeError:
	        # Non-ASCII bytes (for example a UTF-16 encoded file) are malformed
	        # content, treated the same as non-numeric text below.
	        return None
	    try:
	        return int(text.strip())
	    except ValueError:
	        return None


	def process_status(pid: int | None) -> dict[str, Any]:
	    """Report whether the process with the given PID is currently alive.

	    Args:
	        pid: Process ID to probe, or ``None`` when no PID is known.

	    Returns:
	        A dict with ``"pid"`` and ``"running"``. On Windows a running process
	        additionally carries ``"process"`` (the decoded ``Get-Process`` JSON
	        with Id, ProcessName, CPU and WorkingSet) or, when that output is not
	        valid JSON, ``"raw"`` (the unparsed text).
	    """
	    if pid is None:
	        return {"pid": None, "running": False}
	    if pid <= 0:
	        # Non-positive IDs never name one specific process: POSIX ``kill``
	        # interprets them as process groups, so probing them would wrongly
	        # report "running".
	        return {"pid": pid, "running": False}
	    if os.name == "nt":
	        command = [
	            "powershell.exe",
	            "-NoProfile",
	            "-Command",
	            f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
	            "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
	        ]
	        completed = subprocess.run(command, capture_output=True, text=True, encoding="utf-8", errors="replace")
	        output = completed.stdout.strip()
	        if not output:
	            # No output is taken to mean that no such process was found.
	            return {"pid": pid, "running": False}
	        try:
	            details = json.loads(output)
	        except json.JSONDecodeError:
	            return {"pid": pid, "running": True, "raw": output}
	        return {"pid": pid, "running": True, "process": details}
	    try:
	        # Signal 0 only performs the existence/permission check; nothing is delivered.
	        os.kill(pid, 0)
	    except PermissionError:
	        # The process exists but we may not signal it (owned by another user).
	        return {"pid": pid, "running": True}
	    except (OSError, OverflowError):
	        # No such process, or a value too large to be a PID at all.
	        return {"pid": pid, "running": False}
	    return {"pid": pid, "running": True}


	def tail_lines(path: Path, count: int) -> list[str]:
	    """Return the last ``count`` lines of a text file, without trailing newlines.

	    Args:
	        path: Log file to read; decoded as UTF-8 with undecodable bytes replaced.
	        count: Maximum number of lines to return.

	    Returns:
	        Up to ``count`` trailing lines, oldest first. An empty list when
	        ``path`` is not a regular file or ``count`` is not positive.
	    """
	    from collections import deque  # local import keeps this function self-contained

	    # A plain ``lines[-count:]`` slice returns the whole file for count == 0
	    # and drops leading lines for negative counts, so reject those up front.
	    if count <= 0 or not path.is_file():
	        return []
	    with path.open("r", encoding="utf-8", errors="replace") as handle:
	        # The bounded deque keeps only the newest ``count`` lines while
	        # streaming, instead of holding the entire log in memory.
	        newest = deque(handle, maxlen=count)
	    return [line.rstrip("\n") for line in newest]


	def summarize_metrics(path: Path) -> dict[str, Any] | None:
	    """Condense a metrics JSON file into per-mode accuracy figures.

	    Args:
	        path: Metrics file; read as UTF-8 JSON.

	    Returns:
	        ``None`` when ``path`` is not a regular file. Otherwise a dict with
	        ``"path"`` plus one entry per mode found under the top-level
	        ``"modes"`` object. A mode containing ``"full_correct"`` is reduced to
	        its correct/case-count/accuracy values, a mode containing
	        ``"full_match_correct"`` to its full-match values; both list the
	        identifiers of at most the first ten failures. Modes with neither key
	        are omitted. If the file cannot be decoded as JSON, the dict holds
	        ``"path"`` and an ``"error"`` message instead.
	    """
	    if not path.is_file():
	        return None
	    summary: dict[str, Any] = {"path": str(path)}
	    try:
	        data = json.loads(path.read_text(encoding="utf-8"))
	    except ValueError as exc:
	        # ValueError covers json.JSONDecodeError and UnicodeDecodeError, e.g.
	        # a file that is still being written; report it rather than crash.
	        summary["error"] = f"unreadable metrics JSON: {exc}"
	        return summary
	    modes = data.get("modes") if isinstance(data, dict) else None
	    if not isinstance(modes, dict):
	        return summary
	    for mode_name, mode in modes.items():
	        if not isinstance(mode, dict):
	            continue
	        first_failures = mode.get("failures", [])[:10]
	        if "full_correct" in mode:
	            summary[mode_name] = {
	                "full_correct": mode.get("full_correct"),
	                "case_count": mode.get("case_count"),
	                "full_accuracy": mode.get("full_accuracy"),
	                "failures": [item.get("id") or item.get("filename") for item in first_failures],
	            }
	        elif "full_match_correct" in mode:
	            summary[mode_name] = {
	                "full_match_correct": mode.get("full_match_correct"),
	                "full_match_total": mode.get("full_match_total"),
	                "full_match_accuracy": mode.get("full_match_accuracy"),
	                "failures": [item.get("filename") for item in first_failures],
	            }
	    return summary


	def main() -> None:
	    """Print process status, log tails and metric summaries for one training run.

	    Paths come from ``--name`` (conventional locations) and can be overridden
	    individually with ``--pid-file``, ``--stdout`` and ``--stderr``. The
	    process status is always printed as JSON; a log tail is printed only for
	    a log path that was actually specified, and a metrics summary only for
	    metrics files that exist.
	    """
	    args = parse_args()
	    # ``None`` marks "not specified". A Path object is always truthy, so an
	    # empty ``Path()`` cannot serve as that marker: it would be read as the
	    # current directory.
	    pid_file: Path | None = None
	    stdout_path: Path | None = None
	    stderr_path: Path | None = None
	    if args.name:
	        pid_file, stdout_path, stderr_path = default_paths(args.name)
	    # Explicit options take precedence over the name-derived defaults.
	    if args.pid_file:
	        pid_file = Path(args.pid_file)
	    if args.stdout:
	        stdout_path = Path(args.stdout)
	    if args.stderr:
	        stderr_path = Path(args.stderr)

	    pid = read_pid(pid_file) if pid_file is not None else None
	    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))

	    if stdout_path is not None:
	        print(f"\n--- stdout tail: {stdout_path} ---")
	        for line in tail_lines(stdout_path, args.tail):
	            print(line)
	    if stderr_path is not None:
	        print(f"\n--- stderr tail: {stderr_path} ---")
	        for line in tail_lines(stderr_path, args.tail):
	            print(line)

	    for metric in args.metrics:
	        summary = summarize_metrics(Path(metric))
	        if summary is not None:
	            print(f"\n--- metrics: {metric} ---")
	            print(json.dumps(summary, ensure_ascii=False, indent=2))


	# Produce the status report only when this module is executed directly
	# (as a script or via ``python -m``), not when it is imported.
	if __name__ == "__main__":
	    main()