Add local training status helper

Browse files

Files changed (3) hide show

AGENTS.md +9 -0
tools/train_schema_v2_synthetic.py +6 -0
tools/training_status.py +152 -0

AGENTS.md CHANGED Viewed

@@ -142,6 +142,15 @@ The wrapper defaults to:
 Use `--force-cache` to rebuild the combined cache after changing either JSONL,
 vocab, label schema, max length, split ratio, seed, or repeat count.
 Export for Android:
 ```bash

 Use `--force-cache` to rebuild the combined cache after changing either JSONL,
 vocab, label schema, max length, split ratio, seed, or repeat count.
+For background local runs, inspect progress and metrics with:
+```powershell
+.\.venv\Scripts\python.exe -m tools.training_status `
+  --name schema_v2_cached_wrapper_train_skipcache `
+  --metrics reports\schema_v2_best_hardfocus_synth_pathleaf_cache_case_metrics.json `
+  --metrics checkpoints\schema-v2-best-hardfocus-synth-pathleaf-cache\final\parse_eval_metrics.json
+```
 Export for Android:
 ```bash

tools/train_schema_v2_synthetic.py CHANGED Viewed

@@ -25,6 +25,12 @@ import subprocess
 import sys
 from typing import Any, Sequence
 def utc_now() -> str:
     return dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")

 import sys
 from typing import Any, Sequence
+# Switch stdout and stderr to UTF-8 with errors="replace" so characters the
+# stream cannot encode are substituted instead of raising on output.
+try:
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
+except AttributeError:
+    # The stream object has no reconfigure() method; leave the streams as
+    # they are. If stdout raises here, stderr is not reconfigured either.
+    pass
 def utc_now() -> str:
     return dt.datetime.now(dt.timezone.utc).replace(microsecond=0).isoformat().replace("+00:00", "Z")

tools/training_status.py ADDED Viewed

	@@ -0,0 +1,152 @@

+# -*- coding: utf-8 -*-
+r"""Inspect a local AniFileBERT background training run.
+Examples:
+    .\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache
+    .\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log
+"""
+from __future__ import annotations
+import argparse
+import json
+import os
+from pathlib import Path
+import subprocess
+import sys
+from typing import Any
+# Switch stdout and stderr to UTF-8 with errors="replace" so characters the
+# stream cannot encode are substituted instead of raising on output.
+try:
+    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
+    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
+except AttributeError:
+    # The stream object has no reconfigure() method; leave the streams as
+    # they are. If stdout raises here, stderr is not reconfigured either.
+    pass
+def _non_negative_int(text: str) -> int:
+    """Parse ``text`` as an int for argparse, rejecting negative values."""
+    try:
+        value = int(text)
+    except ValueError:
+        raise argparse.ArgumentTypeError(f"invalid int value: {text!r}") from None
+    if value < 0:
+        raise argparse.ArgumentTypeError(f"must be >= 0, got {value}")
+    return value
+def parse_args(argv: list[str] | None = None) -> argparse.Namespace:
+    """Parse the command-line options of the training status helper.
+    Args:
+        argv: Argument list to parse. ``None`` (the default) makes argparse
+            read ``sys.argv[1:]``, which is the original behaviour.
+    Returns:
+        Namespace with ``name``, ``pid_file``, ``stdout``, ``stderr``,
+        ``tail`` (int, default 80) and ``metrics`` (list of paths, default
+        empty).
+    Raises:
+        SystemExit: On invalid arguments, including a negative ``--tail``.
+    """
+    parser = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")
+    parser.add_argument("--name", help="Run name used for reports/<name>.pid.txt and logs/<name>.out.log/.err.log")
+    parser.add_argument("--pid-file", help="PID file path")
+    parser.add_argument("--stdout", help="stdout log path")
+    parser.add_argument("--stderr", help="stderr log path")
+    # A negative count would turn the tail slice into "drop the first N lines",
+    # so it is rejected here instead of producing misleading output.
+    parser.add_argument("--tail", type=_non_negative_int, default=80, help="Log lines to show")
+    parser.add_argument("--metrics", action="append", default=[], help="Metrics JSON path to summarize; can repeat")
+    return parser.parse_args(argv)
+def default_paths(name: str) -> tuple[Path, Path, Path]:
+    """Derive the conventional file locations for the run called ``name``.
+    Returns:
+        ``(pid_file, stdout_log, stderr_log)`` as relative paths:
+        ``reports/<name>.pid.txt``, ``logs/<name>.out.log`` and
+        ``logs/<name>.err.log``.
+    """
+    reports_dir = Path("reports")
+    logs_dir = Path("logs")
+    pid_path = reports_dir / f"{name}.pid.txt"
+    out_log = logs_dir / f"{name}.out.log"
+    err_log = logs_dir / f"{name}.err.log"
+    return pid_path, out_log, err_log
+def read_pid(path: Path) -> int | None:
+    """Read a process id from a PID file.
+    The file is read as bytes and decoded by byte-order mark: UTF-16 when it
+    starts with a UTF-16 BOM, otherwise UTF-8 with an optional BOM (which also
+    covers plain ASCII). This keeps PID files written by shell redirection in
+    a non-ASCII encoding readable instead of raising a decode error.
+    Args:
+        path: Location of the PID file.
+    Returns:
+        The positive integer PID, or ``None`` when the file is missing or
+        unreadable (including when ``path`` is a directory), cannot be
+        decoded, does not contain an integer, or contains a value <= 0.
+    """
+    try:
+        raw = path.read_bytes()
+    except OSError:
+        # Covers a missing file as well as directories and permission errors.
+        return None
+    encoding = "utf-16" if raw.startswith((b"\xff\xfe", b"\xfe\xff")) else "utf-8-sig"
+    try:
+        pid = int(raw.decode(encoding).strip())
+    except ValueError:
+        # UnicodeDecodeError is a ValueError subclass, so undecodable bytes
+        # and non-numeric text both end up here.
+        return None
+    # Zero and negative ids address process groups, not a single process.
+    return pid if pid > 0 else None
+def process_status(pid: int | None) -> dict[str, Any]:
+    """Report whether the process identified by ``pid`` is currently alive.
+    Args:
+        pid: Process id to probe, or ``None`` when no PID is known.
+    Returns:
+        A dict with ``pid`` and ``running``. On Windows a live process also
+        carries ``process`` (the parsed ``Get-Process`` JSON) or ``raw`` (the
+        unparsed PowerShell output when it is not valid JSON).
+    """
+    if pid is None:
+        return {"pid": None, "running": False}
+    if pid <= 0:
+        # Non-positive ids never name a single training process: on POSIX they
+        # address process groups for os.kill, and on Windows id 0 is the Idle
+        # process. Probing them would report a false "running".
+        return {"pid": pid, "running": False}
+    if os.name == "nt":
+        cmd = [
+            "powershell.exe",
+            "-NoProfile",
+            "-Command",
+            f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
+            "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
+        ]
+        proc = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8", errors="replace")
+        text = proc.stdout.strip()
+        if not text:
+            # Empty output is treated as "no such process".
+            return {"pid": pid, "running": False}
+        try:
+            data = json.loads(text)
+        except json.JSONDecodeError:
+            return {"pid": pid, "running": True, "raw": text}
+        return {"pid": pid, "running": True, "process": data}
+    try:
+        # Signal 0 performs the existence/permission check without delivering
+        # a signal.
+        os.kill(pid, 0)
+    except PermissionError:
+        # The process exists but belongs to another user.
+        return {"pid": pid, "running": True}
+    except (OSError, OverflowError):
+        # ProcessLookupError (no such process), any other OS failure, or a
+        # PID too large for the platform's pid type.
+        return {"pid": pid, "running": False}
+    return {"pid": pid, "running": True}
+def tail_lines(path: Path, count: int) -> list[str]:
+    """Return the last ``count`` lines of a text file without trailing newlines.
+    Args:
+        path: Log file to read; decoded as UTF-8 with undecodable bytes
+            replaced.
+        count: Maximum number of lines to return.
+    Returns:
+        Up to ``count`` trailing lines, oldest first. Empty when ``path`` is
+        not a regular file or ``count`` is zero or negative.
+    """
+    from collections import deque
+    # A plain ``lines[-count:]`` slice returns the whole file for count == 0
+    # and drops leading lines for negative counts, so handle both up front.
+    if count <= 0 or not path.is_file():
+        return []
+    with path.open("r", encoding="utf-8", errors="replace") as handle:
+        # The bounded deque keeps only the newest ``count`` lines while the
+        # file is streamed, so a large log is never held in memory at once.
+        newest = deque(handle, maxlen=count)
+    return [line.rstrip("\n") for line in newest]
+def _failure_labels(mode: dict[str, Any], keys: tuple[str, ...]) -> list[Any]:
+    """Return labels for at most the first ten entries of ``mode["failures"]``."""
+    failures = mode.get("failures")
+    if not isinstance(failures, list):
+        # Missing, null or otherwise malformed failure lists count as empty.
+        return []
+    labels: list[Any] = []
+    for item in failures[:10]:
+        if not isinstance(item, dict):
+            # Entries that are not objects are reported unchanged.
+            labels.append(item)
+            continue
+        label = None
+        for key in keys:
+            # Same result as chaining ``item.get(k1) or item.get(k2)``.
+            label = item.get(key)
+            if label:
+                break
+        labels.append(label)
+    return labels
+def summarize_metrics(path: Path) -> dict[str, Any] | None:
+    """Condense a metrics JSON file into a small per-mode summary.
+    Args:
+        path: Metrics JSON file, read as UTF-8.
+    Returns:
+        ``None`` when ``path`` is not a regular file. Otherwise a dict that
+        always has ``path`` and, for each entry of the top-level ``modes``
+        object, either the ``full_correct``/``case_count``/``full_accuracy``
+        fields or the ``full_match_*`` fields, plus up to ten failure labels.
+        When the file cannot be read or is not valid JSON (for example while
+        it is still being written), the dict has an ``error`` entry instead
+        of mode summaries; no exception is raised.
+    """
+    if not path.is_file():
+        return None
+    summary: dict[str, Any] = {"path": str(path)}
+    try:
+        data = json.loads(path.read_text(encoding="utf-8"))
+    except (OSError, ValueError) as exc:
+        # JSONDecodeError and UnicodeDecodeError are both ValueError subclasses.
+        summary["error"] = f"{type(exc).__name__}: {exc}"
+        return summary
+    modes = data.get("modes") if isinstance(data, dict) else None
+    if not isinstance(modes, dict):
+        return summary
+    for mode_name, mode in modes.items():
+        if not isinstance(mode, dict):
+            continue
+        if "full_correct" in mode:
+            summary[mode_name] = {
+                "full_correct": mode.get("full_correct"),
+                "case_count": mode.get("case_count"),
+                "full_accuracy": mode.get("full_accuracy"),
+                "failures": _failure_labels(mode, ("id", "filename")),
+            }
+        elif "full_match_correct" in mode:
+            summary[mode_name] = {
+                "full_match_correct": mode.get("full_match_correct"),
+                "full_match_total": mode.get("full_match_total"),
+                "full_match_accuracy": mode.get("full_match_accuracy"),
+                "failures": _failure_labels(mode, ("filename",)),
+            }
+    return summary
+def main() -> None:
+    """Print process status, log tails and metric summaries for a training run.
+    Paths come from ``--name`` (via ``default_paths``) and can be overridden
+    one by one with ``--pid-file``, ``--stdout`` and ``--stderr``. A path that
+    is given by neither option is skipped. An unset path is represented as
+    ``None`` because an empty ``Path()`` is the current directory and is
+    always truthy, which would make the PID read fail on a directory and
+    print log sections for ".".
+    """
+    args = parse_args()
+    pid_file: Path | None = None
+    stdout_path: Path | None = None
+    stderr_path: Path | None = None
+    if args.name:
+        pid_file, stdout_path, stderr_path = default_paths(args.name)
+    # Explicit options take precedence over the name-derived defaults.
+    if args.pid_file:
+        pid_file = Path(args.pid_file)
+    if args.stdout:
+        stdout_path = Path(args.stdout)
+    if args.stderr:
+        stderr_path = Path(args.stderr)
+    pid = read_pid(pid_file) if pid_file is not None else None
+    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))
+    for label, log_path in (("stdout", stdout_path), ("stderr", stderr_path)):
+        if log_path is None:
+            continue
+        print(f"\n--- {label} tail: {log_path} ---")
+        for line in tail_lines(log_path, args.tail):
+            print(line)
+    for metric in args.metrics:
+        summary = summarize_metrics(Path(metric))
+        # Metrics files that do not exist are skipped silently.
+        if summary is not None:
+            print(f"\n--- metrics: {metric} ---")
+            print(json.dumps(summary, ensure_ascii=False, indent=2))
+# Run the status report only when this module is executed as the main
+# program, not when it is imported.
+if __name__ == "__main__":
+    main()