File size: 5,417 Bytes
0df0bf9
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
# -*- coding: utf-8 -*-
r"""Inspect a local AniFileBERT background training run.

Examples:

    .\.venv\Scripts\python.exe -m tools.training_status --name schema_v2_cached_wrapper_train_skipcache
    .\.venv\Scripts\python.exe -m tools.training_status --pid-file reports\schema_v2_cached_wrapper_train_skipcache.pid.txt --stdout logs\schema_v2_cached_wrapper_train_skipcache.out.log
"""

from __future__ import annotations

import argparse
import json
import os
from pathlib import Path
import subprocess
import sys
from typing import Any

# Switch the console streams to UTF-8 and replace unencodable characters
# instead of raising; main() prints log lines and JSON with ensure_ascii=False.
try:
    sys.stdout.reconfigure(encoding="utf-8", errors="replace")
    sys.stderr.reconfigure(encoding="utf-8", errors="replace")
except AttributeError:
    # The stream object has no reconfigure() method; keep its current settings.
    pass


def parse_args() -> argparse.Namespace:
    parser = argparse.ArgumentParser(description="Inspect local AniFileBERT training status")
    parser.add_argument("--name", help="Run name used for reports/<name>.pid.txt and logs/<name>.out.log/.err.log")
    parser.add_argument("--pid-file", help="PID file path")
    parser.add_argument("--stdout", help="stdout log path")
    parser.add_argument("--stderr", help="stderr log path")
    parser.add_argument("--tail", type=int, default=80, help="Log lines to show")
    parser.add_argument("--metrics", action="append", default=[], help="Metrics JSON path to summarize; can repeat")
    return parser.parse_args()


def default_paths(name: str) -> tuple[Path, Path, Path]:
    """Build the conventional file locations for the run called *name*.

    Returns:
        A ``(pid file, stdout log, stderr log)`` tuple of relative paths:
        ``reports/<name>.pid.txt``, ``logs/<name>.out.log`` and
        ``logs/<name>.err.log``.
    """
    reports_dir = Path("reports")
    logs_dir = Path("logs")
    pid_path = reports_dir / f"{name}.pid.txt"
    out_log = logs_dir / f"{name}.out.log"
    err_log = logs_dir / f"{name}.err.log"
    return pid_path, out_log, err_log


def read_pid(path: Path) -> int | None:
    try:
        text = path.read_text(encoding="ascii").strip()
    except FileNotFoundError:
        return None
    try:
        return int(text)
    except ValueError:
        return None


def process_status(pid: int | None) -> dict[str, Any]:
    if pid is None:
        return {"pid": None, "running": False}
    if os.name == "nt":
        cmd = [
            "powershell.exe",
            "-NoProfile",
            "-Command",
            f"$p = Get-Process -Id {pid} -ErrorAction SilentlyContinue; "
            "$p | Select-Object Id,ProcessName,CPU,WorkingSet | ConvertTo-Json -Compress",
        ]
        proc = subprocess.run(cmd, capture_output=True, text=True, encoding="utf-8", errors="replace")
        text = proc.stdout.strip()
        if not text:
            return {"pid": pid, "running": False}
        try:
            data = json.loads(text)
        except json.JSONDecodeError:
            return {"pid": pid, "running": True, "raw": text}
        return {"pid": pid, "running": True, "process": data}
    try:
        os.kill(pid, 0)
    except OSError:
        return {"pid": pid, "running": False}
    return {"pid": pid, "running": True}


def tail_lines(path: Path, count: int) -> list[str]:
    """Return the last *count* lines of a text file without trailing newlines.

    Args:
        path: Log file to read; decoded as UTF-8 with undecodable bytes replaced.
        count: Maximum number of lines to return.

    Returns:
        Up to *count* trailing lines, oldest first. Empty when *path* is not a
        regular file or *count* is zero or negative.
    """
    # Local import keeps this block self-contained; the module's import list
    # does not include collections.
    from collections import deque

    # Guard first: a plain ``lines[-count:]`` slice would return the WHOLE file
    # for count == 0 and a head-trimmed slice for negative counts.
    if count <= 0 or not path.is_file():
        return []
    with path.open("r", encoding="utf-8", errors="replace") as handle:
        # A bounded deque keeps only the newest lines while streaming, so large
        # logs are never held in memory in full.
        newest = deque(handle, maxlen=count)
    return [line.rstrip("\n") for line in newest]


def summarize_metrics(path: Path) -> dict[str, Any] | None:
    if not path.is_file():
        return None
    data = json.loads(path.read_text(encoding="utf-8"))
    summary: dict[str, Any] = {"path": str(path)}
    if "modes" in data:
        for mode_name, mode in data["modes"].items():
            if "full_correct" in mode:
                summary[mode_name] = {
                    "full_correct": mode.get("full_correct"),
                    "case_count": mode.get("case_count"),
                    "full_accuracy": mode.get("full_accuracy"),
                    "failures": [item.get("id") or item.get("filename") for item in mode.get("failures", [])[:10]],
                }
            elif "full_match_correct" in mode:
                summary[mode_name] = {
                    "full_match_correct": mode.get("full_match_correct"),
                    "full_match_total": mode.get("full_match_total"),
                    "full_match_accuracy": mode.get("full_match_accuracy"),
                    "failures": [item.get("filename") for item in mode.get("failures", [])[:10]],
                }
    return summary


def main() -> None:
    """Print process status, log tails and metrics summaries for a run.

    Paths come from ``--name`` (conventional locations) and can be overridden
    individually by ``--pid-file``, ``--stdout`` and ``--stderr``. Anything
    that was not specified is skipped.
    """
    args = parse_args()

    # ``None`` marks "not specified". An empty ``Path()`` cannot serve as the
    # sentinel: Path objects are always truthy and ``Path()`` is the current
    # directory, so it would be read as a PID file and listed as a log.
    pid_file: Path | None = None
    stdout_path: Path | None = None
    stderr_path: Path | None = None
    if args.name:
        pid_file, stdout_path, stderr_path = default_paths(args.name)

    # Explicit paths win over the defaults derived from --name.
    if args.pid_file:
        pid_file = Path(args.pid_file)
    if args.stdout:
        stdout_path = Path(args.stdout)
    if args.stderr:
        stderr_path = Path(args.stderr)

    pid = read_pid(pid_file) if pid_file is not None else None
    print(json.dumps(process_status(pid), ensure_ascii=False, indent=2))

    for label, log_path in (("stdout", stdout_path), ("stderr", stderr_path)):
        if log_path is None:
            continue
        print(f"\n--- {label} tail: {log_path} ---")
        for line in tail_lines(log_path, args.tail):
            print(line)

    for metric in args.metrics:
        summary = summarize_metrics(Path(metric))
        # None means the metrics file does not exist (yet); stay silent.
        if summary is not None:
            print(f"\n--- metrics: {metric} ---")
            print(json.dumps(summary, ensure_ascii=False, indent=2))

# Run the CLI only when this module is executed as the main program
# (e.g. ``python -m tools.training_status``), not when it is imported.
if __name__ == "__main__":
    main()