#!/usr/bin/env python3
"""check_results.py — smoke-validate the schema of measure.py output JSON.

The benchmark's value is the before/after diff of results/baseline.json and
results/dflash.json; this asserts those files have the shape the demo expects so a
broken run is caught locally, not on stage.

Usage: python scripts/check_results.py results/dflash.json results/baseline.json
Exit 0 = all valid, 1 = problems listed, 2 = no paths given (usage printed).
"""
from __future__ import annotations

import json
import sys

# Top-level keys check() requires in every results file, mapped to the type
# (or tuple of types) handed to isinstance() for that key's value.
REQUIRED = dict(
    label=str,
    model=str,
    n=int,
    tokens_per_s_mean=(int, float),
    ttft_s_mean=(int, float),
    runs=list,
)

# Keys check() expects to find on the first entry of the "runs" list.
RUN_KEYS = {
    "ttft_s",
    "total_s",
    "new_tokens",
    "tokens_per_s",
    "text",
}


def check(path: str) -> list[str]:
    """Validate one measure.py result file and return the problems found.

    Args:
        path: Path of the JSON file to validate.

    Returns:
        Human-readable problem descriptions, each prefixed with ``path``.
        An empty list means the file matches the expected schema. A file
        that cannot be read or parsed, or whose top-level value is not a
        JSON object, yields a single problem and is not inspected further.
    """
    try:
        # Binary mode lets json detect UTF-8/16/32 itself instead of relying
        # on the locale encoding; the context manager closes the handle.
        with open(path, "rb") as fh:
            obj = json.load(fh)
    except (OSError, ValueError) as e:
        # ValueError covers both json.JSONDecodeError and UnicodeDecodeError.
        return [f"{path}: cannot read/parse ({e})"]

    if not isinstance(obj, dict):
        # Valid JSON such as [] or 3 has no keys to look up.
        return [
            f"{path}: top-level JSON value is {type(obj).__name__}, expected an object"
        ]

    problems: list[str] = []
    for key, typ in REQUIRED.items():
        if key not in obj:
            problems.append(f"{path}: missing key '{key}'")
            continue
        value = obj[key]
        accepted = typ if isinstance(typ, tuple) else (typ,)
        # bool subclasses int, so JSON true/false would otherwise be accepted
        # wherever a number is required.
        stray_bool = isinstance(value, bool) and bool not in accepted
        if stray_bool or not isinstance(value, accepted):
            problems.append(
                f"{path}: key '{key}' has wrong type {type(value).__name__}"
            )

    # Only inspect 'runs' when it really is a list; a missing or mistyped
    # value is reported by the loop above (REQUIRED lists 'runs'), so it is
    # not additionally reported as empty here.
    runs = obj.get("runs")
    if isinstance(runs, list):
        if not runs:
            problems.append(f"{path}: 'runs' is empty")
        elif not isinstance(runs[0], dict):
            problems.append(
                f"{path}: run[0] is {type(runs[0]).__name__}, expected an object"
            )
        else:
            # Smoke check: only the first run's keys are examined.
            missing = RUN_KEYS - set(runs[0])
            if missing:
                problems.append(f"{path}: run[0] missing keys {sorted(missing)}")
    return problems


def main(paths: list[str]) -> int:
    """Run check() over every path, print a report, and return the exit code.

    Args:
        paths: Result-file paths to validate (the command-line arguments).

    Returns:
        2 when no paths were given (the module docstring is printed as
        usage), 1 when any file has problems, 0 when every file is valid.
    """
    if not paths:
        print(__doc__)
        return 2

    # Validate everything first so the "checked" lines only appear once all
    # files have been processed.
    findings: list[str] = [msg for path in paths for msg in check(path)]

    for path in paths:
        print(f"checked {path}")

    if not findings:
        print("\nOK: all result files have the expected schema.")
        return 0

    print("\nFAIL:")
    for msg in findings:
        print("  -", msg)
    return 1


if __name__ == "__main__":
    # Script entry point: the process exit status is main()'s return value.
    raise SystemExit(main(sys.argv[1:]))