| |
| """check_results.py — smoke-validate the schema of measure.py output JSON. |
| |
| The benchmark's value is the before/after diff of results/baseline.json and |
| results/dflash.json; this asserts those files have the shape the demo expects so a |
| broken run is caught locally, not on stage. |
| |
| Usage: python scripts/check_results.py results/dflash.json results/baseline.json |
| Exit 0 = all valid, 1 = problems listed, 2 = no paths given (usage printed). |
| """ |
| from __future__ import annotations |
|
|
| import json |
| import sys |
|
|
| REQUIRED = { |
| "label": str, |
| "model": str, |
| "n": int, |
| "tokens_per_s_mean": (int, float), |
| "ttft_s_mean": (int, float), |
| "runs": list, |
| } |
| RUN_KEYS = {"ttft_s", "total_s", "new_tokens", "tokens_per_s", "text"} |
|
|
|
|
| def check(path: str) -> list[str]: |
| problems: list[str] = [] |
| try: |
| obj = json.load(open(path)) |
| except (OSError, json.JSONDecodeError) as e: |
| return [f"{path}: cannot read/parse ({e})"] |
| for key, typ in REQUIRED.items(): |
| if key not in obj: |
| problems.append(f"{path}: missing key '{key}'") |
| elif not isinstance(obj[key], typ): |
| problems.append(f"{path}: key '{key}' has wrong type {type(obj[key]).__name__}") |
| runs = obj.get("runs") or [] |
| if isinstance(runs, list) and runs: |
| missing = RUN_KEYS - set(runs[0]) |
| if missing: |
| problems.append(f"{path}: run[0] missing keys {sorted(missing)}") |
| elif isinstance(runs, list): |
| problems.append(f"{path}: 'runs' is empty") |
| return problems |
|
|
|
|
def main(paths: list[str]) -> int:
    """Check every file in *paths*, print a report, and return an exit status.

    Args:
        paths: Result-file paths to validate.

    Returns:
        2 if *paths* is empty (the module docstring is printed as usage),
        1 if any file produced at least one problem, 0 otherwise.
    """
    if not paths:
        print(__doc__)
        return 2

    # Validate every file before printing anything, so the "checked" lines
    # appear only once all files have been processed.
    findings: list[str] = [issue for target in paths for issue in check(target)]

    for target in paths:
        print(f"checked {target}")

    if not findings:
        print("\nOK: all result files have the expected schema.")
        return 0

    print("\nFAIL:")
    for issue in findings:
        print(" -", issue)
    return 1
|
|
|
|
if __name__ == "__main__":
    # Pass the CLI arguments (without the script name) to main() and use its
    # return value as the process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|
|