#!/usr/bin/env python3
"""check_results.py — smoke-validate the schema of measure.py output JSON.
The benchmark's value is the before/after diff of results/baseline.json and
results/dflash.json; this asserts those files have the shape the demo expects so a
broken run is caught locally, not on stage.
Usage: python scripts/check_results.py results/dflash.json results/baseline.json
Exit 0 = all valid, 1 = problems listed, 2 = no paths given (usage printed).
"""
from __future__ import annotations
import json
import sys
# Type spec shared by the two aggregate measurements below.
_NUMERIC = (int, float)

# Top-level keys a result file must contain, each mapped to the type (or tuple
# of types) handed to isinstance() when the value is validated.
REQUIRED = {
    "label": str,
    "model": str,
    "n": int,
    "tokens_per_s_mean": _NUMERIC,
    "ttft_s_mean": _NUMERIC,
    "runs": list,
}

# Keys that must be present on the first entry of the "runs" list.
RUN_KEYS = {
    "ttft_s",
    "total_s",
    "new_tokens",
    "tokens_per_s",
    "text",
}
def check(path: str) -> list[str]:
    """Validate the schema of one result file and return the problems found.

    The file is read as UTF-8 JSON. Its top-level value must be an object
    holding every key in ``REQUIRED`` with a value of the listed type, and
    the first entry of ``runs`` must be an object holding every key in
    ``RUN_KEYS``. Only ``runs[0]`` is inspected; later runs are not.

    Args:
        path: Location of the JSON file to validate.

    Returns:
        A list of human-readable problem strings, each prefixed with
        ``path``. An empty list means the file passed every check. Read,
        decode and parse failures are reported as a single problem rather
        than raised.
    """
    try:
        # Context manager so the handle is closed even when parsing fails;
        # explicit UTF-8 so the result does not depend on the locale.
        with open(path, encoding="utf-8") as fh:
            obj = json.load(fh)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return [f"{path}: cannot read/parse ({e})"]

    # Valid JSON need not be an object (e.g. `null` or a list); the key
    # checks below only make sense on a dict, so report and stop here.
    if not isinstance(obj, dict):
        return [
            f"{path}: top-level JSON value must be an object, "
            f"got {type(obj).__name__}"
        ]

    problems: list[str] = []
    for key, typ in REQUIRED.items():
        if key not in obj:
            problems.append(f"{path}: missing key '{key}'")
            continue
        value = obj[key]
        # bool is a subclass of int, so a JSON true/false would otherwise
        # slip through the int / (int, float) checks.
        accepts_bool = typ is bool or (isinstance(typ, tuple) and bool in typ)
        is_stray_bool = isinstance(value, bool) and not accepts_bool
        if is_stray_bool or not isinstance(value, typ):
            problems.append(
                f"{path}: key '{key}' has wrong type {type(value).__name__}"
            )

    # A missing or wrongly typed "runs" has already been reported above, so
    # the content checks only apply when it really is a list.
    runs = obj.get("runs")
    if isinstance(runs, list):
        if not runs:
            problems.append(f"{path}: 'runs' is empty")
        elif not isinstance(runs[0], dict):
            problems.append(
                f"{path}: run[0] is not an object "
                f"(got {type(runs[0]).__name__})"
            )
        else:
            missing = RUN_KEYS - set(runs[0])
            if missing:
                problems.append(
                    f"{path}: run[0] missing keys {sorted(missing)}"
                )
    return problems
def main(paths: list[str]) -> int:
    """Check every result file in ``paths`` and print a report.

    Args:
        paths: Result-file paths to validate, typically ``sys.argv[1:]``.

    Returns:
        The process exit status: 2 when no paths were given (the module
        docstring is printed instead), 1 when any file has problems, and 0
        when every file passed.
    """
    if not paths:
        print(__doc__)
        return 2

    # Collect everything first so the per-file lines and the verdict are
    # printed together after all files have been examined.
    findings: list[str] = []
    for target in paths:
        findings.extend(check(target))

    for target in paths:
        print(f"checked {target}")

    if not findings:
        print("\nOK: all result files have the expected schema.")
        return 0

    print("\nFAIL:")
    for message in findings:
        print(" -", message)
    return 1
# Script entry point: validate the paths given on the command line and use
# main()'s return value as the process exit status.
if __name__ == "__main__":
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)
|