lean-laguna / scripts /check_results.py
art87able's picture
Lean Laguna: lossless DFlash speculative decoding on Laguna XS.2 (harness, environment, results)
0a55ff6
#!/usr/bin/env python3
"""check_results.py — smoke-validate the schema of measure.py output JSON.
The benchmark's value is the before/after diff of results/baseline.json and
results/dflash.json; this asserts those files have the shape the demo expects so a
broken run is caught locally, not on stage.
Usage: python scripts/check_results.py results/dflash.json results/baseline.json
Exit 0 = all valid, 1 = problems listed, 2 = no paths given (usage printed).
"""
from __future__ import annotations
import json
import sys
# Top-level keys every result file must carry, mapped to the type (or tuple
# of types) the value is checked against with isinstance().
REQUIRED = {
    "label": str,
    "model": str,
    "n": int,
    "tokens_per_s_mean": (int, float),
    "ttft_s_mean": (int, float),
    "runs": list,
}
# Keys the first entry of the "runs" list must carry.
RUN_KEYS = {"ttft_s", "total_s", "new_tokens", "tokens_per_s", "text"}


def check(path: str) -> list[str]:
    """Validate the schema of one result JSON file.

    Args:
        path: Path of the JSON file to validate.

    Returns:
        A list of human-readable problem descriptions, each prefixed with
        *path*. An empty list means the file has the expected shape.

    The function never raises for bad input: unreadable files, invalid
    JSON, undecodable bytes, a non-object top-level value and a non-object
    first run are all reported as problems instead.
    """
    try:
        # The context manager closes the handle; JSON text is UTF-8.
        with open(path, encoding="utf-8") as fh:
            obj = json.load(fh)
    except (OSError, ValueError) as e:
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return [f"{path}: cannot read/parse ({e})"]

    if not isinstance(obj, dict):
        # Key lookups below only make sense on a JSON object.
        return [
            f"{path}: top-level JSON value is {type(obj).__name__}, "
            "expected an object"
        ]

    problems: list[str] = []
    for key, typ in REQUIRED.items():
        if key not in obj:
            problems.append(f"{path}: missing key '{key}'")
            continue
        value = obj[key]
        # bool is a subclass of int, so JSON true/false would otherwise be
        # accepted for the numeric fields.
        if isinstance(value, bool) or not isinstance(value, typ):
            problems.append(
                f"{path}: key '{key}' has wrong type {type(value).__name__}"
            )

    runs = obj.get("runs")
    if not isinstance(runs, list):
        # Already reported above as missing or wrongly typed.
        return problems
    if not runs:
        problems.append(f"{path}: 'runs' is empty")
        return problems

    # Smoke check: only the first run is inspected.
    first = runs[0]
    if not isinstance(first, dict):
        problems.append(
            f"{path}: run[0] is {type(first).__name__}, expected an object"
        )
        return problems
    missing = RUN_KEYS - set(first)
    if missing:
        problems.append(f"{path}: run[0] missing keys {sorted(missing)}")
    return problems
def main(paths: list[str]) -> int:
    """Check every file in *paths* and print a summary report.

    Args:
        paths: Result JSON files to validate.

    Returns:
        2 when no paths were given (the module docstring is printed as
        usage), 1 when any file has problems, 0 when all files are valid.
    """
    if not paths:
        print(__doc__)
        return 2

    # Gather every problem first so the "checked" lines print as one group.
    found: list[str] = [msg for path in paths for msg in check(path)]
    for path in paths:
        print(f"checked {path}")

    if not found:
        print("\nOK: all result files have the expected schema.")
        return 0

    print("\nFAIL:")
    for msg in found:
        print(" -", msg)
    return 1
# Script entry point: main()'s return value becomes the process exit status.
if __name__ == "__main__":
    cli_paths = sys.argv[1:]
    raise SystemExit(main(cli_paths))