File size: 2,073 Bytes
2d05890 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 | #!/usr/bin/env python3
"""Aggregate per-size telemetry files into one <chip>-<ram>gb.json report.
Reads results/mac_bench/<chip>-<ram>gb-<size>/telemetry.json for every size
present, emits results/mac_bench/<chip>-<ram>gb.json.
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Model sizes to look for; the aggregated report lists runs in this order.
SIZES = ["2b", "4b", "9b", "27b"]

# Top-level sections of a telemetry.json that main() reads.
_REQUIRED_SECTIONS = ("hardware", "model", "eval", "perf")


def _load_telemetry(path: Path) -> dict:
    """Parse one telemetry.json and check it has the sections main() reads.

    Args:
        path: Location of the telemetry file.

    Returns:
        The decoded top-level JSON object.

    Raises:
        ValueError: If the file is not valid JSON, its top level is not an
            object, or any of the required sections is missing. The message
            always starts with the offending path.
    """
    try:
        data = json.loads(path.read_text(encoding="utf-8"))
    except json.JSONDecodeError as err:
        raise ValueError(f"{path}: invalid JSON ({err})") from err
    if not isinstance(data, dict):
        raise ValueError(f"{path}: expected a JSON object at the top level")
    missing = [key for key in _REQUIRED_SECTIONS if key not in data]
    if missing:
        raise ValueError(f"{path}: missing section(s): {', '.join(missing)}")
    return data


def main(argv: "list[str] | None" = None) -> None:
    """Merge the per-size telemetry files of one machine into a single report.

    For every size in ``SIZES`` the file
    ``<out-dir>/<chip>-<ram>gb-<size>/telemetry.json`` is read if it exists.
    The ``model``, ``eval`` and ``perf`` sections of each file found are
    written, together with the ``hardware`` section of the first one, to
    ``<out-dir>/<chip>-<ram>gb.json``. A summary table is printed to stdout.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse fall back to ``sys.argv[1:]``.

    Raises:
        SystemExit: With status 1 when no telemetry file exists for the
            requested chip/RAM combination, or when a telemetry file is
            malformed (the reason is printed to stderr first).
    """
    parser = argparse.ArgumentParser()
    parser.add_argument("--chip", required=True, help="e.g. M2-Max")
    parser.add_argument("--ram-gb", required=True, type=int)
    parser.add_argument("--out-dir", default="results/mac_bench")
    args = parser.parse_args(argv)

    base = Path(args.out_dir)
    prefix = f"{args.chip}-{args.ram_gb}gb"

    runs: list[dict] = []
    for size in SIZES:
        tel = base / f"{prefix}-{size}" / "telemetry.json"
        if not tel.exists():
            continue  # sizes that were never benchmarked are simply skipped
        try:
            runs.append(_load_telemetry(tel))
        except ValueError as err:
            # Name the broken file instead of surfacing a bare traceback.
            print(f"Bad telemetry file: {err}", file=sys.stderr)
            sys.exit(1)

    if not runs:
        print(f"No telemetry files found for {prefix}", file=sys.stderr)
        sys.exit(1)

    hardware = runs[0]["hardware"]  # same across sizes on one Mac
    report = {
        "hardware": hardware,
        "runs": [
            {"model": r["model"], "eval": r["eval"], "perf": r["perf"]}
            for r in runs
        ],
    }
    out_path = base / f"{prefix}.json"
    out_path.write_text(json.dumps(report, indent=2), encoding="utf-8")
    print(f"Wrote {out_path}")

    # human-readable table
    print(f"\n{prefix} ({hardware['chip']}, {hardware['ram_gb']} GB, fanless={hardware['fanless']})")
    print(f"{'size':<5} {'gguf':>6} {'tier1':>6} {'comp':>6} {'tok/s':>6} {'ttft':>5} {'rss':>5} {'time':>6}")
    print("-" * 56)
    for run in report["runs"]:
        model = run["model"]
        evals = run["eval"]
        perf = run["perf"]
        # Composite scores are optional in the eval section and shown as 0.0
        # when absent.
        print(f"{model['size']:<5} {model['gguf_gb']:>6.2f} "
              f"{evals.get('tier1_composite', 0):>6.1f} "
              f"{evals.get('metrollm_composite', 0):>6.1f} "
              f"{perf['decode_tok_s_median']:>6.1f} "
              f"{perf['ttft_ms_median']:>5.0f} "
              f"{perf['peak_rss_gb']:>5.2f} "
              f"{perf['runner_wallclock_s']:>6}")
# Script entry point: run the aggregation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()
|