#!/usr/bin/env python3
"""Emit latency SLO alerts for eval outputs.

Reads an eval JSON file containing a ``results`` list, computes the average,
p95 and p99 of each row's ``latency_ms``, prints a JSON summary, and exits
with status 2 when either percentile exceeds its budget (unless
``--warn-only`` is given).
"""
from __future__ import annotations

import argparse
import json
import math
from pathlib import Path
from typing import Any, Dict, List, Optional, Sequence


def _pct(values: List[float], q: float) -> float:
    """Return the *q*-quantile of *values* using linear interpolation.

    Args:
        values: Samples already sorted in ascending order; this function
            does not sort them.
        q: Quantile expressed as a fraction in ``[0, 1]`` (0.95 for p95).

    Returns:
        ``0.0`` for an empty list, the only element of a one-element list,
        otherwise the value interpolated between the two neighbouring ranks.

    Raises:
        ValueError: If *q* is outside ``[0, 1]`` (including NaN).
    """
    # Reject out-of-range quantiles up front: a negative q would otherwise
    # index from the end of the list and silently return a wrong value, and
    # q > 1 would surface as an unexplained IndexError.
    if not 0.0 <= q <= 1.0:
        raise ValueError(f"quantile must be within [0, 1], got {q!r}")
    if not values:
        return 0.0
    if len(values) == 1:
        return values[0]
    idx = (len(values) - 1) * q
    lo = math.floor(idx)
    hi = math.ceil(idx)
    if lo == hi:
        return values[lo]
    return values[lo] + (values[hi] - values[lo]) * (idx - lo)


def _load_latencies(eval_json: Path) -> List[float]:
    """Read *eval_json* and return its rows' latencies sorted ascending."""
    data = json.loads(eval_json.read_text(encoding="utf-8"))
    rows = list(data.get("results", []) or [])
    # NOTE(review): a row whose `latency_ms` is missing, null or 0 is counted
    # as a 0 ms sample, which pulls the average and percentiles down --
    # confirm this is intended rather than skipping such rows.
    return sorted(float(r.get("latency_ms", 0.0) or 0.0) for r in rows)


def _summarize(
    latencies: List[float],
    p95_budget_ms: float,
    p99_budget_ms: float,
    warn_only: bool,
) -> Dict[str, Any]:
    """Build the summary dict for sorted *latencies* against both budgets."""
    p95 = round(_pct(latencies, 0.95), 2)
    p99 = round(_pct(latencies, 0.99), 2)
    # max(1, ...) keeps the average at 0.0 instead of dividing by zero when
    # there are no rows.
    avg = round(sum(latencies) / max(1, len(latencies)), 2)
    breached = (p95 > p95_budget_ms) or (p99 > p99_budget_ms)
    return {
        "ok": (not breached),
        "count": len(latencies),
        "avg_latency_ms": avg,
        "p95_latency_ms": p95,
        "p99_latency_ms": p99,
        "p95_budget_ms": p95_budget_ms,
        "p99_budget_ms": p99_budget_ms,
        "warn_only": warn_only,
    }


def main(argv: Optional[Sequence[str]] = None) -> int:
    """Run the latency SLO check and return the process exit code.

    Args:
        argv: Command-line arguments without the program name. ``None``
            (the default) makes argparse read ``sys.argv[1:]``.

    Returns:
        ``2`` when a budget is breached and ``--warn-only`` is not set,
        otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description="Emit latency SLO alerts for eval outputs.")
    parser.add_argument("--eval-json", required=True, help="Path to eval JSON with `results` rows.")
    parser.add_argument("--p95-budget-ms", type=float, default=10000.0)
    parser.add_argument("--p99-budget-ms", type=float, default=15000.0)
    parser.add_argument("--warn-only", action="store_true", help="Never fail; emit warnings only.")
    parser.add_argument("--out", default="", help="Optional path for summary JSON output.")
    args = parser.parse_args(argv)

    latencies = _load_latencies(Path(args.eval_json))
    summary = _summarize(
        latencies,
        args.p95_budget_ms,
        args.p99_budget_ms,
        bool(args.warn_only),
    )
    breached = not summary["ok"]
    p95 = summary["p95_latency_ms"]
    p99 = summary["p99_latency_ms"]

    # Serialise once; the same text goes to stdout and to the --out file.
    rendered = json.dumps(summary, indent=2)
    print(rendered)

    if breached:
        message = (
            f"Latency SLO breached: p95={p95}ms (budget {args.p95_budget_ms}ms), "
            f"p99={p99}ms (budget {args.p99_budget_ms}ms)."
        )
        # `::warning ...::` line; this matches the GitHub Actions
        # workflow-command annotation syntax.
        print(f"::warning title=Latency SLO::{message}")

    if args.out:
        out = Path(args.out)
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(rendered + "\n", encoding="utf-8")
        print(f"Wrote latency summary: {out}")

    if breached and not args.warn_only:
        return 2
    return 0


if __name__ == "__main__":
    raise SystemExit(main())