Masters-four-Tab-OpenAI/backend/scripts/eval_latency_alert.py
Pete Dunn
Add release preflight, baseline gates, and review-queue observability
94f8da0
#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import math
from pathlib import Path
from typing import Any, Dict, List
def _pct(values: List[float], q: float) -> float:
if not values:
return 0.0
if len(values) == 1:
return values[0]
idx = (len(values) - 1) * q
lo = int(math.floor(idx))
hi = int(math.ceil(idx))
if lo == hi:
return values[lo]
frac = idx - lo
return values[lo] + (values[hi] - values[lo]) * frac
def _finite_latency_ms(row: Any) -> float | None:
    """Return ``row["latency_ms"]`` as a finite float, or ``None`` if unusable."""
    if not isinstance(row, dict):
        return None
    raw = row.get("latency_ms")
    if raw is None:
        return None
    try:
        value = float(raw)
    except (TypeError, ValueError):
        return None
    # json.loads accepts NaN/Infinity literals; they would poison sorting and
    # budget comparisons, so treat them as "no measurement".
    return value if math.isfinite(value) else None


def main(argv: List[str] | None = None) -> int:
    """Check eval latencies against p95/p99 budgets and report the outcome.

    Reads the JSON file given by ``--eval-json``, collects ``latency_ms`` from
    each entry of its ``results`` list, prints a JSON summary to stdout, emits
    a ``::warning`` line when a budget is exceeded, and optionally writes the
    same summary to ``--out``.

    Rows without a finite numeric ``latency_ms`` are excluded from the sample
    and reported as ``skipped_rows``. Counting them as 0 ms would pull the
    average and percentiles down and could hide a real breach.

    Args:
        argv: Command-line arguments to parse. ``None`` (the default) makes
            argparse read ``sys.argv[1:]``, which is the script behaviour.

    Returns:
        ``2`` when a budget is breached and ``--warn-only`` is not set,
        otherwise ``0``.
    """
    parser = argparse.ArgumentParser(description="Emit latency SLO alerts for eval outputs.")
    parser.add_argument("--eval-json", required=True, help="Path to eval JSON with `results` rows.")
    parser.add_argument("--p95-budget-ms", type=float, default=10000.0)
    parser.add_argument("--p99-budget-ms", type=float, default=15000.0)
    parser.add_argument("--warn-only", action="store_true", help="Never fail; emit warnings only.")
    parser.add_argument("--out", default="", help="Optional path for summary JSON output.")
    args = parser.parse_args(argv)

    data = json.loads(Path(args.eval_json).read_text(encoding="utf-8"))
    rows = list(data.get("results", []) or [])

    # Keep only rows that carry a usable measurement; sort here because the
    # percentile helper is given an ordered sample.
    latencies = sorted(
        ms for ms in (_finite_latency_ms(row) for row in rows) if ms is not None
    )
    skipped = len(rows) - len(latencies)

    p95 = round(_pct(latencies, 0.95), 2)
    p99 = round(_pct(latencies, 0.99), 2)
    # max(1, ...) avoids dividing by zero when there are no samples.
    avg = round(sum(latencies) / max(1, len(latencies)), 2)
    breached = p95 > args.p95_budget_ms or p99 > args.p99_budget_ms

    summary: Dict[str, Any] = {
        "ok": not breached,
        "count": len(latencies),
        "skipped_rows": skipped,
        "avg_latency_ms": avg,
        "p95_latency_ms": p95,
        "p99_latency_ms": p99,
        "p95_budget_ms": float(args.p95_budget_ms),
        "p99_budget_ms": float(args.p99_budget_ms),
        "warn_only": bool(args.warn_only),
    }
    # Serialise once so stdout and the optional output file cannot diverge.
    rendered = json.dumps(summary, indent=2)
    print(rendered)

    if skipped:
        print(
            "::warning title=Latency SLO::"
            f"Ignored {skipped} result row(s) without a finite numeric latency_ms."
        )

    if breached:
        message = (
            f"Latency SLO breached: p95={p95}ms (budget {args.p95_budget_ms}ms), "
            f"p99={p99}ms (budget {args.p99_budget_ms}ms)."
        )
        print(f"::warning title=Latency SLO::{message}")

    if args.out:
        out = Path(args.out)
        out.parent.mkdir(parents=True, exist_ok=True)
        out.write_text(rendered + "\n", encoding="utf-8")
        print(f"Wrote latency summary: {out}")

    # The warning is always printed on breach; only the exit status is
    # softened by --warn-only.
    if breached and not args.warn_only:
        return 2
    return 0
# Script entry point: run the CLI and use main()'s return value as the
# process exit status.
if __name__ == "__main__":
    exit_status = main()
    raise SystemExit(exit_status)