Spaces:
Sleeping
Sleeping
"""
local.py — in-process test runner for ReleaseOpsEnvironment.

Runs a sequence of actions against a task without starting the HTTP server.
Useful for debugging scenarios and validating ground_truth.json.

Usage:
    python local.py easy_001
    python local.py hard_002 --trace
    python local.py all
"""
| from __future__ import annotations | |
| import argparse | |
| import json | |
| import sys | |
| from pathlib import Path | |
| # Make sure the server package is importable when run from the repo root | |
| sys.path.insert(0, str(Path(__file__).parent / "server")) | |
| sys.path.insert(0, str(Path(__file__).parent)) | |
| from releaseops_environment import ReleaseOpsEnvironment | |
| from releaseops_env.models import ReleaseAction | |
# Task identifiers accepted on the command line and iterated by run_all().
TASKS = [
    "easy_001",
    "easy_002",
    "medium_001",
    "medium_002",
    "hard_001",
    "hard_002",
]

# Heuristic investigation sequence: the fixed list of action payloads that is
# stepped, in order, before a decision is submitted. Each entry is expanded
# into a ReleaseAction via keyword arguments.
# NOTE(review): intended to cover all required evidence for every task —
# confirm against the scenario definitions.
HEURISTIC_SEQUENCE = [
    # One inspect_change action per section of the change record.
    *(
        {"action_type": "inspect_change", "section": section}
        for section in ("diff", "tests", "approvals")
    ),
    {"action_type": "inspect_dependencies"},
    {
        "action_type": "search_incidents",
        "keywords": [
            "auth",
            "timeout",
            "rollback",
            "regression",
            "rate",
            "cascade",
        ],
    },
    {"action_type": "check_policy"},
]
def _make_action(d: dict) -> ReleaseAction:
    """Build a ReleaseAction from a plain dict of field values.

    Args:
        d: Mapping whose keys are passed to ``ReleaseAction`` as keyword
            arguments (e.g. ``{"action_type": "check_policy"}``).

    Returns:
        The constructed ``ReleaseAction``.
    """
    action = ReleaseAction(**d)
    return action
def run_heuristic(task_id: str, trace: bool = False) -> dict:
    """Run the fixed heuristic policy against one task and return the result.

    The environment is reset for ``task_id``, every action in
    ``HEURISTIC_SEQUENCE`` is stepped in order, up to two services from the
    scenario's ``blast_radius`` are inspected, and a decision derived from the
    severities of the discovered risk signals is submitted.

    Args:
        task_id: Identifier passed to ``env.reset``.
        trace: When true, print one line per step and the final score.

    Returns:
        A dict with the keys ``task_id``, ``decision``, ``score`` and
        ``breakdown``.
    """
    env = ReleaseOpsEnvironment()
    obs = env.reset(task_id=task_id)
    if trace:
        print(f"\n[{task_id}] Change: {obs.change_summary[:80]}")

    # Tracks whether the environment reported the episode as finished, so that
    # no further investigation steps are issued afterwards.
    done = False

    # Execute investigation steps
    for step_dict in HEURISTIC_SEQUENCE:
        result = env.step(_make_action(step_dict))
        if trace:
            signals = [r.signal_id for r in result.known_risk_signals]
            print(f" {step_dict['action_type']:25s} reward={result.reward:+.2f} signals={signals}")
        if result.done:
            done = True
            break

    # Inspect top services from blast_radius — skipped when the episode already
    # ended above (previously the `break` only left the first loop and these
    # steps were still sent to a finished episode).
    if not done:
        # NOTE(review): reads the environment's private `_scenario` attribute.
        blast = env._scenario["change"].get("blast_radius", [])
        for svc in blast[:2]:
            result = env.step(_make_action({"action_type": "inspect_services", "service": svc}))
            if trace:
                print(f" inspect_services({svc:20s}) reward={result.reward:+.2f}")
            if result.done:
                break

    # Build decision from discovered signals (taken from the last observation).
    risk_signals = result.known_risk_signals
    codes = [r.signal_id for r in risk_signals]
    # all() is True for an empty list, so "no signals" also means approve.
    if all(r.severity in ("low", "info") for r in risk_signals):
        decision = "approve"
    elif any(r.severity == "critical" for r in risk_signals):
        decision = "block"
    else:
        decision = "request_changes"

    # The decision is always submitted, as before, so any grader output is
    # captured. NOTE(review): confirm how env.step behaves if the episode had
    # already ended during investigation.
    final = env.step(_make_action({
        "action_type": "submit_decision",
        "final_decision": decision,
        "reason_codes": codes,
    }))
    # Fall back to neutral values when the grader fields are missing/empty.
    score = final.final_score or 0.0
    breakdown = final.grader_breakdown or {}
    if trace:
        print(f" submit({decision:15s}) score={score:.3f} {breakdown}")
    return {"task_id": task_id, "decision": decision, "score": score, "breakdown": breakdown}
def run_all(trace: bool = False) -> None:
    """Run the heuristic on every task in ``TASKS`` and print a summary table.

    Args:
        trace: Forwarded to ``run_heuristic`` for each task.
    """
    outcomes = []
    for task_id in TASKS:
        outcomes.append(run_heuristic(task_id, trace=trace))

    score_sum = sum(entry["score"] for entry in outcomes)
    rule = "=" * 62

    print(f"\n{rule}")
    print(f"{'Task':<15} {'Decision':<15} {'Score':>6} Breakdown")
    print(rule)
    for entry in outcomes:
        # Render the grader breakdown as space-separated key=value pairs.
        parts = [f"{k}={v:.2f}" for k, v in entry["breakdown"].items()]
        bd = " ".join(parts)
        print(f"{entry['task_id']:<15} {entry['decision']:<15} {entry['score']:>6.3f} {bd}")
    print(rule)

    average = score_sum / len(outcomes)
    print(f"{'AVERAGE':<15} {'':15} {average:>6.3f}")
def main() -> None:
    """Parse command-line arguments and run one task or all tasks.

    With ``all`` (the default) the summary table from ``run_all`` is printed.
    With a known task ID that task is run and its result dict is printed.
    Any other value prints an error and exits with status 1.
    """
    parser = argparse.ArgumentParser(description="Run ReleaseOps heuristic locally")
    parser.add_argument("task", nargs="?", default="all",
                        help="Task ID (e.g. easy_001) or 'all' (default)")
    parser.add_argument("--trace", action="store_true",
                        help="Print step-by-step trace")
    args = parser.parse_args()

    if args.task == "all":
        run_all(trace=args.trace)
    elif args.task in TASKS:
        # Honor --trace here as well; it was previously forced on, which made
        # the flag meaningless for single-task runs.
        result = run_heuristic(args.task, trace=args.trace)
        print(f"\nFinal: {result}")
    else:
        print(f"Unknown task '{args.task}'. Choose from: {TASKS} or 'all'")
        sys.exit(1)


if __name__ == "__main__":
    main()