releaseops-env / local.py
eastbrick's picture
first commit
408d02c
"""
local.py — in-process test runner for ReleaseOpsEnvironment.
Runs a sequence of actions against a task without starting the HTTP server.
Useful for debugging scenarios and validating ground_truth.json.
Usage:
python local.py easy_001
python local.py hard_002 --trace
python local.py all
"""
from __future__ import annotations
import argparse
import json
import sys
from pathlib import Path
# Make sure the server package is importable when run from the repo root
sys.path.insert(0, str(Path(__file__).parent / "server"))
sys.path.insert(0, str(Path(__file__).parent))
from releaseops_environment import ReleaseOpsEnvironment
from releaseops_env.models import ReleaseAction
# Task IDs accepted on the command line and iterated by the "all" mode.
TASKS = [
    "easy_001",
    "easy_002",
    "medium_001",
    "medium_002",
    "hard_001",
    "hard_002",
]

# Fixed investigation sequence replayed, in order, for every task. Each dict
# holds the keyword arguments for one action; the sequence is intended to
# cover the required evidence of every task.
HEURISTIC_SEQUENCE = [
    # One "inspect_change" action per section of the change under review.
    *(
        {"action_type": "inspect_change", "section": section}
        for section in ("diff", "tests", "approvals")
    ),
    {"action_type": "inspect_dependencies"},
    {
        "action_type": "search_incidents",
        "keywords": [
            "auth",
            "timeout",
            "rollback",
            "regression",
            "rate",
            "cascade",
        ],
    },
    {"action_type": "check_policy"},
]
def _make_action(d: dict) -> ReleaseAction:
    """Build a ``ReleaseAction`` from a plain dict of field values.

    Every key of *d* is passed to the ``ReleaseAction`` constructor as a
    keyword argument.
    """
    action = ReleaseAction(**d)
    return action
def _choose_decision(risk_signals) -> str:
    """Map the discovered risk signals to a final decision string.

    Returns ``"approve"`` when there are no signals or every signal has
    severity ``"low"`` or ``"info"``, ``"block"`` when any signal has
    severity ``"critical"``, and ``"request_changes"`` otherwise.
    """
    # all() is True for an empty sequence, so "no signals" approves as well.
    if all(sig.severity in ("low", "info") for sig in risk_signals):
        return "approve"
    if any(sig.severity == "critical" for sig in risk_signals):
        return "block"
    return "request_changes"


def run_heuristic(task_id: str, trace: bool = False) -> dict:
    """Run the fixed heuristic policy against one task and report its score.

    The policy resets a fresh ``ReleaseOpsEnvironment`` for *task_id*, replays
    ``HEURISTIC_SEQUENCE``, inspects up to two services from the change's
    ``blast_radius``, then submits a decision derived from the severities of
    the risk signals on the last observation.

    Args:
        task_id: Task to load via ``env.reset(task_id=...)``.
        trace: When true, print one line per step plus the final score.

    Returns:
        A dict with the keys ``task_id``, ``decision``, ``score`` and
        ``breakdown``. ``score`` falls back to ``0.0`` and ``breakdown`` to
        ``{}`` when the final observation leaves them unset.
    """
    env = ReleaseOpsEnvironment()
    obs = env.reset(task_id=task_id)
    if trace:
        print(f"\n[{task_id}] Change: {obs.change_summary[:80]}")

    # Last observation returned by env.step(); stays None if no step ran.
    result = None

    # Execute investigation steps
    for step_dict in HEURISTIC_SEQUENCE:
        result = env.step(_make_action(step_dict))
        if trace:
            signals = [r.signal_id for r in result.known_risk_signals]
            print(f" {step_dict['action_type']:25s} reward={result.reward:+.2f} signals={signals}")
        if result.done:
            break

    # Inspect top services from blast_radius. Skipped when the episode already
    # ended above, so a finished episode is not stepped with further
    # investigation actions.
    if result is None or not result.done:
        # NOTE(review): reaches into the environment's private _scenario
        # attribute; there is no public accessor visible from this file.
        blast = env._scenario["change"].get("blast_radius") or []
        for svc in blast[:2]:
            result = env.step(
                _make_action({"action_type": "inspect_services", "service": svc})
            )
            if trace:
                print(f" inspect_services({svc:20s}) reward={result.reward:+.2f}")
            if result.done:
                break

    # Build decision from discovered signals
    risk_signals = result.known_risk_signals if result is not None else []
    codes = [r.signal_id for r in risk_signals]
    decision = _choose_decision(risk_signals)

    final = env.step(
        _make_action(
            {
                "action_type": "submit_decision",
                "final_decision": decision,
                "reason_codes": codes,
            }
        )
    )
    score = final.final_score or 0.0
    breakdown = final.grader_breakdown or {}
    if trace:
        print(f" submit({decision:15s}) score={score:.3f} {breakdown}")
    return {"task_id": task_id, "decision": decision, "score": score, "breakdown": breakdown}
def run_all(trace: bool = False) -> None:
    """Run the heuristic on every task in ``TASKS`` and print a score table.

    The table has one row per task (task ID, decision, score and the
    breakdown rendered as ``key=value`` pairs) followed by the average score
    over all tasks.

    Args:
        trace: Forwarded to ``run_heuristic`` for each task.
    """
    rows = [run_heuristic(task_id, trace=trace) for task_id in TASKS]
    score_sum = sum(row["score"] for row in rows)

    rule = "=" * 62
    print(f"\n{rule}")
    print(f"{'Task':<15} {'Decision':<15} {'Score':>6} Breakdown")
    print(rule)
    for row in rows:
        pairs = [f"{key}={value:.2f}" for key, value in row["breakdown"].items()]
        breakdown_text = " ".join(pairs)
        print(f"{row['task_id']:<15} {row['decision']:<15} {row['score']:>6.3f} {breakdown_text}")
    print(rule)
    print(f"{'AVERAGE':<15} {'':15} {score_sum/len(rows):>6.3f}")
def main(argv: list[str] | None = None) -> int:
    """Parse command-line arguments and run one task or all of them.

    Args:
        argv: Argument list to parse; ``None`` lets argparse read
            ``sys.argv[1:]``.

    Returns:
        Process exit code: ``0`` on success, ``1`` when the task ID is not
        ``'all'`` and not listed in ``TASKS``.
    """
    parser = argparse.ArgumentParser(description="Run ReleaseOps heuristic locally")
    parser.add_argument(
        "task",
        nargs="?",
        default="all",
        help="Task ID (e.g. easy_001) or 'all' (default)",
    )
    parser.add_argument(
        "--trace",
        action="store_true",
        help="Print step-by-step trace",
    )
    args = parser.parse_args(argv)

    if args.task == "all":
        run_all(trace=args.trace)
        return 0

    if args.task not in TASKS:
        print(f"Unknown task '{args.task}'. Choose from: {TASKS} or 'all'")
        return 1

    # Honour --trace for a single task too; it was previously forced on,
    # which made the flag a no-op in this mode.
    result = run_heuristic(args.task, trace=args.trace)
    print(f"\nFinal: {result}")
    return 0


if __name__ == "__main__":
    sys.exit(main())