eastbrick's picture
first commit
408d02c
"""FastAPI application for ReleaseOps-Env.
Exposes the core OpenEnv endpoints (reset/step/state/health) via create_app,
plus the hackathon-required endpoints: /tasks, /grader, /baseline.
"""
import json
import os
from pathlib import Path
from fastapi import HTTPException
from openenv.core.env_server.http_server import create_app
from releaseops_env.models import ReleaseAction, ReleaseObservation
from server.releaseops_environment import ReleaseOpsEnvironment
# Task fixtures live in a "tasks" directory two levels above this file.
TASKS_DIR = Path(__file__).parent.parent.joinpath("tasks")
# ── Core OpenEnv app ────────────────────────────────────────────────
# Build the application from the environment class and its action/observation
# models; the extra routes defined in this module are registered on this object.
app = create_app(
    ReleaseOpsEnvironment, ReleaseAction, ReleaseObservation, env_name="releaseops_env"
)
# ── /tasks — list available tasks and action schema ─────────────────
@app.get("/tasks")
def list_tasks():
    """List every available task together with the action/observation schemas.

    Scans ``TASKS_DIR`` for sub-directories that contain a
    ``ground_truth.json`` file; plain files and directories without one are
    skipped. Tasks are returned in sorted directory order.

    Returns:
        A dict with three keys: ``tasks`` (one metadata dict per task, with
        defaults filled in for fields missing from the ground truth),
        ``action_schema`` and ``observation_schema`` (the results of
        ``model_json_schema()`` on the action and observation models).
    """
    # A missing tasks directory yields an empty listing rather than an
    # unhandled FileNotFoundError from iterdir().
    task_dirs = sorted(TASKS_DIR.iterdir()) if TASKS_DIR.is_dir() else []

    tasks = []
    for task_dir in task_dirs:
        if not task_dir.is_dir():
            continue
        gt_path = task_dir / "ground_truth.json"
        # is_file() also rejects a directory that happens to carry this name.
        if not gt_path.is_file():
            continue
        # Read as UTF-8 explicitly so decoding does not depend on the
        # platform's locale encoding.
        with gt_path.open(encoding="utf-8") as f:
            gt = json.load(f)
        tasks.append(
            {
                "task_id": task_dir.name,
                "difficulty": gt.get("difficulty", "unknown"),
                "optimal_decision": gt.get("optimal_decision", ""),
                "max_steps": gt.get("max_steps", 12),
                "expected_score_range": gt.get("expected_score_range", {}),
            }
        )

    return {
        "tasks": tasks,
        "action_schema": ReleaseAction.model_json_schema(),
        "observation_schema": ReleaseObservation.model_json_schema(),
    }
# ── /grader — run grader on a specific task with a built-in reference trajectory ──
@app.post("/grader")
def run_grader(task_id: str = "easy_001"):
    """Play a reference trajectory on one task and return the grader result.

    A fresh environment is created and reset to ``task_id``, a fixed sequence
    of evidence-gathering actions is stepped, and finally the decision and
    reason codes stored in the task's ``ground_truth.json`` are submitted
    (unless the episode has already ended).

    Args:
        task_id: Name of a task directory directly under ``TASKS_DIR``.

    Returns:
        A dict with ``task_id``, ``score``, ``grader_breakdown``, ``done``,
        ``steps_taken`` and ``cumulative_reward``, read from the final
        observation and the environment state.

    Raises:
        HTTPException: 404 when ``task_id`` is not a single path component or
            the task has no ``ground_truth.json`` file.
    """
    # task_id comes straight from the request: accept only a single path
    # component so it cannot point outside TASKS_DIR (e.g. "../other").
    is_plain_name = (
        task_id not in {"", ".", ".."} and Path(task_id).name == task_id
    )
    gt_path = TASKS_DIR / task_id / "ground_truth.json"
    # Validate before touching the environment, so an unknown task is answered
    # with 404 instead of being passed to env.reset() first.
    if not is_plain_name or not gt_path.is_file():
        raise HTTPException(status_code=404, detail=f"Task '{task_id}' not found")

    # Read as UTF-8 explicitly so decoding does not depend on the locale.
    with gt_path.open(encoding="utf-8") as f:
        gt = json.load(f)

    env = ReleaseOpsEnvironment()
    obs = env.reset(task_id=task_id)

    # Play a standard evidence-gathering trajectory
    evidence_actions = [
        ReleaseAction(action_type="inspect_change", section="diff"),
        ReleaseAction(action_type="inspect_change", section="tests"),
        ReleaseAction(action_type="inspect_change", section="approvals"),
        ReleaseAction(action_type="inspect_dependencies"),
        ReleaseAction(
            action_type="search_incidents", keywords=["retry", "timeout", "latency"]
        ),
        ReleaseAction(action_type="check_policy"),
    ]
    for action in evidence_actions:
        obs = env.step(action)
        if obs.done:
            break

    # Submit the ground-truth decision only if the episode is still running.
    if not obs.done:
        obs = env.step(
            ReleaseAction(
                action_type="submit_decision",
                final_decision=gt.get("optimal_decision", "block"),
                reason_codes=gt.get("required_reason_codes", []),
            )
        )

    return {
        "task_id": task_id,
        "score": obs.final_score,
        "grader_breakdown": obs.grader_breakdown,
        "done": obs.done,
        "steps_taken": env.state.step_count,
        "cumulative_reward": obs.cumulative_reward,
    }
# ── /baseline — run baseline agent on all tasks ─────────────────────
@app.post("/baseline")
def run_baseline_endpoint():
    """Score the built-in heuristic baseline agent on all tasks.

    The agent is rule-based, so no LLM API key is required; it is used for
    reproducibility. Whatever ``run_heuristic_baseline()`` returns is passed
    back unchanged.
    """
    # Imported at request time rather than at module import.
    from baseline.heuristic_agent import run_heuristic_baseline as heuristic_baseline

    return heuristic_baseline()
def main(host: str = "0.0.0.0", port: int = 7860):
    """Serve ``app`` with uvicorn.

    Args:
        host: Address to bind. The default ``"0.0.0.0"`` matches the
            previously hard-coded value.
        port: TCP port to listen on. The default ``7860`` matches the
            previously hard-coded value.
    """
    # uvicorn is imported here so that merely importing this module (e.g. by
    # an external ASGI runner) does not require it.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


# Start the server only when this file is executed as a script.
if __name__ == "__main__":
    main()