Spaces:
Sleeping
Sleeping
File size: 4,328 Bytes
408d02c | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 | """FastAPI application for ReleaseOps-Env.
Exposes the core OpenEnv endpoints (reset/step/state/health) via create_app,
plus the hackathon-required endpoints: /tasks, /grader, /baseline.
"""
import json
import os
from pathlib import Path
from fastapi import HTTPException
from openenv.core.env_server.http_server import create_app
from releaseops_env.models import ReleaseAction, ReleaseObservation
from server.releaseops_environment import ReleaseOpsEnvironment
# Location of the "tasks" directory: two levels up from this file, then
# "tasks". The endpoints below read <task>/ground_truth.json from it.
TASKS_DIR = Path(__file__).parent.parent / "tasks"
# ── Core OpenEnv app ────────────────────────────────────────────────
# create_app receives the environment class, its action and observation
# models, and the environment name; the additional routes defined below are
# registered on the object it returns.
app = create_app(
ReleaseOpsEnvironment,
ReleaseAction,
ReleaseObservation,
env_name="releaseops_env",
)
# ── /tasks — list available tasks and action schema ─────────────────
@app.get("/tasks")
def list_tasks():
    """Return all available tasks with metadata, plus the I/O schemas.

    Every sub-directory of ``TASKS_DIR`` that contains a
    ``ground_truth.json`` file is reported, in sorted directory order.
    Entries without that file are skipped.

    Returns:
        A dict with three keys:

        * ``tasks``: one dict per task holding ``task_id`` (the directory
          name), ``difficulty``, ``optimal_decision``, ``max_steps`` and
          ``expected_score_range``; fields missing from the ground-truth
          file fall back to ``"unknown"``, ``""``, ``12`` and ``{}``.
        * ``action_schema``: JSON schema of ``ReleaseAction``.
        * ``observation_schema``: JSON schema of ``ReleaseObservation``.
    """
    # A missing tasks directory is reported as "no tasks" instead of letting
    # iterdir() raise FileNotFoundError and surface as an HTTP 500.
    task_dirs = sorted(TASKS_DIR.iterdir()) if TASKS_DIR.is_dir() else []

    tasks = []
    for task_dir in task_dirs:
        if not task_dir.is_dir():
            continue
        gt_path = task_dir / "ground_truth.json"
        if not gt_path.is_file():
            continue
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(gt_path, encoding="utf-8") as f:
            gt = json.load(f)
        tasks.append(
            {
                "task_id": task_dir.name,
                "difficulty": gt.get("difficulty", "unknown"),
                "optimal_decision": gt.get("optimal_decision", ""),
                "max_steps": gt.get("max_steps", 12),
                "expected_score_range": gt.get("expected_score_range", {}),
            }
        )

    return {
        "tasks": tasks,
        "action_schema": ReleaseAction.model_json_schema(),
        "observation_schema": ReleaseObservation.model_json_schema(),
    }
# ── /grader — run the grader on a specific task using a built-in reference trajectory ──
@app.post("/grader")
def run_grader(task_id: str = "easy_001"):
    """
    Play a fixed reference trajectory on one task and return the grader score.

    A fresh environment is reset to ``task_id``, a standard sequence of
    evidence-gathering actions is stepped through, and, unless the episode
    has already ended, the decision and reason codes recorded in the task's
    ``ground_truth.json`` are submitted.

    Args:
        task_id: Name of a task directory directly under ``TASKS_DIR``.

    Returns:
        A dict with ``task_id``, ``score``, ``grader_breakdown``, ``done``,
        ``steps_taken`` and ``cumulative_reward``, read from the final
        observation and the environment state.

    Raises:
        HTTPException: 404 when ``task_id`` is not a plain directory name or
            the task has no ``ground_truth.json``.
    """
    # task_id comes straight from the request. Accept only a single path
    # component so values such as "..", "a/b" or an absolute path cannot make
    # the lookup leave TASKS_DIR.
    is_plain_name = (
        task_id not in ("", ".", "..") and Path(task_id).name == task_id
    )
    gt_path = TASKS_DIR / task_id / "ground_truth.json"
    if not is_plain_name or not gt_path.is_file():
        raise HTTPException(status_code=404, detail=f"Task '{task_id}' not found")

    # JSON is UTF-8 by specification; do not depend on the locale default.
    with open(gt_path, encoding="utf-8") as f:
        gt = json.load(f)

    # The environment is created only after the task is known to exist, so an
    # unknown id produces the 404 above rather than a failure inside reset().
    env = ReleaseOpsEnvironment()
    obs = env.reset(task_id=task_id)

    # Play a standard evidence-gathering trajectory
    evidence_actions = [
        ReleaseAction(action_type="inspect_change", section="diff"),
        ReleaseAction(action_type="inspect_change", section="tests"),
        ReleaseAction(action_type="inspect_change", section="approvals"),
        ReleaseAction(action_type="inspect_dependencies"),
        ReleaseAction(
            action_type="search_incidents", keywords=["retry", "timeout", "latency"]
        ),
        ReleaseAction(action_type="check_policy"),
    ]
    for action in evidence_actions:
        obs = env.step(action)
        if obs.done:
            break

    # Submit the ground-truth decision unless the episode already finished
    # during evidence gathering.
    if not obs.done:
        obs = env.step(
            ReleaseAction(
                action_type="submit_decision",
                final_decision=gt.get("optimal_decision", "block"),
                reason_codes=gt.get("required_reason_codes", []),
            )
        )

    return {
        "task_id": task_id,
        "score": obs.final_score,
        "grader_breakdown": obs.grader_breakdown,
        "done": obs.done,
        "steps_taken": env.state.step_count,
        "cumulative_reward": obs.cumulative_reward,
    }
# ── /baseline — run baseline agent on all tasks ─────────────────────
@app.post("/baseline")
def run_baseline_endpoint():
    """
    Run the built-in heuristic baseline agent against all tasks.

    Returns whatever ``run_heuristic_baseline`` produces, unchanged. Does NOT
    require an LLM API key — the baseline is a rule-based heuristic agent,
    chosen for reproducibility.
    """
    # Imported at call time rather than at module import.
    from baseline.heuristic_agent import run_heuristic_baseline

    return run_heuristic_baseline()
def main(host: str = "0.0.0.0", port: int = 7860):
    """Serve ``app`` with uvicorn.

    Called with no arguments this behaves as before: it binds all interfaces
    on port 7860.

    Args:
        host: Interface address to bind.
        port: TCP port to listen on.
    """
    # uvicorn is imported here so it is only needed when the server is
    # actually launched through this entry point.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()
|