Spaces:

eastbrick
/

releaseops-env

Sleeping

File size: 4,328 Bytes

408d02c

"""FastAPI application for ReleaseOps-Env.

Exposes the core OpenEnv endpoints (reset/step/state/health) via create_app,
plus the hackathon-required endpoints: /tasks, /grader, /baseline.
"""

import json
import os
from pathlib import Path

from fastapi import HTTPException
from openenv.core.env_server.http_server import create_app
from releaseops_env.models import ReleaseAction, ReleaseObservation
from server.releaseops_environment import ReleaseOpsEnvironment

# Directory named "tasks" that sits next to this file's parent directory.
# The endpoints below look for one sub-directory per task inside it, each
# containing a ``ground_truth.json`` file.
TASKS_DIR = Path(__file__).parent.parent / "tasks"

# ── Core OpenEnv app ────────────────────────────────────────────────
# Built by OpenEnv's ``create_app`` from the environment class and its
# action/observation models; per the module docstring this supplies the
# reset/step/state/health endpoints. The extra routes below are attached
# to this same ``app`` object.
app = create_app(
    ReleaseOpsEnvironment,
    ReleaseAction,
    ReleaseObservation,
    env_name="releaseops_env",
)


# ── /tasks — list available tasks and action schema ─────────────────
@app.get("/tasks")
def list_tasks():
    """Return all available tasks with metadata and the action schema.

    Scans ``TASKS_DIR`` for sub-directories that contain a
    ``ground_truth.json`` file and reports one entry per such directory,
    ordered by directory name. Entries that are not directories, or that
    lack the ground-truth file, are skipped. If ``TASKS_DIR`` itself does
    not exist the task list is simply empty.

    Returns:
        A dict with three keys:

        * ``"tasks"`` - list of dicts with ``task_id`` (the directory name),
          ``difficulty``, ``optimal_decision``, ``max_steps`` and
          ``expected_score_range``; missing ground-truth fields fall back to
          ``"unknown"``, ``""``, ``12`` and ``{}`` respectively.
        * ``"action_schema"`` - JSON schema of ``ReleaseAction``.
        * ``"observation_schema"`` - JSON schema of ``ReleaseObservation``.

    Raises:
        json.JSONDecodeError: If a ``ground_truth.json`` file is not valid
            JSON (deliberately not swallowed so broken task data is visible).
    """
    # A missing tasks directory means "no tasks", not a server error.
    task_dirs = sorted(TASKS_DIR.iterdir()) if TASKS_DIR.is_dir() else []

    tasks = []
    for task_dir in task_dirs:
        if not task_dir.is_dir():
            continue
        gt_path = task_dir / "ground_truth.json"
        if not gt_path.is_file():
            continue
        # JSON is UTF-8 by specification; do not depend on the locale default.
        with open(gt_path, encoding="utf-8") as f:
            gt = json.load(f)
        tasks.append(
            {
                "task_id": task_dir.name,
                "difficulty": gt.get("difficulty", "unknown"),
                "optimal_decision": gt.get("optimal_decision", ""),
                "max_steps": gt.get("max_steps", 12),
                "expected_score_range": gt.get("expected_score_range", {}),
            }
        )

    return {
        "tasks": tasks,
        "action_schema": ReleaseAction.model_json_schema(),
        "observation_schema": ReleaseObservation.model_json_schema(),
    }


# ── /grader — run grader on a specific task with a built-in reference trajectory ──
@app.post("/grader")
def run_grader(task_id: str = "easy_001"):
    """
    Run a full episode with an optimal-ish trajectory and return the grader score.

    This endpoint validates the task, creates a fresh environment, plays a
    fixed reference trajectory (six evidence-gathering actions followed by a
    ``submit_decision`` built from the task's ground truth) and returns the
    grading result.

    Args:
        task_id: Name of a task directory under ``TASKS_DIR``. Must be a bare
            directory name (no path separators, not ``.``/``..``).

    Returns:
        A dict with ``task_id``, ``score`` (``obs.final_score``),
        ``grader_breakdown``, ``done``, ``steps_taken``
        (``env.state.step_count``) and ``cumulative_reward``, all read from
        the last observation / environment state.

    Raises:
        HTTPException: 404 if ``task_id`` is not a plain directory name or no
            ``ground_truth.json`` exists for it.
    """
    # task_id comes from the request: accept only a single path component so
    # it cannot escape TASKS_DIR (e.g. "../../etc").
    if task_id in {"", ".", ".."} or Path(task_id).name != task_id:
        raise HTTPException(status_code=404, detail=f"Task '{task_id}' not found")

    # Confirm the task exists *before* touching the environment, so an
    # unknown task yields the intended 404 rather than whatever reset() does.
    gt_path = TASKS_DIR / task_id / "ground_truth.json"
    if not gt_path.is_file():
        raise HTTPException(status_code=404, detail=f"Task '{task_id}' not found")

    with open(gt_path, encoding="utf-8") as f:
        gt = json.load(f)

    env = ReleaseOpsEnvironment()
    obs = env.reset(task_id=task_id)

    # Play a standard evidence-gathering trajectory
    evidence_actions = [
        ReleaseAction(action_type="inspect_change", section="diff"),
        ReleaseAction(action_type="inspect_change", section="tests"),
        ReleaseAction(action_type="inspect_change", section="approvals"),
        ReleaseAction(action_type="inspect_dependencies"),
        ReleaseAction(
            action_type="search_incidents", keywords=["retry", "timeout", "latency"]
        ),
        ReleaseAction(action_type="check_policy"),
    ]

    for action in evidence_actions:
        obs = env.step(action)
        # The environment may end the episode early; stop stepping if so.
        if obs.done:
            break

    # Only submit a decision if the episode is still running.
    if not obs.done:
        obs = env.step(
            ReleaseAction(
                action_type="submit_decision",
                final_decision=gt.get("optimal_decision", "block"),
                reason_codes=gt.get("required_reason_codes", []),
            )
        )

    return {
        "task_id": task_id,
        "score": obs.final_score,
        "grader_breakdown": obs.grader_breakdown,
        "done": obs.done,
        "steps_taken": env.state.step_count,
        "cumulative_reward": obs.cumulative_reward,
    }


# ── /baseline — run baseline agent on all tasks ─────────────────────
@app.post("/baseline")
def run_baseline_endpoint():
    """
    Execute the bundled heuristic baseline agent and return its results.

    Delegates entirely to ``run_heuristic_baseline`` and hands back whatever
    it returns, unmodified. Per the original endpoint description, the
    baseline is rule-based, covers all tasks, and needs no LLM API key.
    """
    # Imported at call time rather than at module import.
    # NOTE(review): presumably to keep server start-up independent of the
    # baseline package - confirm.
    from baseline.heuristic_agent import run_heuristic_baseline

    return run_heuristic_baseline()


def main():
    """Serve ``app`` with uvicorn, listening on 0.0.0.0 at port 7860."""
    from uvicorn import run as serve

    serve(app, host="0.0.0.0", port=7860)


# Start the server only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()