# server/app.py
from fastapi import FastAPI, HTTPException
from openenv.core.env_server import create_fastapi_app
from ..models import Action, Observation, TaskInfo
from .environment import CodeDebugEnvironment
from .tasks import TASK_REGISTRY
from .grader import grade
# Core OpenEnv app, built from the environment class and the Action /
# Observation models. Per the original note, create_fastapi_app provides the
# /reset, /step, /state, /ws and /health routes (not verifiable from this
# file); the handlers defined below register additional routes on it.
app = create_fastapi_app(CodeDebugEnvironment, Action, Observation)
# ── Additional required hackathon endpoints ────────────────────────────
from fastapi.responses import HTMLResponse
@app.get("/", response_class=HTMLResponse)
@app.get("/web", response_class=HTMLResponse)
@app.get("/web/", response_class=HTMLResponse)
def home():
    # Landing page served for "/", "/web" and "/web/". The body is a static
    # string, so it is kept as a single literal and returned unchanged.
    landing_page = """
Code Debug Env | OpenEnv
Code Debug Env
A production-grade OpenEnv for training frontier reasoning agents on code repair tasks. v1.1.0
"""
    return landing_page
@app.get("/tasks")
def list_tasks() -> list[TaskInfo]:
    """List every registered task together with the action JSON schema.

    Returns:
        One ``TaskInfo`` per entry of ``TASK_REGISTRY``, in registry
        iteration order, carrying the task's ``difficulty`` and
        ``description`` plus ``Action.model_json_schema()``.
    """

    def _describe(task_id, spec):
        # The schema is regenerated for each task, so every TaskInfo gets
        # its own schema object rather than a shared one.
        return TaskInfo(
            task_id=task_id,
            difficulty=spec["difficulty"],
            description=spec["description"],
            action_schema=Action.model_json_schema(),
        )

    return [_describe(task_id, spec) for task_id, spec in TASK_REGISTRY.items()]
@app.get("/grader")
def get_grader_score(task_id: str, submitted_code: str) -> dict:
    """Grade ``submitted_code`` against the test suite of ``task_id``.

    Meant for direct testing / evaluation of a submission.

    Args:
        task_id: Key of the task in ``TASK_REGISTRY``.
        submitted_code: The code to hand to the grader.

    Returns:
        A dict with the keys ``task_id``, ``score``, ``passed``, ``total``
        and ``test_results``; each test result is serialized through its
        ``model_dump()`` method.

    Raises:
        HTTPException: 404 when ``task_id`` is not in ``TASK_REGISTRY``.
    """
    known = task_id in TASK_REGISTRY
    if not known:
        raise HTTPException(status_code=404, detail=f"Unknown task_id: {task_id}")

    spec = TASK_REGISTRY[task_id]
    outcome = grade(submitted_code, task_id, spec["test_suite"])

    # Assemble the response in a fixed key order: identifier, the three
    # scalar figures copied from the grader output, then the per-test details.
    response = {"task_id": task_id}
    response.update((key, outcome[key]) for key in ("score", "passed", "total"))
    response["test_results"] = [
        entry.model_dump() for entry in outcome["test_results"]
    ]
    return response
def _extract_json_object(text: str) -> "dict | None":
    """Return the last top-level JSON object embedded in *text*, or ``None``.

    The text is scanned for ``{`` characters and a JSON decode is attempted
    at each one. Braces that do not start a valid object (for example in a
    log line) are skipped, and text following an object is ignored, so
    surrounding noise does not break extraction.
    """
    import json

    decoder = json.JSONDecoder()
    found = None
    start = text.find("{")
    while start != -1:
        try:
            obj, consumed = decoder.raw_decode(text[start:])
        except json.JSONDecodeError:
            # This brace does not open a valid JSON object; try the next one.
            start = text.find("{", start + 1)
            continue
        found = obj
        # Resume after the decoded object so its nested braces are not
        # mistaken for further top-level objects.
        start = text.find("{", start + consumed)
    return found


@app.get("/baseline")
def run_baseline() -> dict:
    """Run the baseline script in a subprocess and return its JSON output.

    The script ``baseline/run_baseline.py`` is looked up relative to the
    parent of the directory containing this file and is executed with the
    current interpreter and ``--output json``, with a 120 second timeout.

    Returns:
        The last top-level JSON object found in the script's stdout.

    Raises:
        HTTPException: 500 for any failure (subprocess error or timeout, or
            no decodable JSON object in stdout); ``detail`` carries the
            error text.
    """
    import os
    import subprocess
    import sys

    try:
        # Absolute path to the baseline script: <parent of this dir>/baseline/.
        base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
        baseline_script = os.path.join(base_dir, "baseline", "run_baseline.py")
        result = subprocess.run(
            [sys.executable, baseline_script, "--output", "json"],
            capture_output=True,
            text=True,
            timeout=120,
        )
        stdout = result.stdout.strip()
        stderr = result.stderr.strip()

        # stdout may contain log noise around the payload; a greedy
        # "first { to last }" match breaks as soon as that noise contains a
        # brace, so decode object-by-object instead.
        payload = _extract_json_object(stdout)
        if payload is None:
            raise ValueError(
                f"No JSON found. Stdout: {stdout[:100]}. "
                f"Stderr: {stderr[:100]}. ReturnCode: {result.returncode}"
            )
        return payload
    except Exception as e:
        # Endpoint boundary: surface every failure as a 500 with the cause
        # attached for server-side tracebacks.
        raise HTTPException(status_code=500, detail=str(e)) from e
def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Entry point for the server: serve ``app`` with uvicorn.

    Args:
        host: Interface to bind to. Defaults to ``"0.0.0.0"``.
        port: TCP port to listen on. Defaults to ``8000``.
    """
    # Imported lazily so importing this module does not require uvicorn.
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()