# server/app.py
"""HTTP entry point for the Code Debug environment.

Builds the core OpenEnv FastAPI application and registers the additional
landing-page, task-listing, grading and baseline endpoints on top of it.
"""

import json
import os
import re
import subprocess
import sys
from typing import Optional

from fastapi import FastAPI, HTTPException
from fastapi.responses import HTMLResponse
from openenv.core.env_server import create_fastapi_app

from ..models import Action, Observation, TaskInfo
from .environment import CodeDebugEnvironment
from .tasks import TASK_REGISTRY
from .grader import grade

# Upper bound (seconds) on how long the baseline subprocess may run.
_BASELINE_TIMEOUT_SECONDS = 120

# Greedy span from the first "{" to the last "}" in the baseline's stdout.
_JSON_SPAN_RE = re.compile(r'(\{.*\})', re.DOTALL)

# Core OpenEnv app (provides /reset, /step, /state, /ws, /health)
app = create_fastapi_app(CodeDebugEnvironment, Action, Observation)


# ── Additional required hackathon endpoints ────────────────────────────


@app.get("/", response_class=HTMLResponse)
@app.get("/web", response_class=HTMLResponse)
@app.get("/web/", response_class=HTMLResponse)
def home() -> str:
    """Return the landing-page body served at ``/``, ``/web`` and ``/web/``."""
    # NOTE: the literal below is runtime output; its lines intentionally start
    # at column 0 so the served text is unchanged.
    return """ Code Debug Env | OpenEnv

Code Debug Env

A production-grade OpenEnv for training frontier reasoning agents on code repair tasks. v1.1.0

📡 Server Health 📋 Task Registry 🤖 Run Baseline ⏳ Evaluating (~2 mins)... 📖 Documentation
"""


@app.get("/tasks")
def list_tasks() -> list[TaskInfo]:
    """Return all tasks with their action schema.

    One ``TaskInfo`` is produced per entry in ``TASK_REGISTRY``; every entry
    carries the same JSON schema of ``Action``.
    """
    # The schema does not depend on the task, so compute it once.
    action_schema = Action.model_json_schema()
    return [
        TaskInfo(
            task_id=tid,
            difficulty=task["difficulty"],
            description=task["description"],
            action_schema=action_schema,
        )
        for tid, task in TASK_REGISTRY.items()
    ]


@app.get("/grader")
def get_grader_score(task_id: str, submitted_code: str) -> dict:
    """Grade a submission directly (for testing / evaluation).

    Args:
        task_id: Key of the task in ``TASK_REGISTRY``.
        submitted_code: Source code to grade against the task's test suite.

    Returns:
        ``{task_id: str, score, passed, total, test_results: list}`` where
        ``test_results`` holds the ``model_dump()`` of each grader result.

    Raises:
        HTTPException: 404 when ``task_id`` is not a registered task.
    """
    if task_id not in TASK_REGISTRY:
        raise HTTPException(status_code=404, detail=f"Unknown task_id: {task_id}")

    task = TASK_REGISTRY[task_id]
    result = grade(submitted_code, task_id, task["test_suite"])
    return {
        "task_id": task_id,
        "score": result["score"],
        "passed": result["passed"],
        "total": result["total"],
        "test_results": [r.model_dump() for r in result["test_results"]],
    }


def _last_json_object(text: str) -> Optional[dict]:
    """Return the last JSON object that can be decoded from *text*, if any.

    Scans every ``{`` in *text* and tries to decode an object starting there,
    so brace-containing noise that is not valid JSON is skipped.
    """
    decoder = json.JSONDecoder()
    found = None
    pos = text.find("{")
    while pos != -1:
        try:
            obj, end = decoder.raw_decode(text, pos)
        except ValueError:
            # Not valid JSON from this brace; try the next one.
            pos = text.find("{", pos + 1)
        else:
            found = obj
            # Continue after the decoded object to look for a later one.
            pos = text.find("{", end)
    return found


def _run_baseline_script() -> dict:
    """Run ``baseline/run_baseline.py`` and return the JSON object it prints.

    Raises:
        subprocess.TimeoutExpired: the script exceeded the time limit.
        ValueError: stdout contained no decodable JSON object.
    """
    # Get absolute path to the baseline script (two directories above this file).
    base_dir = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
    baseline_script = os.path.join(base_dir, "baseline", "run_baseline.py")

    result = subprocess.run(
        [sys.executable, baseline_script, "--output", "json"],
        capture_output=True,
        text=True,
        timeout=_BASELINE_TIMEOUT_SECONDS,
    )

    # Robustly find JSON in potentially noisy stdout
    stdout = result.stdout.strip()
    stderr = result.stderr.strip()

    match = _JSON_SPAN_RE.search(stdout)
    if match is None:
        raise ValueError(
            f"No JSON found. Stdout: {stdout[:100]}. Stderr: {stderr[:100]}. ReturnCode: {result.returncode}"
        )

    try:
        return json.loads(match.group(1))
    except ValueError as j_err:
        # The greedy span can swallow brace-containing log noise around the
        # payload; fall back to decoding individual objects before giving up.
        recovered = _last_json_object(stdout)
        if recovered is not None:
            return recovered
        raise ValueError(
            f"JSON Decode Error: {j_err}. Raw Match: {match.group(1)}"
        ) from j_err


@app.get("/baseline")
def run_baseline() -> dict:
    """Run the baseline agent on all tasks and return scores.

    This endpoint triggers the baseline inference script in a subprocess and
    returns the JSON object found in its standard output.

    Raises:
        HTTPException: 500 when the script times out, cannot be started, or
            prints no decodable JSON.
    """
    try:
        return _run_baseline_script()
    except subprocess.TimeoutExpired as exc:
        # Report the timeout without echoing the full command line.
        raise HTTPException(
            status_code=500,
            detail=f"Baseline run timed out after {_BASELINE_TIMEOUT_SECONDS} seconds",
        ) from exc
    except Exception as exc:
        # Endpoint boundary: surface any failure as an HTTP 500 with its message.
        raise HTTPException(status_code=500, detail=str(exc)) from exc


def main(host: str = "0.0.0.0", port: int = 8000) -> None:
    """Entry point for the server.

    Args:
        host: Interface to bind to.
        port: TCP port to listen on.
    """
    import uvicorn

    uvicorn.run(app, host=host, port=port)


if __name__ == "__main__":
    main()