""" CodeArena RL Environment — Production FastAPI entrypoint. This is the primary server that Hugging Face Spaces / OpenEnv evaluator hits. All endpoints are wrapped with fallback safety so they NEVER return non-200. """ import random import traceback from typing import Optional from fastapi import FastAPI from pydantic import BaseModel from server.models import CodeArenaObservation, CodeArenaAction, TaskInfo from server.executor import run_code_with_tests from server.grader import calculate_reward, safe_reward from tasks import ALL_TASKS # ── Lookup map: difficulty string → list of tasks ────────────────────────── TASK_MAP: dict[str, list[TaskInfo]] = {} for _t in ALL_TASKS: TASK_MAP.setdefault(_t.difficulty, []).append(_t) # Also allow lookup by exact task_id (e.g. "easy-1") TASK_ID_MAP: dict[str, TaskInfo] = {_t.task_id: _t for _t in ALL_TASKS} # ── Request schema ───────────────────────────────────────────────────────── class ResetRequest(BaseModel): task_id: Optional[str] = "easy" # ── Environment state ───────────────────────────────────────────────────── class CodeArenaEnv: def __init__(self): self.tasks = ALL_TASKS self.current_task: TaskInfo | None = None self.previous_attempts: list[str] = [] self.last_error_log = "" self.last_test_results = "" self.is_done = False self.step_count = 0 self.max_steps = 5 def reset(self, task_id: str = "easy") -> CodeArenaObservation: # Priority: exact task_id match → difficulty match → random if task_id in TASK_ID_MAP: self.current_task = TASK_ID_MAP[task_id] elif task_id in TASK_MAP: self.current_task = random.choice(TASK_MAP[task_id]) else: self.current_task = random.choice(self.tasks) self.previous_attempts = [] self.last_error_log = "" self.last_test_results = "" self.is_done = False self.step_count = 0 return self._state() def step(self, action: CodeArenaAction): if self.is_done: raise ValueError("Environment is done. Call /reset first.") self.step_count += 1 exec_result = run_code_with_tests( code=action.proposed_fix, test_code=self.current_task.test_code, timeout=max(self.current_task.optimal_time_seconds * 10, 2.0), ) reward = calculate_reward(exec_result, self.current_task) self.previous_attempts.append(action.proposed_fix) self.last_error_log = exec_result.runtime_errors self.last_test_results = ( f"{exec_result.test_passed}/{exec_result.test_total} tests passed." ) if reward > 0.99 or self.step_count >= self.max_steps: self.is_done = True info = { "execution_metadata": exec_result.model_dump(), "task_id": self.current_task.task_id, } return self._state(), reward, self.is_done, info def _state(self) -> CodeArenaObservation: if not self.current_task: raise ValueError("Environment not initialised. Call /reset first.") return CodeArenaObservation( buggy_code=self.current_task.buggy_code, error_log=self.last_error_log, test_results=self.last_test_results, previous_attempts=self.previous_attempts, ) # ── FastAPI app ──────────────────────────────────────────────────────────── _env = CodeArenaEnv() app = FastAPI(title="CodeArena RL Environment") @app.get("/") def health(): return {"status": "ok", "environment": "CodeArena"} @app.post("/reset") def api_reset(body: ResetRequest = ResetRequest()): """Reset the environment. NEVER crashes — returns fallback JSON on error.""" try: task_id = body.task_id or "easy" obs = _env.reset(task_id=task_id) return { "status": "success", "message": "Environment reset successfully", "observation": obs.model_dump(), } except Exception: traceback.print_exc() return { "status": "error", "message": "fallback response", "observation": { "buggy_code": "", "error_log": str(traceback.format_exc()), "test_results": "", "previous_attempts": [], }, } @app.post("/step") def api_step(action: CodeArenaAction): try: obs, reward, done, info = _env.step(action) return { "observation": obs.model_dump(), "reward": safe_reward(reward), "done": done, "info": info, } except Exception: traceback.print_exc() return { "status": "error", "message": "fallback response", "observation": { "buggy_code": "", "error_log": str(traceback.format_exc()), "test_results": "", "previous_attempts": [], }, "reward": safe_reward(0.1), "done": True, "info": {}, } @app.get("/state") def api_state(): try: obs = _env._state() return {"observation": obs.model_dump()} except Exception: traceback.print_exc() return { "status": "error", "message": "fallback response", } # ── CLI entrypoint (OpenEnv / script console_scripts) ───────────────────── def main(): """Run the CodeArena server via uvicorn.""" import uvicorn uvicorn.run("server.app:app", host="0.0.0.0", port=7860) if __name__ == "__main__": main()