codearena-rl / server /app.py
havinashpatil
Finalizing CodeArena RL Benchmark: frontend improvements, GRPO training scripts, and cleaned environment
03a7eb9
raw
history blame
13.4 kB
"""
CodeArena RL Environment — Production FastAPI entrypoint.
This is the primary server that Hugging Face Spaces / OpenEnv evaluator hits.
All endpoints are wrapped with fallback safety so they NEVER return non-200.
"""
import random
import traceback
from typing import Optional
from fastapi import FastAPI
from fastapi.middleware.cors import CORSMiddleware
from pydantic import BaseModel
from server.models import CodeArenaObservation, CodeArenaAction, TaskInfo
from server.executor import run_code_with_tests
from server.grader import calculate_reward, safe_reward, force_valid_reward
from server.ai_fixer import generate_fix
from server.raw_runner import run_raw_code
from server.memory import store_success, log_complexity_reward, get_complexity_reward_stats, get_all_memories
from server.algorithm_detector import detect_complexity, detect_problem_type, get_optimization_hint
from tasks import ALL_TASKS
# ── Lookup map: difficulty string → list of tasks ──────────────────────────
# Tasks are grouped under their difficulty in the order they appear in ALL_TASKS.
TASK_MAP: dict[str, list[TaskInfo]] = {}
for _t in ALL_TASKS:
    if _t.difficulty not in TASK_MAP:
        TASK_MAP[_t.difficulty] = []
    TASK_MAP[_t.difficulty].append(_t)

# Also allow lookup by exact task_id (e.g. "easy-1"); if two tasks share an id,
# the later one in ALL_TASKS wins.
TASK_ID_MAP: dict[str, TaskInfo] = {_t.task_id: _t for _t in ALL_TASKS}
# ── Request schema ─────────────────────────────────────────────────────────
class ResetRequest(BaseModel):
    """Request body accepted by ``POST /reset``."""

    # Exact task id (e.g. "easy-1"), a difficulty name, or "auto".
    # The /reset handler substitutes "easy" when this is None or empty.
    task_id: Optional[str] = "easy"
# ── Environment state ─────────────────────────────────────────────────────
class CodeArenaEnv:
    """In-memory state machine for a single code-repair episode.

    One instance holds the task currently being attempted, the fixes
    submitted so far and the feedback from the most recent execution.
    ``reset`` selects a task and clears the per-episode state; ``step`` runs
    a proposed fix against the task's tests and scores it.
    """

    def __init__(self):
        self.tasks = ALL_TASKS
        # Task chosen by the latest reset(); None until reset() is called.
        self.current_task: TaskInfo | None = None
        # Every fix submitted since the latest reset(), oldest first.
        self.previous_attempts: list[str] = []
        # Feedback from the most recent step(), surfaced in the observation.
        self.last_error_log = ""
        self.last_test_results = ""
        self.is_done = False
        self.step_count = 0
        self.max_steps = 5
        # Rolling window of the most recent step rewards (at most five).
        # It is NOT cleared by reset(); reset("auto") reads it to pick a
        # difficulty.
        self.episode_rewards_history: list[float] = []

    def reset(self, task_id: str = "easy") -> CodeArenaObservation:
        """Select a task and clear all per-episode state.

        Args:
            task_id: An exact task id, a difficulty present in ``TASK_MAP``,
                or ``"auto"``. ``"auto"`` chooses a difficulty from the mean
                of the reward history: below 0.4 gives ``"easy"``, up to and
                including 0.75 gives ``"medium"``, anything higher gives
                ``"hard"``; with no history it gives ``"easy"``. Any value
                that matches neither map selects a random task.

        Returns:
            The initial observation for the selected task.
        """
        if task_id == "auto":
            if not self.episode_rewards_history:
                task_id = "easy"
            else:
                avg_reward = sum(self.episode_rewards_history) / len(self.episode_rewards_history)
                if avg_reward < 0.4:
                    task_id = "easy"
                elif avg_reward <= 0.75:
                    task_id = "medium"
                else:
                    task_id = "hard"

        # Priority: exact task_id match → difficulty match → random
        if task_id in TASK_ID_MAP:
            self.current_task = TASK_ID_MAP[task_id]
        elif task_id in TASK_MAP:
            self.current_task = random.choice(TASK_MAP[task_id])
        else:
            self.current_task = random.choice(self.tasks)

        self.previous_attempts = []
        self.last_error_log = ""
        self.last_test_results = ""
        self.is_done = False
        self.step_count = 0
        return self._state()

    def step(self, action: CodeArenaAction) -> tuple[CodeArenaObservation, float, bool, dict]:
        """Run one proposed fix against the current task's tests and score it.

        Args:
            action: The submission; only ``action.proposed_fix`` is read.

        Returns:
            ``(observation, reward, done, info)``. ``reward`` is the base
            reward minus the step and repeat-submission penalties, clamped to
            ``[0.001, 0.999]``. ``info`` carries the execution metadata, the
            task id, the reward components, the test summary and feedback.

        Raises:
            ValueError: If the episode is already done, or if no task has
                been selected yet (``reset`` was never called).
        """
        if self.is_done:
            raise ValueError("Environment is done. Call /reset first.")
        # Validate before touching any state so a premature call leaves the
        # environment untouched and fails with the same error as _state().
        if self.current_task is None:
            raise ValueError("Environment not initialised. Call /reset first.")

        self.step_count += 1

        print(f"[DEBUG] Step {self.step_count}: Processing action")
        print(f"[DEBUG] Proposed fix length: {len(action.proposed_fix)} chars")
        print(f"[DEBUG] Proposed fix preview: {action.proposed_fix[:200]}...")

        exec_result = run_code_with_tests(
            code=action.proposed_fix,
            test_code=self.current_task.test_code,
            # Ten times the task's optimal time, but never less than 2 seconds.
            timeout=max(self.current_task.optimal_time_seconds * 10, 2.0),
        )

        print(f"[DEBUG] Execution result: compile_success={exec_result.compile_success}, test_passed={exec_result.test_passed}/{exec_result.test_total}, exec_time={exec_result.execution_time_seconds:.2f}s")
        if exec_result.runtime_errors:
            print(f"[DEBUG] Runtime errors: {exec_result.runtime_errors[:500]}")

        base_reward, reward_components = calculate_reward(exec_result, self.current_task, action.proposed_fix)

        print(f"[DEBUG] Base reward: {base_reward:.3f}")
        print(f"[DEBUG] Reward components: {reward_components}")

        step_penalty = 0.01 * self.step_count  # Reduced from 0.02 for gentler learning
        # Resubmitting a fix that was already tried this episode costs extra.
        novelty_penalty = 0.1 if action.proposed_fix in self.previous_attempts else 0.0

        print(f"[DEBUG] Penalties: step={step_penalty:.3f}, novelty={novelty_penalty:.3f}")

        final_reward = base_reward - step_penalty - novelty_penalty
        final_reward = max(0.001, min(0.999, float(final_reward)))

        print(f"[DEBUG] Final reward: {final_reward:.3f}")

        self.previous_attempts.append(action.proposed_fix)
        self.last_error_log = exec_result.runtime_errors
        self.last_test_results = (
            f"{exec_result.test_passed}/{exec_result.test_total} tests passed."
        )

        # NOTE(review): step_penalty is at least 0.01, so final_reward can only
        # exceed 0.99 if base_reward exceeds 1.0 — confirm against
        # calculate_reward; otherwise episodes end only via max_steps.
        if final_reward > 0.99 or self.step_count >= self.max_steps:
            self.is_done = True

        # Keep only the five most recent rewards.
        self.episode_rewards_history.append(final_reward)
        if len(self.episode_rewards_history) > 5:
            self.episode_rewards_history.pop(0)

        info = {
            "execution_metadata": exec_result.model_dump(),
            "task_id": self.current_task.task_id,
            "reward_components": reward_components,
            "test_results": self.last_test_results,
            "llm_feedback": reward_components.get("feedback", "No feedback provided.")
        }
        return self._state(), final_reward, self.is_done, info

    def _state(self) -> CodeArenaObservation:
        """Build the observation for the current task and latest feedback.

        Raises:
            ValueError: If no task has been selected yet.
        """
        if not self.current_task:
            raise ValueError("Environment not initialised. Call /reset first.")
        return CodeArenaObservation(
            buggy_code=self.current_task.buggy_code,
            error_log=self.last_error_log,
            test_results=self.last_test_results,
            previous_attempts=self.previous_attempts,
        )
# ── FastAPI app ────────────────────────────────────────────────────────────
# One shared environment instance backs every endpoint in this module.
_env = CodeArenaEnv()

app = FastAPI(title="CodeArena RL Environment")

# Allow the Vite dev server (port 3000) and any other origin to call this API.
# NOTE(review): a wildcard origin together with allow_credentials=True is very
# permissive — confirm this is intended outside local development.
app.add_middleware(
    CORSMiddleware,
    allow_credentials=True,
    allow_origins=["*"],
    allow_headers=["*"],
    allow_methods=["*"],
)
@app.get("/health")
def health():
    """Liveness probe: report that the service is up."""
    payload = {"status": "ok", "environment": "CodeArena"}
    return payload
@app.post("/reset")
def api_reset(body: ResetRequest = ResetRequest()):
    """Reset the environment and return the first observation.

    Any exception raised while resetting is caught and reported in a fallback
    payload instead of propagating, so the handler itself never crashes.

    Args:
        body: Optional request body; a missing or empty ``task_id`` is
            treated as ``"easy"``.

    Returns:
        A dict with ``status``, ``message``, ``observation`` and ``info``.
        On failure ``status`` is ``"error"``, the observation is empty apart
        from the traceback in ``error_log``, and ``info`` holds empty strings
        so both payloads share one shape.
    """
    try:
        task_id = body.task_id or "easy"
        obs = _env.reset(task_id=task_id)
        return {
            "status": "success",
            "message": "Environment reset successfully",
            "observation": obs.model_dump(),
            "info": {
                "task_id": _env.current_task.task_id if _env.current_task else "",
                "difficulty": _env.current_task.difficulty if _env.current_task else ""
            }
        }
    except Exception:
        traceback.print_exc()
        return {
            "status": "error",
            "message": "fallback response",
            "observation": {
                "buggy_code": "",
                "error_log": traceback.format_exc(),
                "test_results": "",
                "previous_attempts": [],
            },
            # Mirror the success payload's keys so clients that read
            # response["info"]["task_id"] do not fail on the error path.
            "info": {
                "task_id": "",
                "difficulty": "",
            },
        }
@app.post("/step")
def api_step(action: CodeArenaAction):
    """Submit one proposed fix to the environment and return the outcome.

    The code may arrive in either ``proposed_fix`` or ``action``. When both
    are empty, a short error payload is returned and the environment is not
    stepped. Any exception is caught and turned into a fallback payload that
    carries the traceback in ``error_log`` and marks the episode as done.
    """
    try:
        # Compatibility: accept the code under either 'proposed_fix' or 'action'.
        submitted_code = action.proposed_fix or action.action
        if not submitted_code:
            return {"status": "error", "message": "No code provided in 'proposed_fix' or 'action'"}

        # Normalise onto 'proposed_fix', the field _env.step reads.
        action.proposed_fix = submitted_code
        observation, step_reward, finished, details = _env.step(action)

        # Safety fallback before force_valid_reward.
        step_reward = 0.5 if step_reward is None else step_reward

        response = {
            "observation": observation.model_dump(),
            "reward": force_valid_reward(step_reward),
            "done": finished,
            "info": details,
        }
        return response
    except Exception:
        traceback.print_exc()
        error_observation = {
            "buggy_code": "",
            "error_log": str(traceback.format_exc()),
            "test_results": "",
            "previous_attempts": [],
        }
        return {
            "status": "error",
            "message": "fallback response",
            "observation": error_observation,
            "reward": force_valid_reward(0.1),
            "done": True,
            "info": {},
        }
@app.get("/state")
def api_state():
    """Return the step counter, submitted fixes and current observation.

    If the environment cannot produce an observation (for example before the
    first reset), a minimal error payload is returned instead.
    """
    try:
        current = _env._state()
        snapshot = {
            "step": _env.step_count,
            "history": _env.previous_attempts,
            "observation": current.model_dump()
        }
        return snapshot
    except Exception:
        traceback.print_exc()
        return {"status": "error", "message": "fallback response"}
# ── AI Fix endpoint ───────────────────────────────────────────────────────
class FixRequest(BaseModel):
    """Request body accepted by ``POST /fix``.

    Every field is handed to ``generate_fix`` by the endpoint; ``None`` or
    empty values for ``error_log``, ``reward`` and ``task_id`` are replaced
    with ``""``, ``0.0`` and ``""`` respectively before the call.
    """

    # Source code to be repaired; also echoed back unchanged if fixing fails.
    code: str
    error_log: Optional[str] = ""
    # NOTE(review): client-supplied URL; presumably generate_fix sends requests
    # to it — confirm it is validated or restricted as needed.
    ollama_url: Optional[str] = "http://localhost:11434"
    model: Optional[str] = "llama3.2:latest"
    use_ollama: Optional[bool] = True
    reward: Optional[float] = 0.0
    task_id: Optional[str] = ""
@app.post("/fix")
def api_fix(body: FixRequest):
    """Generate a code fix using Ollama (if available) or built-in pattern fixer.

    Returns whatever ``generate_fix`` produces. If it raises, the submitted
    code is returned unchanged with ``success`` set to ``False`` and the
    traceback under ``error``.
    """
    try:
        fix_kwargs = {
            "code": body.code,
            "error_log": body.error_log or "",
            "ollama_url": body.ollama_url,
            "model": body.model,
            "use_ollama": body.use_ollama,
            "reward": body.reward or 0.0,
            "task_id": body.task_id or "",
        }
        return generate_fix(**fix_kwargs)
    except Exception:
        traceback.print_exc()
        passthrough = {
            "fixed_code": body.code,
            "method": "passthrough",
            "success": False,
            "error": traceback.format_exc()
        }
        return passthrough
# ── Raw Runner endpoint (Sandbox) ──────────────────────────────────────────
class RawRequest(BaseModel):
    """Request body accepted by ``POST /run_raw``."""

    # Code handed as-is to run_raw_code by the endpoint.
    code: str
@app.post("/run_raw")
def api_run_raw(body: RawRequest):
    """Run arbitrary code without tests and return output/complexity and reward.

    The reward is simulated: 0.95 for a run with empty stderr, 0.1 otherwise,
    minus 0.15 when execution took longer than one second. The result is
    passed through ``force_valid_reward`` before being returned.
    """
    try:
        outcome = run_raw_code(body.code)

        # Simulated sandbox reward: anything on stderr counts as a failure.
        had_errors = bool(outcome.stderr)
        score = 0.1 if had_errors else 0.95
        breakdown = {
            "Execution Success": 0.5,
            "Error Free": 0.0 if had_errors else 0.45,
        }

        # Slight penalty for slow executions.
        if outcome.execution_time > 1.0:
            score -= 0.15
            breakdown["Time Complexity"] = -0.15

        return {
            "status": "success",
            "stdout": outcome.stdout,
            "stderr": outcome.stderr,
            "execution_time": outcome.execution_time,
            "time_complexity_hint": outcome.time_complexity_hint,
            "reward": force_valid_reward(score),
            "reward_components": breakdown,
            # Sandbox mode is never "done" strictly by execution; the AI must
            # verify optimality.
            "done": False
        }
    except Exception as e:
        traceback.print_exc()
        failure = {
            "status": "error",
            "stderr": str(e),
            "stdout": "",
            "execution_time": 0,
            "time_complexity_hint": "Error evaluating complexity.",
            "reward": force_valid_reward(0.0),
            "reward_components": {},
            "done": False
        }
        return failure
# ── Stats & Memory endpoints (Research Dashboard) ─────────────────────────
@app.get("/stats")
def api_stats():
    """Return complexity vs reward stats together with the recent reward history.

    ``mean_reward`` is the average of the environment's reward history rounded
    to three decimals (0.0 when the history is empty). On any error an
    all-empty payload is returned.
    """
    try:
        complexity_stats = get_complexity_reward_stats()
        history = _env.episode_rewards_history
        # max(1, ...) avoids dividing by zero on an empty history.
        mean_reward = round(sum(history) / max(1, len(history)), 3)
        return {
            "complexity_reward_stats": complexity_stats,
            "episode_history": history,
            "mean_reward": mean_reward,
        }
    except Exception:
        traceback.print_exc()
        return {"complexity_reward_stats": {}, "episode_history": [], "mean_reward": 0.0}
@app.get("/memory")
def api_memory():
    """Return all stored best solutions from agent memory.

    If reading the memory store fails, the error is printed to stderr and an
    empty ``memories`` mapping is returned.
    """
    try:
        return {"memories": get_all_memories()}
    except Exception:
        # Log like every other endpoint so a failing memory store is visible
        # instead of silently looking like "no memories yet".
        traceback.print_exc()
        return {"memories": {}}
# ── CLI entrypoint (OpenEnv / script console_scripts) ─────────────────────
def main(host: str = "0.0.0.0", port: int = 7860) -> None:
    """Run the CodeArena server via uvicorn.

    Args:
        host: Interface to bind. The default listens on all interfaces.
        port: TCP port to listen on. The default is 7860.

    Calling ``main()`` with no arguments behaves exactly as before, so
    existing console-script entry points keep working.
    """
    # Imported lazily so importing this module does not require uvicorn.
    import uvicorn

    uvicorn.run("server.app:app", host=host, port=port)


if __name__ == "__main__":
    main()