# Repository path: code-debug-env/server/environment.py
# Last commit: "error fixing" (d510c1d), by Souravdanyal
# server/environment.py
# Core environment: manages episode state, dispatches to task banks and graders.
import random
from uuid import uuid4
from typing import Optional
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from models import DebugAction, DebugObservation, DebugState
from server.tasks.task_easy import get_random_easy_task
from server.tasks.task_medium import get_random_medium_task
from server.tasks.task_hard import get_random_hard_task
from server.graders.grader_easy import grade_easy
from server.graders.grader_medium import grade_medium
from server.graders.grader_hard import grade_hard
# Difficulty name -> zero-argument callable that produces the task dict for a
# new episode (the environment calls the selected getter with no arguments).
TASK_GETTERS: dict = {
    "easy": get_random_easy_task,
    "medium": get_random_medium_task,
    "hard": get_random_hard_task,
}

# Difficulty name -> grader used to score a submission for that difficulty.
# Keys mirror TASK_GETTERS so a difficulty can be looked up in either table.
GRADERS: dict = {
    "easy": grade_easy,
    "medium": grade_medium,
    "hard": grade_hard,
}

# Number of step() calls after which an episode is marked done.
MAX_STEPS: int = 5
class CodeDebugEnvironment(Environment):
    """OpenEnv environment for LLM-based code debugging.

    Supports three difficulty levels ("easy", "medium", "hard"). Each episode
    serves one buggy-code task; every ``step()`` grades one submitted fix and
    reports the step reward together with the cumulative and best rewards of
    the episode. An episode ends when a submission earns a reward of exactly
    1.0 or after ``MAX_STEPS`` steps.
    """

    def __init__(self):
        """Create an environment with no task loaded; call ``reset()`` to begin."""
        # NOTE(review): assumes the base-class constructor takes no required
        # arguments -- confirm against the installed openenv version.
        super().__init__()
        self._difficulty: str = "easy"
        self._current_task: Optional[dict] = None
        self._reset_episode_state()

    def _reset_episode_state(self) -> None:
        """Assign a fresh episode id and zero every per-episode counter."""
        self._episode_id: str = str(uuid4())
        self._step_count: int = 0
        self._cumulative_reward: float = 0.0
        self._best_reward: float = 0.0
        self._current_reward: float = 0.0
        self._done: bool = False

    def _task_observation(
        self,
        *,
        reward: Optional[float],
        passed_tests: Optional[int],
        total_tests: int,
        feedback: Optional[str],
        done: bool,
    ) -> DebugObservation:
        """Build an observation describing the active task plus the given results.

        Task fields are read from ``self._current_task`` and the cumulative and
        best rewards from the instance, so callers must update those before
        calling. Requires a task to be loaded.
        """
        task = self._current_task
        return DebugObservation(
            task_id=task["task_id"],
            difficulty=self._difficulty,
            buggy_code=task["buggy_code"],
            instructions=task["instructions"],
            test_cases_description=task["test_cases_description"],
            reward=reward,
            cumulative_reward=self._cumulative_reward,
            best_reward=self._best_reward,
            passed_tests=passed_tests,
            total_tests=total_tests,
            feedback=feedback,
            done=done,
        )

    def reset(self, difficulty: Optional[str] = None) -> DebugObservation:
        """Start a new episode and return its initial observation.

        Args:
            difficulty: One of the keys of ``TASK_GETTERS`` (easy | medium |
                hard). When omitted or not a known key, a difficulty is picked
                at random.

        Returns:
            An observation of the new task with no reward or feedback yet and
            ``done=False``.
        """
        self._reset_episode_state()

        if difficulty and difficulty in TASK_GETTERS:
            self._difficulty = difficulty
        else:
            # Draw from the registry itself so new difficulties are picked up
            # without editing this method.
            self._difficulty = random.choice(list(TASK_GETTERS))

        self._current_task = TASK_GETTERS[self._difficulty]()

        return self._task_observation(
            reward=None,
            passed_tests=None,
            total_tests=len(self._current_task["test_cases"]),
            feedback=None,
            done=False,
        )

    def step(self, action: DebugAction) -> DebugObservation:
        """Grade one submission and return the resulting observation.

        Args:
            action: Carries ``fixed_code`` (the candidate fix) and, for the
                "hard" difficulty, ``explanation``, which is forwarded to the
                grader.

        Returns:
            An observation with the step reward, cumulative and best rewards,
            test counts, grader feedback and the ``done`` flag. If the episode
            is already over, or no episode was ever started, a terminal
            observation asking the caller to ``reset()`` is returned and no
            state is changed.
        """
        task = self._current_task

        # A finished episode and a never-started one are handled alike: report
        # a terminal observation instead of failing on the missing task.
        if self._done or task is None:
            if task is None:
                feedback = "No active episode. Call reset() to start a new task."
            else:
                feedback = "Episode ended. Call reset() to start a new task."
            return DebugObservation(
                task_id=task["task_id"] if task else "none",
                difficulty=self._difficulty,
                buggy_code=task["buggy_code"] if task else "",
                instructions="Episode done. Call reset() to start a new episode.",
                test_cases_description="",
                reward=self._best_reward,
                cumulative_reward=self._cumulative_reward,
                best_reward=self._best_reward,
                passed_tests=None,
                total_tests=0,
                feedback=feedback,
                done=True,
            )

        self._step_count += 1

        # ── Invalid action: empty / whitespace-only submission ─────────────
        # It earns nothing but still consumes one of the MAX_STEPS attempts.
        submission = action.fixed_code.strip() if action.fixed_code else ""
        if not submission:
            # Keep state.current_reward in sync with the reward of this step.
            self._current_reward = 0.0
            self._done = self._step_count >= MAX_STEPS
            return self._task_observation(
                reward=0.0,
                passed_tests=0,
                total_tests=len(task["test_cases"]),
                feedback="❌ Invalid action: fixed_code is empty. Submit valid Python code.",
                done=self._done,
            )

        # ── Grade the submission ───────────────────────────────────────────
        # Graders receive the code exactly as submitted (not the stripped copy)
        # and return a 5-tuple whose last element is not used here.
        grader = GRADERS[self._difficulty]
        if self._difficulty == "hard":
            reward, passed, total, feedback, _ = grader(
                action.fixed_code, task, action.explanation
            )
        else:
            reward, passed, total, feedback, _ = grader(action.fixed_code, task)

        self._current_reward = reward
        self._cumulative_reward += reward
        self._best_reward = max(self._best_reward, reward)

        # Finish on a perfect score or once the step budget is used up.
        self._done = (reward == 1.0) or (self._step_count >= MAX_STEPS)

        return self._task_observation(
            reward=reward,
            passed_tests=passed,
            total_tests=total,
            feedback=feedback,
            done=self._done,
        )

    @property
    def state(self) -> DebugState:
        """Return current episode metadata.

        ``task_id`` is reported as "none" while no task is loaded.
        """
        task = self._current_task
        return DebugState(
            episode_id=self._episode_id,
            step_count=self._step_count,
            task_id=task["task_id"] if task else "none",
            difficulty=self._difficulty,
            max_steps=MAX_STEPS,
            current_reward=self._current_reward,
            cumulative_reward=self._cumulative_reward,
            best_reward=self._best_reward,
            done=self._done,
        )