Spaces:

Souravdanyal
/

code-debug-env

Running

App Files Files Community

code-debug-env / server /environment.py

Souravdanyal

Fix log format, timeout protection, SPEC.md, tests, max_steps=5, remove pycache

d298b6d 2 months ago

raw

history blame

6.95 kB

	# server/environment.py
	# Core environment: manages episode state, dispatches to task banks and graders.

	import random
	from uuid import uuid4
	from typing import Optional

	from openenv.core.env_server.interfaces import Environment
	from openenv.core.env_server.types import State

	from models import DebugAction, DebugObservation, DebugState
	from server.tasks.task_easy import get_random_easy_task
	from server.tasks.task_medium import get_random_medium_task
	from server.tasks.task_hard import get_random_hard_task
	from server.graders.grader_easy import grade_easy
	from server.graders.grader_medium import grade_medium
	from server.graders.grader_hard import grade_hard


	# Difficulty name -> zero-argument callable that supplies a task for that level.
	TASK_GETTERS = dict(
	    easy=get_random_easy_task,
	    medium=get_random_medium_task,
	    hard=get_random_hard_task,
	)

	# Difficulty name -> grading function applied to submissions at that level.
	GRADERS = dict(
	    easy=grade_easy,
	    medium=grade_medium,
	    hard=grade_hard,
	)

	# Step budget per episode; step() marks the episode done once it is used up.
	MAX_STEPS = 5


	class CodeDebugEnvironment(Environment):
	    """
	    OpenEnv environment for LLM-based code debugging.

	    An episode serves one task drawn from the easy, medium or hard task bank.
	    The agent submits candidate fixes through step(); each submission is scored
	    by the grader registered for the episode's difficulty. The episode ends
	    when a submission reaches a reward of 1.0 or after MAX_STEPS steps.
	    """

	    def __init__(self):
	        # Let the base class set up whatever attributes it maintains.
	        super().__init__()
	        self._episode_id: str = str(uuid4())
	        self._difficulty: str = "easy"
	        # Stays None until reset() loads a task.
	        self._current_task: Optional[dict] = None
	        self._step_count: int = 0
	        self._best_reward: float = 0.0
	        self._current_reward: float = 0.0
	        self._done: bool = False

	    def reset(self, difficulty: Optional[str] = None) -> DebugObservation:
	        """
	        Start a new episode and return its initial observation.

	        Args:
	            difficulty: One of the keys of TASK_GETTERS ("easy", "medium",
	                "hard"). When omitted or not recognised, a difficulty is
	                picked at random.

	        Returns:
	            Observation describing the freshly loaded task, with reward,
	            passed_tests and feedback set to None and done set to False.
	        """
	        self._episode_id = str(uuid4())
	        self._step_count = 0
	        self._best_reward = 0.0
	        self._current_reward = 0.0
	        self._done = False

	        if difficulty and difficulty in TASK_GETTERS:
	            self._difficulty = difficulty
	        else:
	            # Unknown or missing difficulty: fall back to a random level,
	            # drawn from the same table that is used to load the task.
	            self._difficulty = random.choice(list(TASK_GETTERS))

	        self._current_task = TASK_GETTERS[self._difficulty]()

	        return self._observe(
	            reward=None,
	            passed_tests=None,
	            feedback=None,
	            done=False,
	        )

	    def step(self, action: DebugAction) -> DebugObservation:
	        """
	        Score one submission from the agent.

	        Args:
	            action: Carries fixed_code and, for hard tasks, an explanation
	                that is forwarded to the grader.

	        Returns:
	            Observation with the reward, test counts, feedback and done flag
	            for this step. If no episode is active (reset() was never called)
	            or the episode has already ended, a terminal observation asking
	            the caller to reset() is returned and no state is changed.
	        """
	        if self._current_task is None:
	            # step() before the first reset(): there is no task to grade.
	            return self._inactive_observation(
	                instructions="No active episode. Call reset() to start a new episode.",
	                feedback="No task loaded. Please call reset() before step().",
	                reward=None,
	            )

	        if self._done:
	            return self._inactive_observation(
	                instructions="Episode is already done. Call reset() to start a new episode.",
	                feedback="Episode ended. Please call reset() to start a new task.",
	                reward=self._best_reward,
	            )

	        self._step_count += 1

	        # Invalid submissions still consume a step and score 0.0.
	        code = action.fixed_code.strip() if action.fixed_code else ""
	        if not code:
	            return self._reject(
	                "❌ Invalid action: fixed_code is empty. Penalty applied. Submit valid Python code."
	            )

	        # Cheap plausibility filter: reject very short submissions and ones
	        # containing none of "def ", "lambda" or "=".
	        looks_like_code = "def " in code or "lambda" in code or "=" in code
	        if len(code) < 5 or not looks_like_code:
	            return self._reject(
	                "❌ Invalid action: submission does not appear to be valid Python. Penalty applied."
	            )

	        # Graders return a 5-tuple; the fifth element is not used here.
	        grader = GRADERS[self._difficulty]
	        if self._difficulty == "hard":
	            reward, passed, total, feedback, _ = grader(
	                action.fixed_code, self._current_task, action.explanation
	            )
	        else:
	            reward, passed, total, feedback, _ = grader(
	                action.fixed_code, self._current_task
	            )

	        self._current_reward = reward
	        self._best_reward = max(self._best_reward, reward)

	        # Episode ends on a perfect score or when the step budget is spent.
	        # ">=" rather than "==" so the check does not hinge on exact float
	        # equality.
	        self._done = (reward >= 1.0) or (self._step_count >= MAX_STEPS)

	        return self._observe(
	            reward=reward,
	            passed_tests=passed,
	            feedback=feedback,
	            done=self._done,
	            total_tests=total,
	        )

	    @property
	    def state(self) -> DebugState:
	        """Return current episode metadata."""
	        task = self._current_task
	        return DebugState(
	            episode_id=self._episode_id,
	            step_count=self._step_count,
	            task_id=task["task_id"] if task else "none",
	            difficulty=self._difficulty,
	            max_steps=MAX_STEPS,
	            current_reward=self._current_reward,
	            best_reward=self._best_reward,
	            done=self._done,
	        )

	    def _observe(
	        self,
	        *,
	        reward: Optional[float],
	        passed_tests: Optional[int],
	        feedback: Optional[str],
	        done: bool,
	        total_tests: Optional[int] = None,
	    ) -> DebugObservation:
	        """Build an observation for the current task; requires a loaded task."""
	        task = self._current_task
	        if total_tests is None:
	            # No grader-supplied count: use the task's own test-case list.
	            total_tests = len(task["test_cases"])
	        return DebugObservation(
	            task_id=task["task_id"],
	            difficulty=self._difficulty,
	            buggy_code=task["buggy_code"],
	            instructions=task["instructions"],
	            test_cases_description=task["test_cases_description"],
	            reward=reward,
	            passed_tests=passed_tests,
	            total_tests=total_tests,
	            feedback=feedback,
	            done=done,
	        )

	    def _reject(self, feedback: str) -> DebugObservation:
	        """Record a zero-reward step for an invalid submission and report it."""
	        # Keep the state property in step with the observation handed back.
	        self._current_reward = 0.0
	        self._done = self._step_count >= MAX_STEPS
	        return self._observe(
	            reward=0.0,
	            passed_tests=0,
	            feedback=feedback,
	            done=self._done,
	        )

	    def _inactive_observation(
	        self,
	        *,
	        instructions: str,
	        feedback: str,
	        reward: Optional[float],
	    ) -> DebugObservation:
	        """Build the terminal observation returned when no step can be taken."""
	        task = self._current_task
	        return DebugObservation(
	            task_id=task["task_id"] if task else "none",
	            difficulty=self._difficulty,
	            buggy_code=task["buggy_code"] if task else "",
	            instructions=instructions,
	            test_cases_description="",
	            reward=reward,
	            passed_tests=None,
	            total_tests=0,
	            feedback=feedback,
	            done=True,
	        )