code-debug-env / models.py
Souravdanyal's picture
Initial commit
2ce1061
raw
history blame
2.41 kB
# models.py
# Typed Pydantic models for Action, Observation, and State
# These are the contracts between the agent and the environment.
from typing import Optional, List
from pydantic import Field
from openenv.core.env_server.types import Action, Observation, State
class DebugAction(Action):
    """Agent submission for one step: corrected code plus an optional rationale."""

    # Mandatory payload: the agent's repaired version of the buggy function.
    fixed_code: str = Field(
        ...,
        description="The corrected Python function as a string. Must be valid Python.",
    )

    # Free-text rationale; left as None when the agent does not supply one.
    # Per the field description, it is expected (and scored) on 'hard' tasks.
    explanation: Optional[str] = Field(
        default=None,
        description=(
            "Required for 'hard' difficulty tasks. "
            "Explain what was wrong and why your fix is correct. "
            "Affects reward on hard tasks."
        ),
    )
class TestResult(Action):
    """Sub-model holding the result of a single test case."""

    # NOTE(review): this derives from Action although it is described as a
    # sub-model rather than an agent action — confirm the base class is intended.

    test_id: int  # numeric identifier of the test case
    passed: bool  # True when the test case passed
    expected: str  # expected value, carried as a string
    got: str  # presumably the value actually produced, carried as a string
class DebugObservation(Observation):
    """Observation handed back after each step().

    The task-description fields are required; the feedback fields are
    optional and default to None (or False for ``done``).
    """

    # ---- Task description (all required) ----
    task_id: str = Field(
        ...,
        description="Unique ID of the current task instance",
    )
    difficulty: str = Field(
        ...,
        description="Task difficulty: easy | medium | hard",
    )
    buggy_code: str = Field(
        ...,
        description="The buggy Python code the agent must fix",
    )
    instructions: str = Field(
        ...,
        description="Natural language instructions for the task",
    )
    test_cases_description: str = Field(
        ...,
        description="Description of what the test cases check",
    )

    # ---- Feedback, populated after step() ----
    reward: Optional[float] = Field(
        default=None,
        description="Score from 0.0 to 1.0 for this step",
    )
    passed_tests: Optional[int] = Field(
        default=None,
        description="Number of test cases passed",
    )
    total_tests: Optional[int] = Field(
        default=None,
        description="Total number of test cases",
    )
    feedback: Optional[str] = Field(
        default=None,
        description="Detailed feedback: which tests failed and why",
    )
    done: bool = Field(
        default=False,
        description="True when episode is complete",
    )
class DebugState(State):
    """Environment-internal state, as returned by GET /state."""

    # Declared with an empty-string default because the validator requires
    # the GET /state response to include episode_id.
    episode_id: str = ""

    # Identity of the active task (both required, no defaults).
    task_id: str
    difficulty: str

    # Step accounting.
    step_count: int = 0
    max_steps: int = 3

    # Reward tracking and the completion flag.
    current_reward: float = 0.0
    best_reward: float = 0.0
    done: bool = False