Spaces:
Running
Running
# models.py — Typed Pydantic models for Action, Observation, and State
| from typing import Optional | |
| from pydantic import Field | |
| from openenv.core.env_server.types import Action, Observation, State | |
class DebugAction(Action):
    """Agent submission: the repaired code plus an optional rationale.

    Attributes:
        fixed_code: Required. Text of the complete corrected Python function.
        explanation: Optional free-text rationale; defaults to ``None``.
    """

    fixed_code: str = Field(
        ...,
        description=(
            "Complete corrected Python function. "
            "Must be valid Python including imports."
        ),
    )
    # The description says "required for hard tasks", but this model does not
    # enforce that: the field is optional and defaults to None.
    explanation: Optional[str] = Field(
        description=(
            "Required for hard tasks. "
            "Explain what was wrong and why your fix is correct."
        ),
        default=None,
    )
class DebugObservation(Observation):
    """Payload handed back to the agent by reset() and step().

    The first five fields describe the task and are required. The remaining
    fields carry per-step feedback and all have defaults, so an observation
    can be built without them.

    NOTE(review): the ranges and counts mentioned in the descriptions
    ("0.0-1.0", "always 3") are documentation only; no validator in this
    class enforces them.
    """

    # --- Task description (all required) ---
    task_id: str = Field(
        ...,
        description="Unique task identifier e.g. easy_003",
    )
    difficulty: str = Field(
        ...,
        description="Task difficulty: easy | medium | hard",
    )
    buggy_code: str = Field(
        ...,
        description="The buggy Python code the agent must fix",
    )
    instructions: str = Field(
        ...,
        description="Natural language instructions for the task",
    )
    test_cases_description: str = Field(
        ...,
        description="What the test cases check",
    )

    # --- Step feedback (all defaulted) ---
    reward: Optional[float] = Field(
        description="Immediate reward 0.0-1.0 (null on reset)",
        default=None,
    )
    cumulative_reward: float = Field(
        description="Total reward accumulated this episode",
        default=0.0,
    )
    best_reward: float = Field(
        description="Best reward achieved this episode",
        default=0.0,
    )
    passed_tests: Optional[int] = Field(
        description="Tests passed (null on reset)",
        default=None,
    )
    total_tests: Optional[int] = Field(
        description="Total test cases (always 3)",
        default=None,
    )
    feedback: Optional[str] = Field(
        description="Per-test feedback: Input, Expected, Got",
        default=None,
    )
    done: bool = Field(
        description="True when episode complete",
        default=False,
    )
class DebugState(State):
    """Environment-side episode state, as served by GET /state.

    Every field has a default, so ``DebugState()`` can be constructed with
    no arguments.
    """

    # --- Episode / task identity ---
    episode_id: str = ""
    task_id: str = "none"  # placeholder string, not None
    difficulty: str = "easy"

    # --- Step accounting ---
    step_count: int = 0
    max_steps: int = 5  # presumably the per-episode step budget; confirm in the environment code

    # --- Reward tracking ---
    current_reward: float = 0.0
    cumulative_reward: float = 0.0
    best_reward: float = 0.0

    # --- Termination flag ---
    done: bool = False