Spaces:
Running
Running
| from __future__ import annotations | |
| from typing import Any | |
| from pydantic import BaseModel, Field | |
# Prefer the real OpenEnv base types; when the package is not importable,
# define minimal local stand-ins with the same names so the models below can
# still be declared.
try:
    from openenv.core.env_server.types import Action, Observation, State
except ImportError:

    class Action(BaseModel):
        # Fallback action base: unknown fields are rejected at validation.
        model_config = {"extra": "forbid"}

    class Observation(BaseModel):
        # Fallback observation base: reward is validated to lie in [0.0, 1.0].
        reward: float = Field(default=0.0, ge=0.0, le=1.0)
        done: bool = False

    class State(BaseModel):
        # Fallback state base: episode identifier plus a step counter.
        episode_id: str = ""
        step_count: int = 0
class AdaptAction(Action):
    # Action payload: the code to run plus an optional session id.
    # NOTE(review): the local fallback `Action` forbids extra fields; whether
    # the OpenEnv `Action` does the same is not visible here -- confirm.

    # Optional; defaults to the empty string when the caller supplies none.
    session_id: str = Field(
        default="",
        description="Environment session id for server-routed calls.",
    )
    # Required (`...`) and must contain at least one character.
    code: str = Field(..., min_length=1, description="Python code to execute.")
class AdaptObservation(Observation):
    # Observation model: problem identity and statement, attempt counters,
    # and execution/grading metrics. Every field declared here has a default.

    # --- Session and problem identity ---
    session_id: str = Field(
        default="",
        description="Session id for the active environment instance.",
    )
    problem_id: str = Field(default="", description="Current problem identifier.")
    problem_type: str = Field(
        default="",
        description="Current generated problem family.",
    )
    difficulty: str = Field(
        default="",
        description="Current curriculum difficulty tier.",
    )

    # --- Attempt bookkeeping ---
    # NOTE(review): described as 1-indexed but defaults to 0 (and allows 0);
    # presumably 0 means "no attempt made yet" -- confirm against the caller.
    attempt_number: int = Field(
        default=0,
        ge=0,
        description="1-indexed attempt number within the episode.",
    )
    max_steps: int = Field(
        default=3,
        ge=1,
        description="Maximum attempts allowed for the episode.",
    )

    # --- Problem text shown to the agent ---
    problem: str = Field(
        default="",
        description="Problem statement shown to the agent.",
    )
    input_format: str = Field(default="", description="Expected stdin format.")
    constraints: str = Field(default="", description="Problem constraints.")

    # --- Execution feedback and metrics ---
    feedback: str = Field(
        default="",
        description="Human-readable execution feedback.",
    )
    # The three pass rates are validated to lie in [0.0, 1.0].
    pass_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    visible_pass_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    hidden_pass_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    syntax_valid: bool = Field(default=True)
    execution_status: str = Field(default="not_run")
    # Non-negative counters, one per failure category.
    timeout_count: int = Field(default=0, ge=0)
    runtime_error_count: int = Field(default=0, ge=0)
    invalid_output_count: int = Field(default=0, ge=0)
    wrong_answer_count: int = Field(default=0, ge=0)
    format_compliance: float = Field(default=0.0, ge=0.0, le=1.0)

    # --- Reward details ---
    # default_factory gives each instance its own empty dict.
    reward_components: dict[str, float] = Field(default_factory=dict)
    # Unlike the rates above, this value carries no range constraint.
    generator_reward_signal: float = Field(default=0.0)
class AdaptState(State):
    # State model: current problem, generator settings, and the results of
    # the most recent step. Every field declared here has a default.

    # --- Session and problem identity ---
    session_id: str = Field(default="")
    problem_id: str = Field(default="")
    problem_type: str = Field(default="")
    difficulty: str = Field(default="")

    # --- Generator / episode configuration ---
    generator_mode: str = Field(default="heuristic")
    max_steps: int = Field(default=3, ge=1)
    # default_factory gives each instance its own empty dict.
    generated_problem: dict[str, Any] = Field(default_factory=dict)

    # --- Results of the most recent step ---
    # last_reward is unconstrained; last_pass_rate must lie in [0.0, 1.0].
    last_reward: float = Field(default=0.0)
    last_pass_rate: float = Field(default=0.0, ge=0.0, le=1.0)
    last_feedback: str = Field(default="")
    last_execution_status: str = Field(default="ready")
    generator_reward_signal: float = Field(default=0.0)

    # --- Accumulated records ---
    # NOTE(review): both are free-form string-keyed dicts; their key schema
    # is not visible in this file.
    history: dict[str, Any] = Field(default_factory=dict)
    recent_metrics: dict[str, Any] = Field(default_factory=dict)