| from __future__ import annotations |
|
|
| from typing import Any |
|
|
| from openenv.core.env_server.types import Action, Observation, State |
| from pydantic import Field |
|
|
|
|
class AdaptAction(Action):
    # Action payload carrying the Python source to be executed.
    # Documented with comments rather than a docstring so the model's
    # generated JSON schema (which would pick up a docstring as its
    # description) stays exactly as before.

    # Required (no default); an empty string is rejected via min_length=1.
    code: str = Field(
        ...,
        min_length=1,
        description="Python code to execute.",
    )
|
|
|
|
class AdaptObservation(Observation):
    # Observation describing the current problem together with the outcome
    # of the last execution. Every field has a default, so an instance can
    # be built without supplying any of them.
    # Documented with comments rather than a docstring so the model's
    # generated JSON schema stays exactly as before.

    # --- Problem description -------------------------------------------
    problem_id: str = Field("", description="Current problem identifier.")
    difficulty: str = Field(
        "",
        description="Current curriculum difficulty tier.",
    )
    problem: str = Field(
        "",
        description="Problem statement shown to the agent.",
    )
    input_format: str = Field("", description="Expected stdin format.")
    constraints: str = Field("", description="Problem constraints.")

    # Each entry is a str -> str mapping; a fresh empty list per instance.
    examples: list[dict[str, str]] = Field(default_factory=list)
    visible_tests: list[dict[str, str]] = Field(default_factory=list)

    # --- Execution outcome ---------------------------------------------
    feedback: str = Field(
        "",
        description="Human-readable execution feedback.",
    )

    # Rates are validated to lie within the closed interval [0.0, 1.0].
    pass_rate: float = Field(0.0, ge=0.0, le=1.0)
    visible_pass_rate: float = Field(0.0, ge=0.0, le=1.0)
    hidden_pass_rate: float = Field(0.0, ge=0.0, le=1.0)

    syntax_valid: bool = True
    execution_status: str = "not_run"

    # Counters are validated to be non-negative.
    timeout_count: int = Field(0, ge=0)
    runtime_error_count: int = Field(0, ge=0)

    format_compliance: float = Field(0.0, ge=0.0, le=1.0)

    # Named float components; a fresh empty dict per instance.
    reward_components: dict[str, float] = Field(default_factory=dict)
|
|
|
|
class AdaptState(State):
    # State record holding the current problem identifiers plus the most
    # recently recorded reward, pass rate, feedback and metrics. Every
    # field has a default.
    # Documented with comments rather than a docstring so the model's
    # generated JSON schema stays exactly as before.

    problem_id: str = ""
    difficulty: str = ""

    # Reward is unconstrained; the pass rate is validated to [0.0, 1.0].
    last_reward: float = 0.0
    last_pass_rate: float = Field(0.0, ge=0.0, le=1.0)

    last_feedback: str = ""

    # Free-form metrics keyed by name; a fresh empty dict per instance.
    recent_metrics: dict[str, Any] = Field(default_factory=dict)
|
|