from typing import Any, Dict, Literal

from openenv.core.env_server.types import Action, Observation, State
from pydantic import Field


class WhyDidItFailAction(Action):
    """Agent's diagnostic action."""

    # Closed set of moves: three inspection actions plus the final submission.
    action_type: Literal[
        "inspect_logs",
        "inspect_config",
        "inspect_gradients",
        "submit_diagnosis",
    ] = Field(
        default=...,
        description="One of: inspect_logs | inspect_config | inspect_gradients | submit_diagnosis",
    )

    # The next three fields are described as required for `submit_diagnosis`,
    # but they default to None and this class declares no validator that
    # enforces the pairing.
    # NOTE(review): presumably checked by the environment at step time -- confirm.
    diagnosis: str | None = Field(
        default=None,
        description="Required when action_type=submit_diagnosis. Its the agent's conclusion about what is wrong.",
    )
    suggested_fix: str | None = Field(
        default=None,
        description="Required when action_type=submit_diagnosis. Exact fix to apply.",
    )
    reasoning: str | None = Field(
        default=None,
        description="Required when action_type=submit_diagnosis. Explain what evidence led to this diagnosis.",
    )

    # A fresh dict per instance (default_factory), never a shared mutable default.
    metadata: Dict[str, Any] = Field(
        default_factory=dict,
        description="Additional metadata.",
    )


class WhyDidItFailState(State):
    """Full episode state exposed via GET /state and WSStateMessage."""

    # Both scenario descriptors are optional; the description of
    # `scenario_key` states it is None before reset.
    scenario_key: str | None = Field(
        default=None,
        description="Key of the active scenario (e.g. 'exploding_gradients'). None before reset.",
    )
    difficulty: str | None = Field(
        default=None,
        description="Difficulty tier of the active scenario: easy, medium, or hard.",
    )

    # List fields start empty; default_factory gives each state its own list.
    inspection_order: list[str] = Field(
        default_factory=list,
        description="Sources inspected so far this episode, in the order they were first visited.",
    )
    required_sources: list[str] = Field(
        default_factory=list,
        description="Sources the agent must inspect before submitting a valid diagnosis.",
    )

    # Defaults to 0 until a value is assigned.
    max_steps: int = Field(
        default=0,
        description="Hard step ceiling for this episode. Exceeding it terminates with score 0.10.",
    )


class WhyDidItFailObservation(Observation):
    """What the agent sees after each action."""

    # Required on every observation (no default).
    task_description: str = Field(
        default=...,
        description="The problem the agent must diagnose.",
    )
    visible_data: dict = Field(
        default=...,
        description="Data returned by the last action (logs, config, gradients, etc.).",
    )
    available_actions: list[str] = Field(
        default=...,
        description="Which action_types are valid on this step.",
    )
    steps_taken: int = Field(
        default=...,
        description="Number of actions taken so far in this episode.",
    )

    # The override suppression suggests the base class already declares
    # `reward` with a different annotation.
    # NOTE(review): confirm against openenv's Observation.
    reward: float = Field(  # type: ignore[override]
        default=0.10,
        description="Score for the current step. 0.90 = max.",
    )
    done: bool = Field(
        default=False,
        description="True when the episode has ended.",
    )

    # Required, like the fields above `reward`.
    feedback: str = Field(
        default=...,
        description="Partial progress hint from the environment.",
    )