# models.py — Typed Pydantic models for Action, Observation, and State

from typing import Optional
from pydantic import Field
from openenv.core.env_server.types import Action, Observation, State


class DebugAction(Action):
    """Action submitted by the agent: fixed code + optional explanation."""

    fixed_code: str = Field(
        ...,
        description="Complete corrected Python function. Must be valid Python including imports."
    )
    explanation: Optional[str] = Field(
        default=None,
        description="Required for hard tasks. Explain what was wrong and why your fix is correct."
    )


class DebugObservation(Observation):
    """Observation returned after reset() and step()."""

    task_id: str = Field(..., description="Unique task identifier e.g. easy_003")
    difficulty: str = Field(..., description="Task difficulty: easy | medium | hard")
    buggy_code: str = Field(..., description="The buggy Python code the agent must fix")
    instructions: str = Field(..., description="Natural language instructions for the task")
    test_cases_description: str = Field(..., description="What the test cases check")

    # Step feedback fields
    reward: Optional[float] = Field(default=None, description="Immediate reward 0.0-1.0 (null on reset)")
    cumulative_reward: float = Field(default=0.0, description="Total reward accumulated this episode")
    best_reward: float = Field(default=0.0, description="Best reward achieved this episode")
    passed_tests: Optional[int] = Field(default=None, description="Tests passed (null on reset)")
    total_tests: Optional[int] = Field(default=None, description="Total test cases (always 3)")
    feedback: Optional[str] = Field(default=None, description="Per-test feedback: Input, Expected, Got")
    done: bool = Field(default=False, description="True when episode complete")


class DebugState(State):
    """Internal environment state returned by GET /state."""

    episode_id: str = ""
    task_id: str = "none"
    difficulty: str = "easy"
    step_count: int = 0
    max_steps: int = 5
    current_reward: float = 0.0
    cumulative_reward: float = 0.0
    best_reward: float = 0.0
    done: bool = False