# models.py
from __future__ import annotations
from pydantic import BaseModel, Field
from typing import Optional
import uuid


class Action(BaseModel):
    """Agent's action: submit a code patch to fix the buggy function."""

    # Required (no default): the replacement source submitted by the agent.
    patch: str = Field(
        ...,
        description="Full replacement of the function body (valid Python source code).",
    )

    # Required (no default): identifier of the task the patch is aimed at.
    task_id: str = Field(
        ...,
        description="Which task this patch targets. Must match a task from /tasks.",
    )

    # May be omitted by the caller, in which case it is None.
    think: Optional[str] = Field(
        None,
        description="Optional chain-of-thought reasoning. Providing this earns r_cot bonus.",
    )


class TestResult(BaseModel):
    # Element type of Observation.test_results.
    # NOTE: documented with comments on purpose; a class docstring would be
    # emitted as this model's "description" in the generated JSON schema.

    # Required: label of the test (presumably the test function name; confirm
    # against whatever produces these records).
    name: str = Field(...)
    # Required: pass/fail flag for this test.
    passed: bool = Field(...)
    # None unless supplied; presumably carries the failure message.
    error: Optional[str] = Field(default=None)


class Observation(BaseModel):
    """What the agent sees after reset() or step()."""

    # --- required fields (no defaults) ---
    task_id: str = Field(...)
    buggy_code: str = Field(
        ...,
        description="Current version of the code (may be patched).",
    )
    task_description: str = Field(...)

    # --- optional fields ---
    # Each instance gets its own fresh empty list when none is supplied.
    test_results: list[TestResult] = Field(default_factory=list)
    passed: int = Field(default=0)
    total: int = Field(default=0)
    score: float = Field(default=0.0)
    done: bool = Field(default=False)
    # Required by openenv 0.2 serialization
    # (exclude=True keeps this field out of the model's dumped output).
    reward: float = Field(default=0.0, exclude=True)
    error: Optional[str] = Field(default=None)


def _new_episode_id() -> str:
    # Default factory for State.episode_id: a random UUID4 rendered as text.
    return str(uuid.uuid4())


class State(BaseModel):
    """Episode metadata — returned by state() endpoint."""

    # Generated per instance via the factory unless the caller supplies one.
    episode_id: str = Field(default_factory=_new_episode_id)
    # Empty string when no task id has been supplied.
    task_id: str = Field(default="")
    step_count: int = Field(default=0)
    # Defaults to 10; nothing in this model enforces the limit.
    max_steps: int = Field(default=10)
    current_score: float = Field(default=0.0)
    best_score: float = Field(default=0.0)


class TaskInfo(BaseModel):
    """Returned by /tasks endpoint."""

    # All four fields are required (none declares a default).
    task_id: str = Field(...)
    # "easy" | "medium" | "hard"  (typed as plain str; not validated here)
    difficulty: str = Field(...)
    description: str = Field(...)
    # JSON schema of Action for this task
    action_schema: dict = Field(...)