"""
models/models.py
OpenEnv-compliant Pydantic models.

Three public models (plus Message, the building block used by
Observation.conversation_history):
  Observation  → what the agent sees
  Action       → what the agent does
  Reward       → structured reward signal

One internal model (used by env.py):
  InternalState → hidden ground truth for grading
"""

from __future__ import annotations

from typing import Any, Dict, List, Optional
from pydantic import BaseModel, field_validator, model_validator


# ---------------------------------------------------------------------------
# 1. Message (building block for conversation_history)
# ---------------------------------------------------------------------------

class Message(BaseModel):
    """
    A single turn in the conversation.

    Both fields are required plain strings. No validators are defined on this
    model, so ``role`` is not restricted to the values listed beside it.
    """
    role: str      # expected "user" | "assistant" (convention only, not enforced here)
    content: str   # text of the turn


# ---------------------------------------------------------------------------
# 2. Observation — what the agent receives at each step
# ---------------------------------------------------------------------------

class Observation(BaseModel):
    """
    Everything the agent can see.

    Only ``instruction`` is required; every other field has a default.

    Fields:
      instruction           Original task string (never changes).
      known_info            Info collected so far: {"time": "10 AM", ...}
      constraints           Mapping of string keys to arbitrary values.
                            NOTE(review): its schema is not defined in this
                            module — presumably task constraints; confirm
                            against the code that populates it.
      conversation_history  Full Q&A history as Message objects.
      last_response         The environment's most recent reply (or None).
    """
    instruction: str
    # NOTE(review): the literal {} / [] defaults below rely on Pydantic copying
    # mutable defaults for each instance — confirm for the Pydantic version in use.
    known_info: Dict[str, str] = {}
    constraints: Dict[str, Any] = {}
    conversation_history: List[Message] = []
    last_response: Optional[str] = None


# ---------------------------------------------------------------------------
# 3. Action — what the agent can do
# ---------------------------------------------------------------------------

class Action(BaseModel):
    """
    A single move made by the agent.

    Supported values of ``type``:
      "ask"     → pose a clarification question
      "execute" → attempt to complete the task

    Validation rules:
      - "ask" needs a ``question`` that is not empty or whitespace-only
      - "execute" needs ``proposed_time`` and/or ``proposed_participants``
        to be something other than None
      - any other ``type`` is rejected
    """
    type: str                                    # "ask" | "execute"
    question: Optional[str] = None               # used by "ask"
    proposed_time: Optional[str] = None          # used by "execute"
    proposed_participants: Optional[List[str]] = None  # used by "execute"
    proposed_location: Optional[str] = None      # used by "execute"; never required

    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str) -> str:
        """Accept only the two supported action kinds; raise ValueError otherwise."""
        if v in ("ask", "execute"):
            return v
        raise ValueError(f"Action type must be 'ask' or 'execute', got '{v}'")

    @model_validator(mode="after")
    def check_fields_for_type(self) -> "Action":
        """Check that the payload fields required by ``type`` were supplied."""
        if self.type == "ask":
            # A missing question and a blank one are treated the same way.
            question_text = self.question or ""
            if not question_text.strip():
                raise ValueError("Action type 'ask' requires a non-empty 'question'.")
        elif self.type == "execute":
            # Only None counts as "not provided" for the proposal fields.
            nothing_proposed = all(
                value is None
                for value in (self.proposed_time, self.proposed_participants)
            )
            if nothing_proposed:
                raise ValueError(
                    "Action type 'execute' requires at least 'proposed_time' "
                    "or 'proposed_participants'."
                )
        return self


# ---------------------------------------------------------------------------
# 4. Reward — structured reward signal
# ---------------------------------------------------------------------------

class Reward(BaseModel):
    """
    Structured reward returned by the environment.

    score  → always normalised into [0.0, 1.0] by ``clamp_score``:
             values below 0.0 become 0.0, values above 1.0 become 1.0,
             and NaN becomes 0.0
    reason → human-readable explanation (optional)
    """
    score: float
    reason: Optional[str] = None

    @field_validator("score")
    @classmethod
    def clamp_score(cls, v: float) -> float:
        """
        Hard clamp: score is always in [0.0, 1.0].

        Args:
            v: The score to normalise.

        Returns:
            1.0 when ``v`` is 1.0 or greater (including +inf), ``v`` itself
            when it lies strictly between 0.0 and 1.0, and 0.0 otherwise
            (zero, negatives, -inf and NaN).
        """
        # A comparison ladder is used instead of max(0.0, min(1.0, v)):
        # min(1.0, nan) evaluates to 1.0, so that form would silently turn an
        # invalid NaN score into a perfect reward.
        if v >= 1.0:
            return 1.0
        if v > 0.0:
            return v
        # Reached for v <= 0.0 and for NaN (every comparison with NaN is
        # False), so an invalid score degrades to the minimum reward.
        return 0.0


# ---------------------------------------------------------------------------
# 5. InternalState — hidden ground truth (used by env.py, not exposed)
# ---------------------------------------------------------------------------

class InternalState(BaseModel):
    """
    Ground truth known only to the environment / grader.
    Never sent to the agent directly.

    Every field has a default, so ``InternalState()`` builds an empty state.

    Fields:
      true_time          Correct answer for the time field.
      true_participants  Correct answer for participants.
      true_location      Correct answer for location (optional).
      constraints        Mapping of string keys to arbitrary values.
                         NOTE(review): its schema is not defined in this
                         module — presumably task constraints; confirm
                         against env.py.
      collected_info     What has been revealed so far via Q&A.
      question_count     How many questions the agent has asked.
      done               Whether the episode is finished.
    """
    # --- ground truth ---
    # NOTE(review): the literal [] / {} defaults in this class rely on Pydantic
    # copying mutable defaults for each instance — confirm for the version in use.
    true_time: str = ""
    true_participants: List[str] = []
    true_location: Optional[str] = None
    constraints: Dict[str, Any] = {}

    # --- episode progress ---
    collected_info: Dict[str, str] = {}
    question_count: int = 0
    done: bool = False