"""
models/models.py

OpenEnv-compliant Pydantic models.

Three public models:
    Observation   → what the agent sees
    Action        → what the agent does
    Reward        → structured reward signal

One internal model (used by env.py):
    InternalState → hidden ground truth for grading
"""

from __future__ import annotations

import math
from typing import Any, Dict, List, Optional

from pydantic import BaseModel, field_validator, model_validator


# ---------------------------------------------------------------------------
# 1. Message (building block for conversation_history)
# ---------------------------------------------------------------------------

class Message(BaseModel):
    """A single turn in the conversation."""

    role: str  # "user" | "assistant"
    content: str


# ---------------------------------------------------------------------------
# 2. Observation — what the agent receives at each step
# ---------------------------------------------------------------------------

class Observation(BaseModel):
    """
    Everything the agent can see.

    Fields:
        instruction           Original task string (never changes).
        known_info            Info collected so far: {"time": "10 AM", ...}
        constraints           Free-form constraint mapping (string keys,
                              arbitrary values).
        conversation_history  Full Q&A history as Message objects.
        last_response         The environment's most recent reply (or None).
    """

    instruction: str
    # NOTE(review): the mutable literals below are field defaults on a
    # pydantic model, not function defaults; pydantic is documented to copy
    # them per instance — confirm against the pinned pydantic version.
    known_info: Dict[str, str] = {}
    constraints: Dict[str, Any] = {}
    conversation_history: List[Message] = []
    last_response: Optional[str] = None


# ---------------------------------------------------------------------------
# 3. Action — what the agent can do
# ---------------------------------------------------------------------------

class Action(BaseModel):
    """
    Agent action.

    Two types:
        "ask"      → ask a clarification question
        "execute"  → attempt to complete the task

    Validation:
        - type == "ask"      requires question
        - type == "execute"  requires at least proposed_time OR
                             proposed_participants
    """

    type: str  # "ask" | "execute"
    question: Optional[str] = None  # for ask
    proposed_time: Optional[str] = None  # for execute
    proposed_participants: Optional[List[str]] = None  # for execute
    proposed_location: Optional[str] = None  # for execute (optional field)

    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str) -> str:
        """Reject any action type other than 'ask' or 'execute'.

        Raises:
            ValueError: if ``v`` is not one of the two supported types.
        """
        if v not in ("ask", "execute"):
            raise ValueError(f"Action type must be 'ask' or 'execute', got '{v}'")
        return v

    @model_validator(mode="after")
    def check_fields_for_type(self) -> "Action":
        """Check that the fields required by ``self.type`` are present.

        Raises:
            ValueError: if an 'ask' action has a missing, empty or
                whitespace-only question, or if an 'execute' action has
                neither ``proposed_time`` nor ``proposed_participants``.
        """
        if self.type == "ask":
            if not self.question or not self.question.strip():
                raise ValueError("Action type 'ask' requires a non-empty 'question'.")
        if self.type == "execute":
            # Only None counts as "missing" here: an empty string or an empty
            # list is accepted by this check.
            if self.proposed_time is None and self.proposed_participants is None:
                raise ValueError(
                    "Action type 'execute' requires at least 'proposed_time' "
                    "or 'proposed_participants'."
                )
        return self


# ---------------------------------------------------------------------------
# 4. Reward — structured reward signal
# ---------------------------------------------------------------------------

class Reward(BaseModel):
    """
    Structured reward returned by the environment.

    score   → always clamped to [0.0, 1.0]
    reason  → human-readable explanation (optional)
    """

    score: float
    reason: Optional[str] = None

    @field_validator("score")
    @classmethod
    def clamp_score(cls, v: float) -> float:
        """Hard clamp: score is always in [0.0, 1.0].

        Values below 0.0 become 0.0, values above 1.0 become 1.0, and NaN
        becomes 0.0.
        """
        # NaN compares False against everything, so min(1.0, nan) returns 1.0
        # and the plain clamp would hand out the maximum reward for an
        # undefined score. Treat NaN as "no reward" instead.
        if math.isnan(v):
            return 0.0
        return max(0.0, min(1.0, v))


# ---------------------------------------------------------------------------
# 5. InternalState — hidden ground truth (used by env.py, not exposed)
# ---------------------------------------------------------------------------

class InternalState(BaseModel):
    """
    Ground truth known only to the environment / grader.
    Never sent to the agent directly.

    Fields:
        true_time          Correct answer for the time field.
        true_participants  Correct answer for participants.
        true_location      Correct answer for location (optional).
        constraints        Free-form constraint mapping (string keys,
                           arbitrary values).
        collected_info     What has been revealed so far via Q&A.
        question_count     How many questions the agent has asked.
        done               Whether the episode is finished.
    """

    true_time: str = ""
    true_participants: List[str] = []
    true_location: Optional[str] = None
    constraints: Dict[str, Any] = {}
    collected_info: Dict[str, str] = {}
    question_count: int = 0
    done: bool = False