ambiguity-env / models /models.py
Yaser77
feat: implement dynamic constraint system with stochastic generation and UI visualization
e461841
"""
models/models.py
OpenEnv-compliant Pydantic models.
Three public models:
Observation → what the agent sees
Action → what the agent does
Reward → structured reward signal
One internal model (used by env.py):
InternalState → hidden ground truth for grading
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, field_validator, model_validator
# ---------------------------------------------------------------------------
# 1. Message (building block for conversation_history)
# ---------------------------------------------------------------------------
class Message(BaseModel):
    """One conversational turn: who spoke and what was said."""

    # Speaker of this turn. By convention "user" or "assistant"; no
    # validator in this class enforces that set.
    role: str
    # Text of the turn.
    content: str
# ---------------------------------------------------------------------------
# 2. Observation — what the agent receives at each step
# ---------------------------------------------------------------------------
class Observation(BaseModel):
    """
    Agent-visible view of the environment at one step.

    Fields:
        instruction           Original task string (never changes).
        known_info            Info collected so far, e.g. {"time": "10 AM", ...}.
        constraints           String-keyed mapping with arbitrary values. Its
                              contents are decided by whatever code builds the
                              observation (not visible in this file).
        conversation_history  Full Q&A history as Message objects.
        last_response         The environment's most recent reply (or None).
    """

    # Only the instruction is required; every other field has a default.
    instruction: str

    # Literal {} / [] defaults are safe on a Pydantic model: field defaults
    # are copied per instance rather than shared between instances.
    known_info: Dict[str, str] = {}
    constraints: Dict[str, Any] = {}
    conversation_history: List[Message] = []

    last_response: Optional[str] = None
# ---------------------------------------------------------------------------
# 3. Action — what the agent can do
# ---------------------------------------------------------------------------
class Action(BaseModel):
    """
    A single move made by the agent.

    Supported values of ``type``:
        "ask"      → ask a clarification question
        "execute"  → attempt to complete the task

    Validation:
        - any other ``type`` is rejected
        - "ask" requires a ``question`` containing non-whitespace text
        - "execute" requires ``proposed_time`` or ``proposed_participants``
          to be something other than None (``proposed_location`` alone is
          not enough)
    """

    type: str  # "ask" | "execute"
    question: Optional[str] = None  # payload for "ask"
    proposed_time: Optional[str] = None  # payload for "execute"
    proposed_participants: Optional[List[str]] = None  # payload for "execute"
    proposed_location: Optional[str] = None  # optional extra for "execute"

    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str) -> str:
        """Return ``v`` unchanged if it is a supported action type.

        Raises:
            ValueError: if ``v`` is neither "ask" nor "execute".
        """
        if v in ("ask", "execute"):
            return v
        raise ValueError(f"Action type must be 'ask' or 'execute', got '{v}'")

    @model_validator(mode="after")
    def check_fields_for_type(self) -> "Action":
        """Verify that the payload required by ``self.type`` is present.

        Returns:
            The model instance itself, as an "after" model validator must.

        Raises:
            ValueError: if an "ask" has no usable question, or an "execute"
                carries neither a time nor a participant list.
        """
        if self.type == "ask":
            # None, "" and whitespace-only questions all collapse to "".
            question_text = (self.question or "").strip()
            if not question_text:
                raise ValueError(
                    "Action type 'ask' requires a non-empty 'question'."
                )
        elif self.type == "execute":
            proposals = (self.proposed_time, self.proposed_participants)
            # Only None counts as "missing"; "" or [] are accepted here.
            if all(value is None for value in proposals):
                message = (
                    "Action type 'execute' requires at least 'proposed_time' "
                    "or 'proposed_participants'."
                )
                raise ValueError(message)
        return self
# ---------------------------------------------------------------------------
# 4. Reward — structured reward signal
# ---------------------------------------------------------------------------
class Reward(BaseModel):
    """
    Structured reward returned by the environment.

    score   → always clamped to [0.0, 1.0]; a NaN score becomes 0.0
    reason  → human-readable explanation (optional)
    """

    score: float
    reason: Optional[str] = None

    @field_validator("score")
    @classmethod
    def clamp_score(cls, v: float) -> float:
        """Hard clamp: score is always in [0.0, 1.0].

        Args:
            v: The candidate score.

        Returns:
            0.0 for NaN and for anything not greater than zero, 1.0 for
            anything above one (including +inf), otherwise ``v`` itself.
        """
        # NaN fails every comparison, so a plain max(0.0, min(1.0, v)) would
        # turn it into 1.0 -- a perfect reward for an invalid score. The
        # negated test routes NaN, negatives, and zero to the floor instead.
        if not v > 0.0:
            return 0.0
        return min(1.0, v)
# ---------------------------------------------------------------------------
# 5. InternalState — hidden ground truth (used by env.py, not exposed)
# ---------------------------------------------------------------------------
class InternalState(BaseModel):
    """
    Hidden ground truth held by the environment / grader.

    Intended to stay on the environment side and not be handed to the agent.

    Fields:
        true_time          Correct answer for the time field.
        true_participants  Correct answer for participants.
        true_location      Correct answer for location (optional).
        constraints        String-keyed mapping with arbitrary values. Its
                           contents are set by the code that creates the
                           state (not visible in this file).
        collected_info     What has been revealed so far via Q&A.
        question_count     How many questions the agent has asked.
        done               Whether the episode is finished.
    """

    # --- ground truth --------------------------------------------------
    true_time: str = ""
    true_participants: List[str] = []
    true_location: Optional[str] = None
    constraints: Dict[str, Any] = {}

    # --- episode progress ----------------------------------------------
    collected_info: Dict[str, str] = {}
    question_count: int = 0
    done: bool = False