Spaces:
Sleeping
Sleeping
Yaser77
feat: implement dynamic constraint system with stochastic generation and UI visualization
e461841 | """ | |
| models/models.py | |
| OpenEnv-compliant Pydantic models. | |
| Three public models: | |
| Observation — what the agent sees | |
| Action — what the agent does | |
| Reward — structured reward signal | |
| One internal model (used by env.py): | |
| InternalState — hidden ground truth for grading | |
| """ | |
| from __future__ import annotations | |
| from typing import Any, Dict, List, Optional | |
| from pydantic import BaseModel, field_validator, model_validator | |
| # --------------------------------------------------------------------------- | |
| # 1. Message (building block for conversation_history) | |
| # --------------------------------------------------------------------------- | |
class Message(BaseModel):
    """One conversational turn: who spoke and what was said."""

    # Speaker of the turn. Intended values are "user" or "assistant";
    # the model itself does not restrict the string.
    role: str
    # Text of the turn.
    content: str
| # --------------------------------------------------------------------------- | |
| # 2. Observation — what the agent receives at each step | |
| # --------------------------------------------------------------------------- | |
class Observation(BaseModel):
    """
    The agent-visible view of the environment.

    Fields:
        instruction           Original task string (never changes).
        known_info            Info collected so far: {"time": "10 AM", ...}
        constraints           Free-form constraint mapping; its keys and
                              value shapes are not defined in this module.
        conversation_history  Full Q&A history as Message objects.
        last_response         The environment's most recent reply (or None).
    """

    # Required: the only field without a default.
    instruction: str

    # Containers start out empty.
    known_info: Dict[str, str] = {}
    constraints: Dict[str, Any] = {}
    conversation_history: List[Message] = []

    # None until a reply has been recorded.
    last_response: Optional[str] = None
| # --------------------------------------------------------------------------- | |
| # 3. Action — what the agent can do | |
| # --------------------------------------------------------------------------- | |
class Action(BaseModel):
    """
    Agent action. Two types:
        "ask"     — ask a clarification question
        "execute" — attempt to complete the task

    Validation (run by Pydantic when the model is validated):
        - type must be "ask" or "execute"
        - type == "ask" requires a non-blank question
        - type == "execute" requires at least proposed_time OR
          proposed_participants

    Raises:
        pydantic.ValidationError: when any rule above is violated. The
        validators raise ValueError, which Pydantic wraps.
    """

    type: str  # "ask" | "execute"
    question: Optional[str] = None  # for ask
    proposed_time: Optional[str] = None  # for execute
    proposed_participants: Optional[List[str]] = None  # for execute
    proposed_location: Optional[str] = None  # for execute (optional field)

    # Without this decorator Pydantic never calls the method, so any
    # string would be accepted as an action type.
    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str) -> str:
        """Reject any action type other than 'ask' or 'execute'."""
        if v not in ("ask", "execute"):
            raise ValueError(f"Action type must be 'ask' or 'execute', got '{v}'")
        return v

    # mode="after" runs once all fields are populated, so the check can
    # read them from the instance and compare them against each other.
    @model_validator(mode="after")
    def check_fields_for_type(self) -> "Action":
        """Ensure the payload fields required by the chosen type are present."""
        if self.type == "ask":
            # A whitespace-only question counts as missing.
            if not self.question or not self.question.strip():
                raise ValueError("Action type 'ask' requires a non-empty 'question'.")
        if self.type == "execute":
            if self.proposed_time is None and self.proposed_participants is None:
                raise ValueError(
                    "Action type 'execute' requires at least 'proposed_time' "
                    "or 'proposed_participants'."
                )
        return self
| # --------------------------------------------------------------------------- | |
| # 4. Reward — structured reward signal | |
| # --------------------------------------------------------------------------- | |
class Reward(BaseModel):
    """
    Structured reward returned by the environment.

    score  — clamped to [0.0, 1.0] when the model is validated
    reason — human-readable explanation (optional)
    """

    score: float
    reason: Optional[str] = None

    # Without this decorator Pydantic never calls the method, so
    # out-of-range scores would be stored unchanged.
    @field_validator("score")
    @classmethod
    def clamp_score(cls, v: float) -> float:
        """Hard clamp: values below 0.0 become 0.0, values above 1.0 become 1.0."""
        return max(0.0, min(1.0, v))
| # --------------------------------------------------------------------------- | |
| # 5. InternalState — hidden ground truth (used by env.py, not exposed) | |
| # --------------------------------------------------------------------------- | |
class InternalState(BaseModel):
    """
    Hidden ground truth held by the environment / grader.

    Not meant to be sent to the agent directly.

    Fields:
        true_time          Correct answer for the time field.
        true_participants  Correct answer for participants.
        true_location      Correct answer for location (optional).
        constraints        Free-form constraint mapping; its keys and value
                           shapes are not defined in this module.
        collected_info     What has been revealed so far via Q&A.
        question_count     How many questions the agent has asked.
        done               Whether the episode is finished.
    """

    # Ground-truth answers.
    true_time: str = ""
    true_participants: List[str] = []
    true_location: Optional[str] = None

    constraints: Dict[str, Any] = {}

    # Episode bookkeeping; every field starts from an empty/zero/False default.
    collected_info: Dict[str, str] = {}
    question_count: int = 0
    done: bool = False