"""
models/models.py
OpenEnv-compliant Pydantic models.
Three public models:
Observation — what the agent sees
Action — what the agent does
Reward — structured reward signal
One internal model (used by env.py):
InternalState — hidden ground truth for grading
"""
from __future__ import annotations
from typing import Any, Dict, List, Optional
from pydantic import BaseModel, field_validator, model_validator
# ---------------------------------------------------------------------------
# 1. Message (building block for conversation_history)
# ---------------------------------------------------------------------------
class Message(BaseModel):
    """One turn of the conversation: who spoke and what was said."""

    # Speaker of this turn. By convention "user" or "assistant"; the value
    # is not validated by this model.
    role: str
    # Text of the turn.
    content: str
# ---------------------------------------------------------------------------
# 2. Observation — what the agent receives at each step
# ---------------------------------------------------------------------------
class Observation(BaseModel):
    """
    Agent-visible snapshot of the episode.

    Attributes:
        instruction: Original task string (intended to stay constant).
        known_info: Information gathered so far, e.g. {"time": "10 AM"}.
        constraints: Free-form constraint mapping. Its schema is not
            defined in this module — confirm against the producer.
        conversation_history: Q&A turns so far, as Message objects.
        last_response: The environment's most recent reply, or None.
    """

    instruction: str

    # Pydantic copies mutable defaults for each instance, so these literal
    # containers are not shared between Observation objects.
    known_info: Dict[str, str] = {}
    constraints: Dict[str, Any] = {}
    conversation_history: List[Message] = []

    last_response: Optional[str] = None
# ---------------------------------------------------------------------------
# 3. Action — what the agent can do
# ---------------------------------------------------------------------------
class Action(BaseModel):
    """
    A single agent action.

    Supported values of ``type``:
        "ask"     - ask a clarification question
        "execute" - attempt to complete the task

    Validation rules:
        - "ask" needs a ``question`` that is non-empty after stripping.
        - "execute" needs ``proposed_time`` or ``proposed_participants``
          (at least one of them must not be None).
    """

    type: str  # "ask" | "execute"

    # Used by "ask".
    question: Optional[str] = None

    # Used by "execute"; proposed_location is never required.
    proposed_time: Optional[str] = None
    proposed_participants: Optional[List[str]] = None
    proposed_location: Optional[str] = None

    @field_validator("type")
    @classmethod
    def type_must_be_valid(cls, v: str) -> str:
        """Return ``v`` unchanged when it is a supported action type.

        Raises:
            ValueError: if ``v`` is neither "ask" nor "execute".
        """
        if v in ("ask", "execute"):
            return v
        raise ValueError(f"Action type must be 'ask' or 'execute', got '{v}'")

    @model_validator(mode="after")
    def check_fields_for_type(self) -> "Action":
        """Check that the fields required by ``self.type`` are present.

        Returns:
            The validated instance itself.

        Raises:
            ValueError: if an "ask" action has no usable question, or an
                "execute" action proposes neither a time nor participants.
        """
        if self.type == "ask":
            # None, "" and whitespace-only questions are all rejected.
            question_text = self.question or ""
            if not question_text.strip():
                raise ValueError("Action type 'ask' requires a non-empty 'question'.")
        elif self.type == "execute":
            # Only None counts as "missing"; empty values are accepted as-is.
            nothing_proposed = (
                self.proposed_time is None and self.proposed_participants is None
            )
            if nothing_proposed:
                raise ValueError(
                    "Action type 'execute' requires at least 'proposed_time' "
                    "or 'proposed_participants'."
                )
        return self
# ---------------------------------------------------------------------------
# 4. Reward — structured reward signal
# ---------------------------------------------------------------------------
class Reward(BaseModel):
    """
    Structured reward returned by the environment.

    Attributes:
        score: Reward value; always stored within [0.0, 1.0].
        reason: Optional human-readable explanation.
    """

    score: float
    reason: Optional[str] = None

    @field_validator("score")
    @classmethod
    def clamp_score(cls, v: float) -> float:
        """Hard clamp: the stored score is always in [0.0, 1.0].

        Args:
            v: The already-parsed score value.

        Returns:
            ``v`` limited to the closed interval [0.0, 1.0]. A NaN input
            yields 0.0.
        """
        import math  # function-scope import; the module does not import math

        # NaN compares False against everything, so the min/max clamp below
        # would turn it into 1.0 (the maximum reward). Treat an undefined
        # score as the lowest reward instead.
        if math.isnan(v):
            return 0.0
        return max(0.0, min(1.0, v))
# ---------------------------------------------------------------------------
# 5. InternalState — hidden ground truth (used by env.py, not exposed)
# ---------------------------------------------------------------------------
class InternalState(BaseModel):
    """
    Hidden ground truth held by the environment / grader.

    Not meant to be sent to the agent directly.

    Attributes:
        true_time: Correct answer for the time field.
        true_participants: Correct answer for participants.
        true_location: Correct answer for location, if any.
        constraints: Free-form constraint mapping. Its schema is not
            defined in this module — confirm against env.py.
        collected_info: What has been revealed so far via Q&A.
        question_count: How many questions the agent has asked.
        done: Whether the episode is finished.
    """

    # Ground-truth answers.
    true_time: str = ""
    true_participants: List[str] = []
    true_location: Optional[str] = None
    constraints: Dict[str, Any] = {}

    # Episode progress.
    collected_info: Dict[str, str] = {}
    question_count: int = 0
    done: bool = False
|