Spaces:

Yaser77
/

ambiguity-env

Sleeping

Yaser77

feat: implement dynamic constraint system with stochastic generation and UI visualization

e461841 about 2 months ago

4.84 kB

	"""
	models/models.py
	OpenEnv-compliant Pydantic models.

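	Three public models:
	Observation → what the agent sees
	Action → what the agent does
	Reward → structured reward signal

	One supporting model:
	Message → a single conversation turn (items of Observation.conversation_history)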

	One internal model (used by env.py):
	InternalState → hidden ground truth for grading
	"""

	from __future__ import annotations

	from typing import Any, Dict, List, Optional
	from pydantic import BaseModel, field_validator, model_validator


	# ---------------------------------------------------------------------------
	# 1. Message (building block for conversation_history)
	# ---------------------------------------------------------------------------

	class Message(BaseModel):
	    """
	    One entry of a conversation transcript.

	    Fields:
	        role     Speaker label. The intended values are "user" and
	                 "assistant"; this model does not enforce them.
	        content  Text of the turn.
	    """

	    role: str  # "user" | "assistant"
	    content: str


	# ---------------------------------------------------------------------------
	# 2. Observation — what the agent receives at each step
	# ---------------------------------------------------------------------------

	class Observation(BaseModel):
	    """
	    Agent-facing snapshot of the environment.

	    Fields:
	        instruction           Original task string (never changes).
	        known_info            Info collected so far, e.g. {"time": "10 AM", ...}.
	        constraints           Free-form constraint mapping (string keys, any
	                              values); its schema is not defined in this module.
	        conversation_history  Full Q&A history as Message objects.
	        last_response         The environment's most recent reply, or None.

	    Only ``instruction`` is required; every other field has a default.
	    """

	    instruction: str

	    # Mutable literals are used as defaults below. Pydantic copies field
	    # defaults for each instance, so they are not shared between models.
	    known_info: Dict[str, str] = {}
	    constraints: Dict[str, Any] = {}
	    conversation_history: List[Message] = []

	    last_response: Optional[str] = None


	# ---------------------------------------------------------------------------
	# 3. Action — what the agent can do
	# ---------------------------------------------------------------------------

	class Action(BaseModel):
	    """
	    A single move made by the agent.

	    Supported ``type`` values:
	        "ask"      pose a clarification question
	        "execute"  attempt to complete the task

	    Validation rules:
	        - any other ``type`` is rejected
	        - "ask" needs a ``question`` that is not empty or whitespace-only
	        - "execute" needs ``proposed_time`` and/or ``proposed_participants``
	          to be set (i.e. not None)
	    """

	    type: str  # "ask" | "execute"

	    # Used by "ask".
	    question: Optional[str] = None

	    # Used by "execute"; proposed_location is always optional.
	    proposed_time: Optional[str] = None
	    proposed_participants: Optional[List[str]] = None
	    proposed_location: Optional[str] = None

	    @field_validator("type")
	    @classmethod
	    def type_must_be_valid(cls, v: str) -> str:
	        """Return *v* unchanged when it names a supported action type.

	        Raises:
	            ValueError: if *v* is neither "ask" nor "execute".
	        """
	        if v in ("ask", "execute"):
	            return v
	        raise ValueError(f"Action type must be 'ask' or 'execute', got '{v}'")

	    @model_validator(mode="after")
	    def check_fields_for_type(self) -> "Action":
	        """Check that the fields required by ``self.type`` are present.

	        Raises:
	            ValueError: if an "ask" action has a missing/blank question, or
	                an "execute" action sets neither proposed_time nor
	                proposed_participants.
	        """
	        if self.type == "ask":
	            asked = self.question
	            # None, "" and whitespace-only strings all count as "no question".
	            if asked is None or not asked.strip():
	                raise ValueError("Action type 'ask' requires a non-empty 'question'.")
	        elif self.type == "execute":
	            # NOTE(review): only None counts as missing here, so an empty
	            # string or an empty list satisfies this check — confirm intended.
	            has_proposal = (
	                self.proposed_time is not None
	                or self.proposed_participants is not None
	            )
	            if not has_proposal:
	                raise ValueError(
	                    "Action type 'execute' requires at least "
	                    "'proposed_time' or 'proposed_participants'."
	                )
	        return self


	# ---------------------------------------------------------------------------
	# 4. Reward — structured reward signal
	# ---------------------------------------------------------------------------

	class Reward(BaseModel):
	    """
	    Structured reward returned by the environment.

	    Fields:
	        score   Reward value; forced into [0.0, 1.0] when the model is
	                validated. NaN is mapped to 0.0.
	        reason  Human-readable explanation (optional).
	    """

	    score: float
	    reason: Optional[str] = None

	    @field_validator("score")
	    @classmethod
	    def clamp_score(cls, v: float) -> float:
	        """Hard clamp: return *v* limited to [0.0, 1.0].

	        Values >= 1.0 (including +inf) become 1.0; values <= 0.0 (including
	        -inf) become 0.0. NaN becomes 0.0.

	        The clamp is written as explicit comparisons on purpose:
	        ``max(0.0, min(1.0, v))`` turns NaN into 1.0, because comparisons
	        with NaN are false and ``min`` then keeps its first argument, so an
	        undefined score would be reported as a perfect reward.
	        """
	        if v >= 1.0:
	            return 1.0
	        if v > 0.0:
	            return v
	        # Reached for negatives, zero and NaN (both comparisons above are
	        # false for NaN), so an undefined score never earns reward.
	        return 0.0


	# ---------------------------------------------------------------------------
	# 5. InternalState — hidden ground truth (used by env.py, not exposed)
	# ---------------------------------------------------------------------------

	class InternalState(BaseModel):
	    """
	    Hidden ground truth held by the environment / grader.
	    It is never sent to the agent directly.

	    Fields:
	        true_time          Correct answer for the time field.
	        true_participants  Correct answer for participants.
	        true_location      Correct answer for location (optional).
	        constraints        Free-form constraint mapping (string keys, any
	                           values); its schema is not defined in this module.
	        collected_info     What has been revealed so far via Q&A.
	        question_count     How many questions the agent has asked.
	        done               Whether the episode is finished.

	    Every field has a default, so the model can be built with no arguments.
	    """

	    # --- ground truth ------------------------------------------------------
	    true_time: str = ""
	    true_participants: List[str] = []
	    true_location: Optional[str] = None
	    constraints: Dict[str, Any] = {}

	    # --- episode progress --------------------------------------------------
	    collected_info: Dict[str, str] = {}
	    question_count: int = 0
	    done: bool = False