"""Pydantic schemas exchanged between the agent and the workspace environment.

Three groups of models are defined here:

* ``WorkSpaceAction`` -- what the agent sends to the environment.
* ``ExpertState`` / ``WorkspaceState`` -- the environment's internal
  bookkeeping, hidden from the agent.
* ``WorkspaceObservation`` -- what the environment sends back to the agent.
"""

from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field


# ACTION (Agent -> Environment)
class WorkSpaceAction(BaseModel):
    """A single action issued by the agent.

    ``action_type`` and ``target`` are restricted to the literal values listed
    in their annotations; pydantic rejects anything else at validation time.
    """

    action_type: Literal["message_expert", "propose_draft", "submit_final"] = Field(
        description="the type of action the agent is taking"
    )
    # Optional: defaults to None, which the description says is the value to
    # use when submitting the final answer.
    target: Optional[Literal["Security", "Finance", "UX", "All"]] = Field(
        default=None,
        description="Who the agent is talking to. Null if submitting final.",
    )
    content: str = Field(
        description="The actual message, question, or draft payload."
    )


# INTERNAL STATE (Environment's Brain - Hidden from Agent)
class ExpertState(BaseModel):
    """Per-expert bookkeeping kept by the environment."""

    name: str
    hidden_constraint: str
    frustration_level: float = 0.0  # Goes up if the agent ignores them...
    constraint_discovered_by_agent: bool = False  # Used to give the +0.1 dense reward...
    # NOTE(review): presumably flags that this expert's constraint changed
    # mid-episode -- confirm against the environment logic that sets it.
    constraint_shifted: bool = False


class WorkspaceState(BaseModel):
    """Whole-episode state held by the environment.

    ``experts`` must be supplied by the caller; every other field has a
    default describing a fresh episode.
    """

    turn_count: int = 0
    max_turns: int = 15
    experts: Dict[str, ExpertState]
    # Full transcript of the negotiation. default_factory gives each new state
    # its own empty list, so the field no longer has to be passed explicitly.
    chat_history: List[Dict[str, str]] = Field(default_factory=list)
    is_done: bool = False


# OBSERVATION (Environment -> Agent)
class WorkspaceObservation(BaseModel):
    """What the environment returns to the agent after an action."""

    feedback: str = Field(description="The reply from the expert(s) or system.")
    current_turn: int
    reward: float = Field(
        description="Step reward (e.g., +0.1) or Final Harmonic Mean."
    )
    done: bool