# NOTE: the original capture began with "Spaces: / Sleeping / Sleeping" —
# a Hugging Face Spaces status banner picked up during extraction,
# not part of the source code.
# Standard library first, third-party second (PEP 8 grouping).
from typing import Any, Dict, List, Literal, Optional

from pydantic import BaseModel, Field
# ACTION (Agent -> Environment)
class WorkSpaceAction(BaseModel):
    """A single action the agent sends to the environment.

    NOTE(review): class name casing ("WorkSpace…") differs from the sibling
    "Workspace…" models; kept as-is to avoid breaking callers.
    """

    # Which move the agent is making: message an expert, float a draft,
    # or submit the final answer.
    action_type: Literal["message_expert", "propose_draft", "submit_final"] = Field(description="the type of action the agent is taking")
    # Addressee of the message; defaults to None (per the description,
    # null when submitting the final answer).
    target: Optional[Literal["Security", "Finance", "UX", "All"]] = Field(
        default=None,
        description="Who the agent is talking to. Null if submitting final."
    )
    # Free-form payload: the message text, question, or draft body.
    content: str = Field(
        description="The actual message, question, or draft payload."
    )
# INTERNAL STATE (Environment's Brain - Hidden from Agent)
class ExpertState(BaseModel):
    """Per-expert hidden state tracked by the environment, never shown to the agent."""

    # Expert's display name — presumably matches a WorkSpaceAction target
    # ("Security"/"Finance"/"UX"); verify against the environment code.
    name: str
    # The secret requirement the agent must uncover through dialogue.
    hidden_constraint: str
    frustration_level: float = 0.0  # Goes up if the agent ignores them...
    constraint_discovered_by_agent: bool = False  # Used to give the +0.1 dense reward...
    # Whether this expert's constraint has shifted during the episode.
    constraint_shifted: bool = False
class WorkspaceState(BaseModel):
    """Full hidden environment state for one negotiation episode."""

    # Steps taken so far in the episode.
    turn_count: int = 0
    # Hard cap on episode length.
    max_turns: int = 15
    # Expert name -> that expert's hidden state (required at construction).
    experts: Dict[str, ExpertState]
    chat_history: List[Dict[str, str]]  # Full transcript of the negotiation
    # True once the episode has terminated.
    is_done: bool = False
# OBSERVATION (Environment -> Agent)
class WorkspaceObservation(BaseModel):
    """What the environment returns to the agent after each step."""

    feedback: str = Field(description="The reply from the expert(s) or system.")
    # Turn index this observation corresponds to.
    current_turn: int
    reward: float = Field(description="Step reward (e.g., +0.1) or Final Harmonic Mean.")
    # True when the episode is over and no further actions are accepted.
    done: bool