from pydantic import BaseModel,Field
from typing import List, Optional, Dict, Any,Literal


# ACTION (Agent -> Environment)
# Closed vocabularies used by the action model, named once so the field
# declarations below stay short. Private: not part of the module's API.
_ActionKind = Literal["message_expert", "propose_draft", "submit_final"]
_Recipient = Literal["Security", "Finance", "UX", "All"]


class WorkSpaceAction(BaseModel):
    # A single move made by the agent: the kind of move, who it is addressed
    # to, and its text payload.
    # (Documented with comments rather than a class docstring because
    # pydantic copies a model docstring into the generated JSON schema.)

    # Required. Validation only accepts one of the three literal values.
    action_type: _ActionKind = Field(
        description="the type of action the agent is taking",
    )

    # Optional recipient; None when omitted. The description says it is null
    # for a final submission, but nothing in this model enforces that pairing.
    target: Optional[_Recipient] = Field(
        None,
        description="Who the agent is talking to. Null if submitting final.",
    )

    # Required free-form text.
    content: str = Field(
        description="The actual message, question, or draft payload.",
    )

# INTERNAL STATE (Environment's Brain - Hidden from Agent)
class ExpertState(BaseModel):
    # Bookkeeping the environment keeps for one expert. Only `name` and
    # `hidden_constraint` must be supplied; every other field has a default.

    # --- identity (required) ---
    name: str
    hidden_constraint: str

    # --- progress tracking (defaulted) ---
    # Goes up when the agent ignores this expert.
    frustration_level: float = Field(default=0.0)
    # True once the agent has found the constraint; used to grant the +0.1
    # dense reward.
    constraint_discovered_by_agent: bool = Field(default=False)
    # NOTE(review): presumably records that the hidden constraint changed
    # during the episode - confirm against the code that sets it.
    constraint_shifted: bool = Field(default=False)

class WorkspaceState(BaseModel):
    # Environment-side state for one negotiation. `experts` and
    # `chat_history` carry no default, so the caller must pass both when
    # building a state (an empty dict / empty list is accepted).

    # Turn counter and its ceiling.
    turn_count: int = Field(default=0)
    max_turns: int = Field(default=15)

    # Expert records by string key.
    # NOTE(review): presumably keyed by expert name - confirm with the caller.
    experts: Dict[str, ExpertState]

    # Full transcript of the negotiation; each entry is a str -> str mapping.
    chat_history: List[Dict[str, str]]

    # Termination flag, initially False.
    is_done: bool = Field(default=False)

# OBSERVATION (Environment -> Agent)
class WorkspaceObservation(BaseModel):
    # What the environment hands back to the agent after a step. All four
    # fields are required; none has a default.
    # (Documented with comments rather than a class docstring because
    # pydantic copies a model docstring into the generated JSON schema.)

    feedback: str = Field(
        description="The reply from the expert(s) or system.",
    )

    # `Field(...)` marks a required field with no extra metadata.
    current_turn: int = Field(...)

    reward: float = Field(
        description="Step reward (e.g., +0.1) or Final Harmonic Mean.",
    )

    done: bool = Field(...)