""" Typed models for SupportEnv - Customer Support RL Environment. These Pydantic models define the strict contract between client and server. """ from typing import List, Optional, Literal, Dict, Any from pydantic import Field from openenv.core.env_server import Action, Observation, State class SupportAction(Action): """ Action that an agent can take in the support environment. action_type: The type of action to perform - "classify": Categorize the ticket (billing, technical, general, account) - "respond": Send a response to the customer - "escalate": Escalate to human agent - "request_info": Ask customer for more information - "resolve": Mark ticket as resolved - "lookup_kb": Query the knowledge base for policy/procedure info content: The actual content of the action - For classify: the category label - For respond: the response text - For escalate: reason for escalation - For request_info: what information is needed - For resolve: resolution summary - For lookup_kb: search query (e.g. "refund", "billing", "password") confidence: Optional confidence score (0.0-1.0) for the action """ action_type: Literal["classify", "respond", "escalate", "request_info", "resolve", "lookup_kb"] content: str confidence: Optional[float] = Field(default=None, ge=0.0, le=1.0) class SupportObservation(Observation): """ Observation returned by the environment after each step. Inherits from Observation base class which provides: - done: bool - whether episode is complete - reward: Optional[float] - reward for this step """ # Ticket information ticket_id: str ticket_text: str ticket_subject: str customer_name: str # Context interaction_history: List[Dict[str, str]] = Field(default_factory=list) customer_sentiment: float = Field(ge=-1.0, le=1.0) # -1 = angry, 0 = neutral, 1 = happy # Current state current_classification: Optional[str] = None is_classified: bool = False is_escalated: bool = False # Metadata task_difficulty: Literal["easy", "medium", "hard"] steps_remaining: int max_steps: int # Feedback message: str = "" available_actions: List[str] = Field(default_factory=lambda: [ "classify", "respond", "escalate", "request_info", "resolve", "lookup_kb" ]) class SupportState(State): """ Internal state of the environment (for debugging/monitoring). Inherits from State base class which provides: - episode_id: Optional[str] - step_count: int """ # Target information (HIDDEN from default model_dump) target_category: str = Field(default="", exclude=True) target_resolution: str = Field(default="", exclude=True) requires_escalation: bool = Field(default=False, exclude=True) # Episode tracking task_id: str = "" task_difficulty: str = "" max_steps: int = 10 # Performance tracking classification_correct: bool = False response_quality_score: float = 0.01 escalation_correct: bool = False resolved: bool = False # Cumulative metrics total_reward: float = 0.01 customer_sentiment: float = 0.01 class PublicSupportState(State): """ Public state of the environment returned to the agent. Excludes secret target fields to prevent information leaks. """ # Episode tracking task_id: str = "" task_difficulty: str = "" max_steps: int = 10 # Performance tracking classification_correct: bool = False response_quality_score: float = 0.01 escalation_correct: bool = False resolved: bool = False # Cumulative metrics total_reward: float = 0.01 customer_sentiment: float = 0.01