Spaces:
Sleeping
Sleeping
| """ | |
| Typed models for SupportEnv - Customer Support RL Environment. | |
| These Pydantic models define the strict contract between client and server. | |
| """ | |
| from typing import List, Optional, Literal, Dict, Any | |
| from pydantic import Field | |
| from openenv.core.env_server import Action, Observation, State | |
class SupportAction(Action):
    """
    A single move an agent submits while working a support ticket.

    Fields:
        action_type: Which operation to perform. Exactly one of:
            - "classify": categorize the ticket
              (billing, technical, general, account)
            - "respond": send a response to the customer
            - "escalate": escalate to a human agent
            - "request_info": ask the customer for more information
            - "resolve": mark the ticket as resolved
            - "lookup_kb": query the knowledge base for policy/procedure info
        content: Free-form string payload; its meaning follows action_type:
            - classify: the category label
            - respond: the response text
            - escalate: the reason for escalation
            - request_info: what information is needed
            - resolve: the resolution summary
            - lookup_kb: the search query (e.g. "refund", "billing", "password")
        confidence: Optional confidence score for the action; when given it
            must lie within 0.0-1.0 (inclusive).
    """

    # Closed set of verbs; any other value fails validation.
    action_type: Literal[
        "classify",
        "respond",
        "escalate",
        "request_info",
        "resolve",
        "lookup_kb",
    ]
    content: str
    # Omitted by default; bounded to [0.0, 1.0] when supplied.
    confidence: Optional[float] = Field(
        default=None,
        ge=0.0,
        le=1.0,
    )
class SupportObservation(Observation):
    """
    What the environment hands back to the agent after every step.

    The Observation base class is documented here as supplying:
        - done: bool - whether the episode is complete
        - reward: Optional[float] - reward for this step
    """

    # --- Ticket information (all required) ---
    ticket_id: str
    ticket_text: str
    ticket_subject: str
    customer_name: str

    # --- Context ---
    # Each instance gets its own fresh list of string-to-string dicts.
    interaction_history: List[Dict[str, str]] = Field(default_factory=list)
    # Required, bounded score: -1 = angry, 0 = neutral, 1 = happy
    customer_sentiment: float = Field(ge=-1.0, le=1.0)

    # --- Current state ---
    current_classification: Optional[str] = None
    is_classified: bool = False
    is_escalated: bool = False

    # --- Metadata ---
    task_difficulty: Literal["easy", "medium", "hard"]
    steps_remaining: int
    max_steps: int

    # --- Feedback ---
    message: str = ""
    # Defaults to the full list of action names; built per instance.
    available_actions: List[str] = Field(
        default_factory=lambda: [
            "classify",
            "respond",
            "escalate",
            "request_info",
            "resolve",
            "lookup_kb",
        ]
    )
class SupportState(State):
    """
    Environment-internal state, intended for debugging and monitoring.

    The State base class is documented here as supplying:
        - episode_id: Optional[str]
        - step_count: int
    """

    # --- Target information ---
    # Declared with exclude=True so these fields are left out of the
    # default model_dump output.
    target_category: str = Field(default="", exclude=True)
    target_resolution: str = Field(default="", exclude=True)
    requires_escalation: bool = Field(default=False, exclude=True)

    # --- Episode tracking ---
    task_id: str = ""
    task_difficulty: str = ""
    max_steps: int = 10

    # --- Performance tracking ---
    classification_correct: bool = False
    response_quality_score: float = 0.01
    escalation_correct: bool = False
    resolved: bool = False

    # --- Cumulative metrics ---
    total_reward: float = 0.01
    customer_sentiment: float = 0.01
class PublicSupportState(State):
    """
    Agent-facing view of the environment state.

    Declares only episode, performance and cumulative-metric fields; it has
    no target/answer fields, so secret target information cannot leak
    through this model.
    """

    # --- Episode tracking ---
    task_id: str = ""
    task_difficulty: str = ""
    max_steps: int = 10

    # --- Performance tracking ---
    classification_correct: bool = False
    response_quality_score: float = 0.01
    escalation_correct: bool = False
    resolved: bool = False

    # --- Cumulative metrics ---
    total_reward: float = 0.01
    customer_sentiment: float = 0.01