Spaces:

yashshinde0080
/

SupportEnv

Sleeping

App Files Files Community

SupportEnv / models.py

yashshinde0080

10/4/2026

30e1fea about 2 months ago

raw

history blame contribute delete

3.85 kB

	"""
	Typed models for SupportEnv - Customer Support RL Environment.
	These Pydantic models define the strict contract between client and server.
	"""

	from typing import List, Optional, Literal, Dict, Any
	from pydantic import Field
	from openenv.core.env_server import Action, Observation, State


	class SupportAction(Action):
	    """
	    A single move submitted by an agent to the support environment.

	    Fields:
	        action_type: Which operation to perform. Must be one of:
	            - "classify": categorize the ticket
	              (billing, technical, general, account)
	            - "respond": send a reply to the customer
	            - "escalate": hand the ticket over to a human agent
	            - "request_info": ask the customer for more information
	            - "resolve": mark the ticket as resolved
	            - "lookup_kb": query the knowledge base for
	              policy/procedure information

	        content: Payload of the action; its meaning depends on action_type:
	            - classify: the category label
	            - respond: the response text
	            - escalate: the reason for escalating
	            - request_info: a description of the information needed
	            - resolve: a summary of the resolution
	            - lookup_kb: the search query
	              (e.g. "refund", "billing", "password")

	        confidence: Optional confidence score for the action. When given,
	            it is validated to lie in the closed range 0.0-1.0.
	    """

	    # Closed set of operations; any other value fails validation.
	    action_type: Literal[
	        "classify",
	        "respond",
	        "escalate",
	        "request_info",
	        "resolve",
	        "lookup_kb",
	    ]

	    # Free-form text interpreted according to action_type (see docstring).
	    content: str

	    # Omitted confidence defaults to None; bounds apply only when supplied.
	    confidence: Optional[float] = Field(
	        default=None,
	        ge=0.0,
	        le=1.0,
	    )


	class SupportObservation(Observation):
	    """
	    What the environment hands back to the agent after every step.

	    In addition to the fields declared here, the Observation base class
	    is documented as supplying:
	        - done: bool - whether the episode is complete
	        - reward: Optional[float] - reward for this step
	    """

	    # ---- Ticket information (all required) ----
	    ticket_id: str
	    ticket_text: str
	    ticket_subject: str
	    customer_name: str

	    # ---- Context ----
	    # Prior turns of the conversation; every observation gets its own list.
	    interaction_history: List[Dict[str, str]] = Field(default_factory=list)
	    # Required, bounded score: -1 = angry, 0 = neutral, 1 = happy.
	    customer_sentiment: float = Field(ge=-1.0, le=1.0)

	    # ---- Current state of the ticket ----
	    current_classification: Optional[str] = None
	    is_classified: bool = False
	    is_escalated: bool = False

	    # ---- Episode metadata (all required) ----
	    task_difficulty: Literal["easy", "medium", "hard"]
	    steps_remaining: int
	    max_steps: int

	    # ---- Feedback for the agent ----
	    message: str = ""
	    # A factory is used so that each instance receives a fresh list
	    # rather than sharing one mutable default.
	    available_actions: List[str] = Field(
	        default_factory=lambda: [
	            "classify",
	            "respond",
	            "escalate",
	            "request_info",
	            "resolve",
	            "lookup_kb",
	        ]
	    )


	class SupportState(State):
	    """
	    Server-side state of the environment, intended for debugging and
	    monitoring.

	    The State base class is documented as additionally providing:
	        - episode_id: Optional[str]
	        - step_count: int
	    """

	    # ---- Ground-truth targets ----
	    # Declared with exclude=True so they are left out of the default
	    # model_dump output and are not exposed through serialization.
	    target_category: str = Field(default="", exclude=True)
	    target_resolution: str = Field(default="", exclude=True)
	    requires_escalation: bool = Field(default=False, exclude=True)

	    # ---- Episode tracking ----
	    task_id: str = ""
	    task_difficulty: str = ""
	    max_steps: int = 10

	    # ---- Performance tracking ----
	    classification_correct: bool = False
	    # NOTE(review): starts at 0.01 rather than 0.0 - presumably deliberate;
	    # confirm against the grader before changing.
	    response_quality_score: float = 0.01
	    escalation_correct: bool = False
	    resolved: bool = False

	    # ---- Cumulative metrics ----
	    # NOTE(review): both defaults are 0.01 and, unlike the observation's
	    # sentiment field, customer_sentiment carries no range bounds here.
	    total_reward: float = 0.01
	    customer_sentiment: float = 0.01


	class PublicSupportState(State):
	    """
	    Agent-facing view of the environment state.

	    Declares the same tracking fields and defaults as the internal state
	    model but omits the secret target fields entirely, so they cannot
	    leak to the agent.
	    """

	    # ---- Episode tracking ----
	    task_id: str = ""
	    task_difficulty: str = ""
	    max_steps: int = 10

	    # ---- Performance tracking ----
	    classification_correct: bool = False
	    # NOTE(review): starts at 0.01 rather than 0.0 - presumably deliberate;
	    # confirm before changing.
	    response_quality_score: float = 0.01
	    escalation_correct: bool = False
	    resolved: bool = False

	    # ---- Cumulative metrics ----
	    total_reward: float = 0.01
	    customer_sentiment: float = 0.01