support-env / server /environment.py
SwapnilPatil28's picture
Upload folder using huggingface_hub
73d1307 verified
import uuid
from typing import List, Dict
from openenv.core.env_server import Environment
from models import SupportAction, SupportObservation, SupportState
class SupportEnvironment(Environment):
    """Ticket-routing environment for a simulated customer-support desk.

    An episode is a fixed list of tickets selected by task name ("easy",
    "medium" or "hard"). For the current ticket the agent may either:

    * send a ``"search"`` action to look the ticket up in the internal
      record store without advancing, or
    * send any other action, which is treated as a routing decision and
      compares ``action.department`` with the ticket's expected department.

    Every reward emitted is either 0.01 or 0.99, so rewards stay strictly
    between 0 and 1.
    """

    # Reward values, kept strictly inside the open interval (0, 1).
    _REWARD_LOW = 0.01
    _REWARD_HIGH = 0.99

    def __init__(self):
        """Build the record store and task lists, and prime a default episode."""
        super().__init__()
        # Background records returned by "search" actions, keyed by ticket id.
        self._db: Dict[str, str] = {
            "T1": "Customer History: Frequent refunder. Status: Valid invoice.",
            "T4": "System Log: Payment gateway timed out. Card ending in 4242.",
            "T6": "Dev Log: 500 error on /api/auth. Cluster: us-east-1.",
        }
        # Ticket queues per difficulty; "dept" is the expected routing target.
        self.tasks: Dict[str, List[Dict[str, str]]] = {
            "easy": [
                {"id": "T1", "text": "Refund status?", "dept": "Billing"},
            ],
            "medium": [
                {"id": "T2", "text": "Upgrade account?", "dept": "Sales"},
                {"id": "T3", "text": "App keeps crashing.", "dept": "Tech"},
            ],
            "hard": [
                {"id": "T4", "text": "Payment failed!", "dept": "Billing"},
                {"id": "T5", "text": "Bulk pricing?", "dept": "Sales"},
                {"id": "T6", "text": "API Auth failure.", "dept": "Tech"},
            ],
        }
        # Prime a default "easy" episode so that step() and the state
        # property work even if the caller never invoked reset() first
        # (previously both raised AttributeError in that situation).
        self.current_task = self.tasks["easy"]
        self._state = SupportState(
            episode_id=str(uuid.uuid4()),
            task_id="easy",
            current_ticket_index=0,
        )

    def reset(self, task_name: str = "easy") -> SupportObservation:
        """Start a new episode and return the first ticket.

        Args:
            task_name: Key into ``self.tasks``. Unknown names fall back to
                the "easy" ticket list; the state's ``task_id`` still
                records the name exactly as it was passed in.

        Returns:
            A not-done observation for the first ticket with reward 0.01.
        """
        self.current_task = self.tasks.get(task_name, self.tasks["easy"])
        self._state = SupportState(
            episode_id=str(uuid.uuid4()),
            task_id=task_name,
            current_ticket_index=0,
        )
        first = self.current_task[0]
        return SupportObservation(
            done=False,
            reward=self._REWARD_LOW,
            ticket_id=first["id"],
            content=first["text"],
        )

    def step(self, action: SupportAction) -> SupportObservation:
        """Apply one agent action to the current ticket.

        Args:
            action: The agent's action. ``action_type == "search"`` performs
                a lookup; anything else is a routing decision judged by
                ``action.department`` (compared case-insensitively, ignoring
                surrounding whitespace).

        Returns:
            * search: the same ticket again plus ``search_result`` (or
              "No record." when the ticket has no stored record), reward 0.01.
            * routing: reward 0.99 if the department matches, else 0.01,
              together with the next ticket, or a terminal "EOF" observation
              after the last ticket.
            * any call after the episode has ended: the terminal "EOF"
              observation with reward 0.01.
        """
        self._state.step_count += 1
        position = self._state.current_ticket_index

        # The episode is already over: answer with the terminal observation
        # instead of indexing past the end of the ticket list.
        if position >= len(self.current_task):
            return SupportObservation(
                done=True,
                reward=self._REWARD_LOW,
                ticket_id="EOF",
                content="Done.",
            )

        ticket = self.current_task[position]

        if action.action_type == "search":
            # Searching does not advance the queue; it yields the minimum
            # reward, which is still greater than zero.
            return SupportObservation(
                done=False,
                reward=self._REWARD_LOW,
                ticket_id=ticket["id"],
                content=ticket["text"],
                search_result=self._db.get(ticket["id"], "No record."),
            )

        # A missing department (None) counts as a wrong answer rather than
        # crashing on None.strip().
        chosen = (action.department or "").strip().lower()
        reward = (
            self._REWARD_HIGH
            if chosen == ticket["dept"].lower()
            else self._REWARD_LOW
        )

        self._state.current_ticket_index = position + 1
        if self._state.current_ticket_index < len(self.current_task):
            upcoming = self.current_task[self._state.current_ticket_index]
            return SupportObservation(
                done=False,
                reward=reward,
                ticket_id=upcoming["id"],
                content=upcoming["text"],
            )
        return SupportObservation(
            done=True,
            reward=reward,
            ticket_id="EOF",
            content="Done.",
        )

    @property
    def state(self) -> SupportState:
        """Return the state object of the current episode."""
        return self._state