support-ticket-env / server /support_environment.py
Vighnesh
Fix #5: accumulate Task 2 classification credit into final score β€” action scaled to 0.7 max, classify adds up to 0.3, total 1.0
55ff252
"""
Customer Support Ticket Resolution β€” OpenEnv Environment (server side).
Implements the three tasks:
Task 1 (easy) – Classify a single ticket
Task 2 (medium) – Choose the correct action for a classified ticket
Task 3 (hard) – Fully resolve a queue of tickets with minimal steps
"""
from __future__ import annotations
import random
from typing import Optional
from openenv.core.env_server.interfaces import Environment
from openenv.core.env_server.types import State
from support_ticket_env.models import SupportAction, SupportObservation, SupportState
from support_ticket_env.tickets import TICKETS, TICKET_LOOKUP
from support_ticket_env.graders import (
grade_task1,
grade_task2,
grade_task3,
loop_penalty,
)
class SupportTicketEnvironment(Environment):
"""
OpenEnv environment that simulates a customer-support triage desk.
The task_id (1, 2, or 3) is set when the environment is reset.
"""
SUPPORTS_CONCURRENT_SESSIONS = True
def __init__(self) -> None:
super().__init__()
self._task_id: int = 1
self._ticket: dict = {}
self._classified: bool = False
self._classified_correctly: bool = False # tracks actual correctness, not just attempt
self._task2_cls_score: float = 0.0 # accumulated classification partial credit for Task 2
self._resolved: bool = False
self._step_count: int = 0
self._total_reward: float = 0.0
self._episode_id: Optional[str] = None
# Task 3: queue of tickets
self._queue: list[dict] = []
self._tickets_resolved: int = 0
self._tickets_total: int = 1
def get_metadata(self):
from openenv.core.env_server.types import EnvironmentMetadata
return EnvironmentMetadata(
name="support_ticket_env",
description="A real-world customer support ticket triage environment where an AI agent classifies tickets, selects actions, and resolves queues.",
version="1.0.0",
author="AlgoCore",
documentation_url="https://github.com/TryingHardToBeDeveloper/support-ticket-env",
)
# ──────────────────────── reset ────────────────────────────
def reset(
self,
seed: Optional[int] = None,
episode_id: Optional[str] = None,
task_id: int = 1,
**kwargs,
) -> SupportObservation:
rng = random.Random(seed)
self._episode_id = episode_id
self._task_id = int(task_id)
self._step_count = 0
self._total_reward = 0.0
self._classified = False
self._classified_correctly = False
self._task2_cls_score = 0.0
self._resolved = False
if self._task_id == 3:
# Give the agent a queue of 3 tickets
self._queue = rng.sample(TICKETS, k=3)
self._tickets_total = len(self._queue)
self._tickets_resolved = 0
self._ticket = self._queue[0]
else:
self._ticket = rng.choice(TICKETS)
self._tickets_total = 1
self._tickets_resolved = 0
return self._make_obs(
feedback="New episode started. Read the ticket and take action.",
score=0.0,
)
# ──────────────────────── step ─────────────────────────────
def step(self, action: SupportAction, **kwargs) -> SupportObservation: # type: ignore[override]
self._step_count += 1
penalty = loop_penalty(self._step_count)
if self._task_id == 1:
obs = self._step_task1(action)
elif self._task_id == 2:
obs = self._step_task2(action)
else:
obs = self._step_task3(action)
# Apply loop penalty on top of step reward
obs.reward = (obs.reward or 0.0) + penalty
obs.reward = round(max(-1.0, min(1.0, obs.reward)), 4)
self._total_reward += obs.reward
obs.step_count = self._step_count
return obs
# ──────────────────────── Task 1 ───────────────────────────
def _step_task1(self, action: SupportAction) -> SupportObservation:
if action.action_type != "classify":
return self._make_obs(
feedback="Task 1 requires a 'classify' action.",
score=0.0,
done=False,
)
score = grade_task1(
predicted_category=action.category or "",
correct_category=self._ticket["category"],
)
self._classified = score == 1.0
correct = self._ticket["category"]
if score == 1.0:
feedback = f"βœ… Correct! Category: '{correct}'."
done = True
else:
feedback = (
f"❌ Wrong. You said '{action.category}', correct is '{correct}'."
)
done = True # Task 1 is one-shot β€” agent gets one attempt
obs = self._make_obs(feedback=feedback, score=score, done=done)
if done:
self._resolved = True
return obs
# ──────────────────────── Task 2 ───────────────────────────
def _step_task2(self, action: SupportAction) -> SupportObservation:
# First step must be classification
if not self._classified:
if action.action_type != "classify":
return self._make_obs(
feedback="Please classify the ticket first.",
score=0.0,
)
cat_score = grade_task1(
action.category or "", self._ticket["category"]
)
self._classified = True
self._task2_cls_score = cat_score * 0.3 # store β€” combined with action score at step 2
# TODO: store self._classified_correctly here too if grade_task2
# is ever extended to factor in classification correctness
return self._make_obs(
feedback=(
f"Classified as '{action.category}'. "
f"{'Correct βœ…' if cat_score == 1.0 else 'Incorrect ❌'} "
"Now choose an action."
),
score=self._task2_cls_score,
)
# Second step: choose action
action_score = grade_task2(
action_type=action.action_type,
correct_action=self._ticket["correct_action"],
category=self._ticket["category"],
)
# Scale action score to 0.7 max so classification credit (0.0-0.3) has real room.
# Total max = 0.7 (perfect action) + 0.3 (correct classify) = 1.0
# Clamp AFTER addition β€” pre-clamping would silently discard classification credit.
score = round(min(1.0, action_score * 0.7 + self._task2_cls_score), 4)
correct = self._ticket["correct_action"]
if action_score == 1.0:
feedback = f"βœ… Correct action: '{correct}'."
elif action_score == 0.5:
feedback = (
f"⚠️ Partial credit. '{action.action_type}' is defensible "
f"but '{correct}' is preferred."
)
else:
feedback = f"❌ Wrong action. Correct: '{correct}'."
self._resolved = True
return self._make_obs(feedback=feedback, score=score, done=True)
# ──────────────────────── Task 3 ───────────────────────────
def _step_task3(self, action: SupportAction) -> SupportObservation:
MAX_STEPS = 15
if not self._classified:
# Must classify first
if action.action_type != "classify":
return self._make_obs(
feedback="Classify the ticket before taking action.",
score=0.0,
)
cat_score = grade_task1(
action.category or "", self._ticket["category"]
)
self._classified = True
self._classified_correctly = (cat_score == 1.0) # real correctness tracked
return self._make_obs(
feedback=(
f"Classified '{self._ticket['id']}' as '{action.category}'. "
f"{'Correct βœ…' if cat_score == 1.0 else 'Incorrect ❌'} "
"Now resolve it."
),
score=cat_score * 0.1,
)
# Resolve current ticket
action_correct = action.action_type == self._ticket["correct_action"]
pair = frozenset({action.action_type, self._ticket["correct_action"]})
action_partial = (not action_correct) and pair in {
frozenset({"reply", "escalate"})
}
score = grade_task3(
classified_correctly=self._classified_correctly, # real score, not just attempt flag
action_correct=action_correct,
action_partial=action_partial,
reply_text=action.reply_text,
category=self._ticket["category"], # ground truth category
resolution_hint=self._ticket.get("resolution_hint", ""), # per-ticket hint keywords
resolved=True,
steps_taken=self._step_count,
max_steps=MAX_STEPS,
)
self._tickets_resolved += 1
correct_action = self._ticket["correct_action"]
# Advance to next ticket in queue
if self._tickets_resolved < self._tickets_total:
self._ticket = self._queue[self._tickets_resolved]
self._classified = False
feedback = (
f"Ticket resolved (score {score:.2f}). "
f"Moving to next ticket ({self._tickets_resolved + 1}/{self._tickets_total})."
)
done = False
else:
feedback = (
f"All {self._tickets_total} tickets resolved! "
f"Episode score: {self._total_reward + score:.2f}"
)
done = True
self._resolved = True
return self._make_obs(feedback=feedback, score=score, done=done)
# ──────────────────────── helpers ──────────────────────────
def _make_obs(
self,
feedback: str,
score: float,
done: bool = False,
) -> SupportObservation:
return SupportObservation(
ticket_id=self._ticket.get("id", ""),
ticket_text=self._ticket.get("text", ""),
task_id=self._task_id,
current_category=self._ticket.get("category") if self._classified else None,
resolved=self._resolved,
step_count=self._step_count,
feedback=feedback,
score=score,
reward=score,
done=done,
)
# ──────────────────────── state ────────────────────────────
@property
def state(self) -> SupportState:
return SupportState(
episode_id=self._episode_id,
step_count=self._step_count,
task_id=self._task_id,
ticket_id=self._ticket.get("id", ""),
correct_category=self._ticket.get("category", ""),
correct_action=self._ticket.get("correct_action", ""),
classified=self._classified,
resolved=self._resolved,
total_reward=self._total_reward,
tickets_resolved=self._tickets_resolved,
tickets_total=self._tickets_total,
)