support-ticket-env / graders.py
Vighnesh
Fix #4: use resolution_hint in reply scoring β€” category hits 0.03, hint hits 0.05, cap 0.25 (intentional specificity incentive)
3d8844e
"""
Graders for all three tasks.
Each grader returns a float in [0.0, 1.0].
Task 1 – Classification (easy)
- 1.0 : correct category
- 0.0 : wrong category
Task 2 – Action Selection (medium)
- 1.0 : correct action
- 0.5 : partially correct (e.g., escalate vs reply both defensible)
- 0.0 : clearly wrong (e.g., close an unsolved ticket)
Task 3 – Full Resolution (hard)
Combines classification + action + reply quality into a single score.
Rewards partial progress so the agent gets signal throughout the trajectory.
"""
from __future__ import annotations
from typing import Dict, Any
# ─────────────────────────── helpers ───────────────────────────
# Pairs of actions that are considered "close enough" for partial credit
_PARTIAL_CREDIT_PAIRS = {
frozenset({"reply", "escalate"}), # borderline tickets
}
_KEYWORD_REWARDS: Dict[str, list[str]] = {
"billing": ["refund", "charge", "invoice", "payment", "billing"],
"account": ["password", "login", "account", "cancel", "subscription"],
"technical": ["engineering", "escalate", "bug", "crash", "error", "fix"],
"refund": ["refund", "return", "credit", "process"],
"general": ["hours", "contact", "phone", "information", "help"],
}
def _reply_quality(
reply_text: str,
category: str,
resolution_hint: str = "",
) -> float:
"""Return 0.0–0.25 based on how relevant the reply text is.
Two-tier keyword scoring (both case-insensitive, punctuation-stripped):
- Category keyword hit β†’ 0.03 each (broad topical relevance)
- Hint keyword hit β†’ 0.05 each (specific resolution relevance)
Total capped at 0.25 β€” intentionally rewards specificity over vagueness.
Total grade_task3 weights: 0.20 + 0.40 + 0.25 + 0.15 = 1.00
"""
if not reply_text:
return 0.0
import re
cleaned = re.sub(r'[^\w\s]', ' ', reply_text.lower())
# Broad category keywords β€” 0.03 each
category_keywords = _KEYWORD_REWARDS.get(category, [])
category_score = sum(0.03 for kw in category_keywords if kw in cleaned)
# Specific hint keywords β€” 0.05 each (extracted from resolution_hint)
hint_score = 0.0
if resolution_hint:
hint_words = set(re.sub(r'[^\w\s]', ' ', resolution_hint.lower()).split())
# filter out short/common stop words
hint_words = {w for w in hint_words if len(w) > 3}
hint_score = sum(0.05 for w in hint_words if w in cleaned)
return round(min(0.25, category_score + hint_score), 4)
# ─────────────────────────── Task 1 ────────────────────────────
def grade_task1(
predicted_category: str,
correct_category: str,
) -> float:
"""Binary classification reward."""
return 1.0 if predicted_category == correct_category else 0.0
# ─────────────────────────── Task 2 ────────────────────────────
def grade_task2(
action_type: str,
correct_action: str,
category: str | None = None,
) -> float:
"""
Action-selection reward.
Full credit for exact match, partial credit for defensible alternatives.
Penalises closing an unresolved ticket.
"""
if action_type == correct_action:
return 1.0
# Partial credit for ambiguous cases
pair = frozenset({action_type, correct_action})
if pair in _PARTIAL_CREDIT_PAIRS:
return 0.5
# Closing an unresolved ticket is always wrong
if action_type == "close":
return 0.0
return 0.0
# ─────────────────────────── Task 3 ────────────────────────────
def grade_task3(
classified_correctly: bool,
action_correct: bool,
action_partial: bool,
reply_text: str | None,
category: str,
resolved: bool,
steps_taken: int,
max_steps: int = 5,
resolution_hint: str = "",
) -> float:
"""
Multi-step resolution reward with partial progress.
Breakdown:
0.20 – classification correct
0.40 – action correct (0.20 if partial)
0.25 – reply quality (two-tier: category keywords @0.03, hint keywords @0.05)
0.15 – efficiency bonus (fewer steps β†’ higher bonus)
"""
score = 0.0
if classified_correctly:
score += 0.20
if action_correct:
score += 0.40
elif action_partial:
score += 0.20
if reply_text:
score += _reply_quality(reply_text, category, resolution_hint)
# Efficiency: full 0.15 for 1 step, 0 for max_steps steps
if resolved and steps_taken <= max_steps:
efficiency = max(0.0, (max_steps - steps_taken) / (max_steps - 1))
score += 0.15 * efficiency
return round(min(1.0, score), 4)
# ─────────────────────────── Penalty ───────────────────────────
def loop_penalty(step_count: int, max_steps: int = 10) -> float:
"""Return a negative reward if agent is stuck in a loop."""
if step_count > max_steps:
return -0.05 * (step_count - max_steps)
return 0.0