Spaces:

AlgoCore
/

support-ticket-env

Sleeping

Vighnesh

Fix #4: use resolution_hint in reply scoring — category hits 0.03, hint hits 0.05, cap 0.25 (intentional specificity incentive)

3d8844e about 1 month ago

raw

history blame contribute delete

5.47 kB

	"""
	Graders for all three tasks.

	Each grader returns a float in [0.0, 1.0].

	Task 1 – Classification (easy)
	- 1.0 : correct category
	- 0.0 : wrong category

	Task 2 – Action Selection (medium)
	- 1.0 : correct action
	- 0.5 : partially correct (e.g., escalate vs reply both defensible)
	- 0.0 : clearly wrong (e.g., close an unsolved ticket)

	Task 3 – Full Resolution (hard)
	Combines classification + action + reply quality into a single score.
	Rewards partial progress so the agent gets signal throughout the trajectory.
	"""

	from __future__ import annotations
	from typing import Dict, Any


	# ─────────────────────────── helpers ───────────────────────────

	# Pairs of actions that are considered "close enough" for partial credit
	_PARTIAL_CREDIT_PAIRS = {
	frozenset({"reply", "escalate"}), # borderline tickets
	}

	_KEYWORD_REWARDS: Dict[str, list[str]] = {
	"billing": ["refund", "charge", "invoice", "payment", "billing"],
	"account": ["password", "login", "account", "cancel", "subscription"],
	"technical": ["engineering", "escalate", "bug", "crash", "error", "fix"],
	"refund": ["refund", "return", "credit", "process"],
	"general": ["hours", "contact", "phone", "information", "help"],
	}


	def _reply_quality(
	reply_text: str,
	category: str,
	resolution_hint: str = "",
	) -> float:
	"""Return 0.0–0.25 based on how relevant the reply text is.

	Two-tier keyword scoring (both case-insensitive, punctuation-stripped):
	- Category keyword hit → 0.03 each (broad topical relevance)
	- Hint keyword hit → 0.05 each (specific resolution relevance)
	Total capped at 0.25 — intentionally rewards specificity over vagueness.

	Total grade_task3 weights: 0.20 + 0.40 + 0.25 + 0.15 = 1.00
	"""
	if not reply_text:
	return 0.0

	import re
	cleaned = re.sub(r'[^\w\s]', ' ', reply_text.lower())

	# Broad category keywords — 0.03 each
	category_keywords = _KEYWORD_REWARDS.get(category, [])
	category_score = sum(0.03 for kw in category_keywords if kw in cleaned)

	# Specific hint keywords — 0.05 each (extracted from resolution_hint)
	hint_score = 0.0
	if resolution_hint:
	hint_words = set(re.sub(r'[^\w\s]', ' ', resolution_hint.lower()).split())
	# filter out short/common stop words
	hint_words = {w for w in hint_words if len(w) > 3}
	hint_score = sum(0.05 for w in hint_words if w in cleaned)

	return round(min(0.25, category_score + hint_score), 4)


	# ─────────────────────────── Task 1 ────────────────────────────

	def grade_task1(
	predicted_category: str,
	correct_category: str,
	) -> float:
	"""Binary classification reward."""
	return 1.0 if predicted_category == correct_category else 0.0


	# ─────────────────────────── Task 2 ────────────────────────────

	def grade_task2(
	action_type: str,
	correct_action: str,
	category: str \| None = None,
	) -> float:
	"""
	Action-selection reward.
	Full credit for exact match, partial credit for defensible alternatives.
	Penalises closing an unresolved ticket.
	"""
	if action_type == correct_action:
	return 1.0

	# Partial credit for ambiguous cases
	pair = frozenset({action_type, correct_action})
	if pair in _PARTIAL_CREDIT_PAIRS:
	return 0.5

	# Closing an unresolved ticket is always wrong
	if action_type == "close":
	return 0.0

	return 0.0


	# ─────────────────────────── Task 3 ────────────────────────────

	def grade_task3(
	classified_correctly: bool,
	action_correct: bool,
	action_partial: bool,
	reply_text: str \| None,
	category: str,
	resolved: bool,
	steps_taken: int,
	max_steps: int = 5,
	resolution_hint: str = "",
	) -> float:
	"""
	Multi-step resolution reward with partial progress.

	Breakdown:
	0.20 – classification correct
	0.40 – action correct (0.20 if partial)
	0.25 – reply quality (two-tier: category keywords @0.03, hint keywords @0.05)
	0.15 – efficiency bonus (fewer steps → higher bonus)
	"""
	score = 0.0

	if classified_correctly:
	score += 0.20

	if action_correct:
	score += 0.40
	elif action_partial:
	score += 0.20

	if reply_text:
	score += _reply_quality(reply_text, category, resolution_hint)

	# Efficiency: full 0.15 for 1 step, 0 for max_steps steps
	if resolved and steps_taken <= max_steps:
	efficiency = max(0.0, (max_steps - steps_taken) / (max_steps - 1))
	score += 0.15 * efficiency

	return round(min(1.0, score), 4)


	# ─────────────────────────── Penalty ───────────────────────────

	def loop_penalty(step_count: int, max_steps: int = 10) -> float:
	"""Return a negative reward if agent is stuck in a loop."""
	if step_count > max_steps:
	return -0.05 * (step_count - max_steps)
	return 0.0