Spaces:
Sleeping
Sleeping
File size: 4,668 Bytes
78940a4 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 | from .models import Action, Email, State
from .reward import compute_dense_reward
def grade_easy(action: Action, email: Email) -> float:
    """Score one action taken on an easy-tier email.

    Every known easy email has exactly one accepted action type: ``"e1"``
    must be answered with ``"reply"`` and ``"e2"`` must be flagged with
    ``"mark_spam"``.  Anything else, including an unrecognised email id, is
    scored as a failure.

    Args:
        action: The action to grade; only ``action_type`` is inspected.
        email: The email the action was applied to; only ``id`` is inspected.

    Returns:
        ``compute_dense_reward(True)`` when the action type is the accepted
        one for this email, otherwise ``compute_dense_reward(False)``.
    """
    # (email id, the single action type that counts as correct)
    accepted_actions = (("e1", "reply"), ("e2", "mark_spam"))

    for email_id, wanted_type in accepted_actions:
        if email.id == email_id:
            if action.action_type == wanted_type:
                return compute_dense_reward(True)
            return compute_dense_reward(False)

    # Unknown email id: nothing can be graded as correct.
    return compute_dense_reward(False)
def grade_medium(action: Action, email: Email) -> float:
    """Score one action taken on a medium-tier email, with partial credit.

    Grading rules per email id:

    * ``"m1"`` (order ID missing from the request):
        - ``request_info`` earns the full success reward.
        - ``reply`` with non-empty text earns the full success reward when
          the lowercased text contains both ``"order"`` and ``"id"``, and
          half of it otherwise.
        - ``forward`` to an address containing ``"support"`` earns 0.3 of
          the success reward.
    * ``"m2"``: ``mark_spam`` or ``archive`` earns the full success reward.
    * ``"m3"``: ``reply`` earns the full success reward when the text is
      longer than 5 characters, and half of it otherwise.

    Everything else, including an unrecognised email id, is a failure.

    Args:
        action: The action to grade. Reads ``action_type``,
            ``response_text`` and ``forward_to``.
        email: The email the action was applied to; only ``id`` is read.

    Returns:
        ``compute_dense_reward(True)``, optionally scaled by a
        partial-credit factor, or ``compute_dense_reward(False)``.
    """
    email_id = email.id

    if email_id == "m1":
        kind = action.action_type

        # Asking the customer for the missing detail is the ideal move.
        if kind == "request_info":
            return compute_dense_reward(True)

        if kind == "reply" and action.response_text:
            body = action.response_text.lower()
            # Plain substring checks: the reply only has to mention both terms.
            if "order" in body and "id" in body:
                return compute_dense_reward(True)
            return compute_dense_reward(True) * 0.5

        if kind == "forward" and action.forward_to and "support" in action.forward_to.lower():
            # Escalating an incomplete ticket instead of asking for the
            # missing information only earns partial credit.
            return compute_dense_reward(True) * 0.3

        return compute_dense_reward(False)

    if email_id == "m2":
        if action.action_type in ("mark_spam", "archive"):
            return compute_dense_reward(True)
        return compute_dense_reward(False)

    if email_id == "m3":
        if action.action_type != "reply":
            return compute_dense_reward(False)
        text = action.response_text
        # A reply with a missing or very short body still counts, at half credit.
        if text and len(text) > 5:
            return compute_dense_reward(True)
        return compute_dense_reward(True) * 0.5

    # Unknown email id.
    return compute_dense_reward(False)
def grade_hard(action: Action, email: Email) -> float:
    """Score one action taken on a hard-tier email, with partial credit.

    Grading rules per email id:

    * ``"h1"`` (VIP issue):
        - ``reply`` earns the full success reward when the lowercased text
          contains ``"investigat"`` (investigate / investigating /
          investigation) or ``"downtime"``, and half of it otherwise.
        - ``escalate`` earns the full success reward at ``"urgent"``
          priority and 0.7 of it otherwise.
    * ``"h2"`` (alert):
        - ``escalate`` earns the full success reward at ``"urgent"``
          priority and 0.7 of it otherwise.
        - ``forward`` to an address containing ``"eng"`` earns the full
          success reward at ``"urgent"`` priority and half of it otherwise.
    * ``"h3"`` (compliance):
        - ``reply`` earns the full success reward when the lowercased text
          contains ``"acknowledge"``, and 0.4 of it otherwise.
        - ``forward`` to an address containing ``"legal"`` earns the full
          success reward.
    * ``"h4"`` (CEO phishing attempt): ``mark_spam`` earns the full success
      reward; ``archive`` or ``escalate`` earns half of it.
    * ``"h5"`` (intern project): ``reply`` earns the full success reward.

    Every other action, and every unrecognised email id, is graded with
    ``compute_dense_reward(False)`` so that all failures share one code path.

    Args:
        action: The action to grade. Reads ``action_type``,
            ``response_text``, ``forward_to`` and ``priority_level``.
        email: The email the action was applied to; only ``id`` is read.

    Returns:
        ``compute_dense_reward(True)``, optionally scaled by a
        partial-credit factor, or ``compute_dense_reward(False)``.
    """
    email_id = email.id

    if email_id == "h1":  # VIP issue
        kind = action.action_type
        if kind == "reply":
            text = (action.response_text or "").lower()
            # Either keyword shows the reply addresses the incident itself.
            if "investigat" in text or "downtime" in text:
                return compute_dense_reward(True)
            return compute_dense_reward(True) * 0.5
        if kind == "escalate":
            if action.priority_level == "urgent":
                return compute_dense_reward(True)
            return compute_dense_reward(True) * 0.7
        # Use the same failure reward as every other email instead of
        # scaling the success reward down to zero.
        return compute_dense_reward(False)

    if email_id == "h2":  # Alert
        kind = action.action_type
        if kind == "escalate":
            if action.priority_level == "urgent":
                return compute_dense_reward(True)
            return compute_dense_reward(True) * 0.7
        if kind == "forward" and action.forward_to and "eng" in action.forward_to.lower():
            if action.priority_level == "urgent":
                return compute_dense_reward(True)
            return compute_dense_reward(True) * 0.5
        return compute_dense_reward(False)

    if email_id == "h3":  # Compliance
        kind = action.action_type
        if kind == "reply":
            if action.response_text and "acknowledge" in action.response_text.lower():
                return compute_dense_reward(True)
            return compute_dense_reward(True) * 0.4
        if kind == "forward" and action.forward_to and "legal" in action.forward_to.lower():
            return compute_dense_reward(True)
        return compute_dense_reward(False)

    if email_id == "h4":  # CEO phishing attempt
        kind = action.action_type
        if kind == "mark_spam":
            return compute_dense_reward(True)
        if kind in ("archive", "escalate"):
            # Getting the message out of the inbox or raising it is
            # acceptable, but not as good as flagging it as spam.
            return compute_dense_reward(True) * 0.5
        return compute_dense_reward(False)

    if email_id == "h5":  # Intern project
        if action.action_type == "reply":
            return compute_dense_reward(True)
        return compute_dense_reward(False)

    # Unknown email id.
    return compute_dense_reward(False)
def grade_action(task_level: str, action: Action, email: Email, state: State) -> float:
    """Grade one action and scale it to a per-step share of the episode score.

    The raw score comes from the grader for ``task_level`` and is divided by
    the number of emails in that level (2 for easy, 3 for medium, 5 for
    hard), then clamped to ``[0.0, 1.0]``.  An unrecognised level scores 0.0.

    Args:
        task_level: ``"easy"``, ``"medium"`` or ``"hard"``.
        action: The action to grade.
        email: The email the action was applied to.
        state: Accepted for interface compatibility; not read here.

    Returns:
        The step reward, always within ``[0.0, 1.0]``.
    """
    if task_level == "easy":
        email_count, raw_score = 2, grade_easy(action, email)
    elif task_level == "medium":
        email_count, raw_score = 3, grade_medium(action, email)
    elif task_level == "hard":
        email_count, raw_score = 5, grade_hard(action, email)
    else:
        # Unknown level: nothing to grade, and no scaling is applied.
        email_count, raw_score = 1, 0.0

    # Dividing by the number of emails keeps the sum over a whole level at or
    # below 1.0, assuming each email is graded once with a raw score <= 1.
    step_share = raw_score / email_count

    # Clamp the reward for this step strictly into [0.0, 1.0].
    capped = min(1.0, step_share)
    return max(0.0, capped)
|