rajvardhan123 committed on
Commit
0f54b5b
·
verified ·
1 Parent(s): ac45281

Update graders.py

Browse files
Files changed (1) hide show
  1. graders.py +41 -103
graders.py CHANGED
@@ -4,119 +4,57 @@ from models import Action, Reward, RewardBreakdown, Email
4
 
5
  # Urgency labels. urgency_score compares the list positions of two labels,
  # so the order of the entries matters (presumably least to most urgent).
  URGENCY_ORDER = ["ignore", "low", "medium", "high", "critical"]
6
 
7
def urgency_score(predicted: str, ground_truth: str) -> float:
    """Score a predicted urgency label by its distance from the ground truth.

    Both labels are looked up in ``URGENCY_ORDER`` and the score depends on
    how many positions apart they sit.

    Args:
        predicted: Urgency label chosen by the agent.
        ground_truth: Expected urgency label.

    Returns:
        0.99 for an exact match, 0.5 when one position apart, 0.2 when two
        positions apart, and 0.01 otherwise (including when either label is
        not present in ``URGENCY_ORDER``).
    """
    # Partial credit keyed by the number of positions between the labels.
    credit_by_distance = {0: 0.99, 1: 0.5, 2: 0.2}
    try:
        distance = abs(
            URGENCY_ORDER.index(predicted) - URGENCY_ORDER.index(ground_truth)
        )
    except ValueError:
        # list.index raises ValueError for a label outside URGENCY_ORDER.
        return 0.01
    return credit_by_distance.get(distance, 0.01)
18
-
19
-
20
# Partial credit for category pairs treated as related. Keys are unordered
# pairs, so the lookup does not depend on which label was the prediction.
_RELATED_CATEGORY_SCORES = {
    frozenset({"customer_complaint", "support"}): 0.4,
    frozenset({"sales_inquiry", "pr"}): 0.4,
    frozenset({"hr", "legal"}): 0.3,
    frozenset({"finance", "legal"}): 0.3,
    frozenset({"internal_ops", "support"}): 0.4,
}


def category_score(predicted: str, ground_truth: str) -> float:
    """Score a predicted category against the ground-truth category.

    Args:
        predicted: Category label chosen by the agent.
        ground_truth: Expected category label.

    Returns:
        0.99 for an exact match, the partial credit listed in
        ``_RELATED_CATEGORY_SCORES`` for a related pair, and 0.01 otherwise.
    """
    if predicted == ground_truth:
        return 0.99
    return _RELATED_CATEGORY_SCORES.get(
        frozenset({predicted, ground_truth}), 0.01
    )
32
-
33
-
34
# Partial credit for action pairs treated as acceptable substitutes. Keys are
# unordered pairs because the credit is the same in both directions.
_ACCEPTABLE_ACTION_SCORES = {
    frozenset({"reply", "forward"}): 0.5,
    frozenset({"escalate", "forward"}): 0.5,
    frozenset({"archive", "delete"}): 0.3,
}


def action_score(predicted: str, ground_truth: str) -> float:
    """Score a predicted action against the ground-truth action.

    Args:
        predicted: Action chosen by the agent.
        ground_truth: Expected action.

    Returns:
        0.99 for an exact match, the partial credit listed in
        ``_ACCEPTABLE_ACTION_SCORES`` for an acceptable substitute, and 0.01
        otherwise.
    """
    if predicted == ground_truth:
        return 0.99
    return _ACCEPTABLE_ACTION_SCORES.get(
        frozenset({predicted, ground_truth}), 0.01
    )
46
-
47
-
48
def reply_quality_score(draft: Optional[str], email: Email) -> float:
    """Heuristically score a drafted reply.

    The score is built from a base credit, a length bonus, the share of
    required keywords that appear in the draft, and a capped bonus for
    polite-tone markers. Length is measured on the whitespace-stripped draft
    so padding cannot earn the length bonus.

    Args:
        draft: Reply text written by the agent; may be ``None`` or empty.
        email: Email being answered; only ``email._gt_keywords`` is read.

    Returns:
        0.01 when the stripped draft has fewer than 20 characters, otherwise
        a score capped at 0.99.
    """
    text = (draft or "").strip()
    if len(text) < 20:
        return 0.01

    text_lower = text.lower()  # lowercase once for both substring scans
    score = 0.15  # base credit for any draft that passes the length floor

    if len(text) >= 100:
        score += 0.15

    required_kws = email._gt_keywords or []
    if required_kws:
        hits = sum(1 for kw in required_kws if kw.lower() in text_lower)
        score += 0.5 * (hits / len(required_kws))

    tone_markers = (
        "thank", "apolog", "understand", "assist", "help",
        "please", "we will", "we are", "sincerely", "regards",
        "look forward", "happy to", "reach out",
    )
    tone_hits = sum(1 for marker in tone_markers if marker in text_lower)
    # Each marker is worth 0.05, capped at 0.2 in total.
    score += min(0.2, tone_hits * 0.05)

    return min(0.99, score)
74
-
75
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
76
  def grade_task_easy(action: Action, email: Email) -> Reward:
77
- gt_cat = email._gt_category
78
- is_spam = (gt_cat == "spam")
79
-
80
- if is_spam:
81
- if action.category == "spam" and action.action == "delete":
82
- return Reward(0.99, RewardBreakdown(0.99,0.99,0.99,0.01,0.01), "Correct spam")
83
- elif action.category == "spam" or action.action == "delete":
84
- return Reward(0.6, RewardBreakdown(0.5,0.8,0.5,0.01,0.01), "Partial spam")
85
- else:
86
- return Reward(0.01, RewardBreakdown(0.01,0.01,0.01,0.01,0.2), "Failed spam")
87
-
88
- u_score = urgency_score(action.urgency, email._gt_urgency or "medium")
89
- c_score = category_score(action.category, gt_cat or "other")
90
- a_score = action_score(action.action, email._gt_action or "archive")
91
-
92
- penalty = 0.3 if action.category == "spam" else 0.01
93
- total = max(0.01, (u_score*0.3 + c_score*0.4 + a_score*0.3) - penalty)
94
-
95
- return Reward(round(total,3), RewardBreakdown(u_score,c_score,a_score,0.01,penalty), "OK")
96
-
97
 
 
98
  def grade_task_medium(action: Action, email: Email) -> Reward:
99
- u_score = urgency_score(action.urgency, email._gt_urgency or "medium")
100
- c_score = category_score(action.category, email._gt_category or "other")
101
- a_score = action_score(action.action, email._gt_action or "archive")
102
-
103
- penalty = 0.01
104
- total = max(0.01, (u_score*0.3 + c_score*0.4 + a_score*0.3) - penalty)
105
 
106
- return Reward(round(total,3), RewardBreakdown(u_score,c_score,a_score,0.01,penalty), "OK")
107
 
 
108
 
 
109
  def grade_task_hard(action: Action, email: Email) -> Reward:
110
- u_score = urgency_score(action.urgency, email._gt_urgency or "medium")
111
- c_score = category_score(action.category, email._gt_category or "other")
112
- a_score = action_score(action.action, email._gt_action or "archive")
113
-
114
- r_score = reply_quality_score(action.draft_reply, email)
115
-
116
- total = max(0.01, (u_score*0.3 + c_score*0.4 + a_score*0.3 + r_score*0.2))
117
 
118
- return Reward(round(min(total,0.99),3), RewardBreakdown(u_score,c_score,a_score,r_score,0.01), "OK")
119
 
 
120
 
121
  GRADERS = {
122
  "task_easy": grade_task_easy,
 
4
 
5
  # Urgency labels. urgency_score compares the list positions of two labels,
  # so the order of the entries matters (presumably least to most urgent).
  URGENCY_ORDER = ["ignore", "low", "medium", "high", "critical"]
6
 
7
def safe(x: float) -> float:
    """Clamp a score into the closed range [0.01, 0.99].

    Args:
        x: Raw score.

    Returns:
        ``x`` limited to at least 0.01 and at most 0.99. A NaN input returns
        0.01; without the explicit check, ``min``/``max`` would let NaN come
        out as 0.99.
    """
    if x != x:  # NaN is the only float that is not equal to itself
        return 0.01
    return max(0.01, min(0.99, x))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9
 
10
def urgency_score(p: str, g: str) -> float:
    """Score a predicted urgency label by its distance from the ground truth.

    Both labels are looked up in ``URGENCY_ORDER`` and the score depends on
    how many positions apart they sit.

    Args:
        p: Predicted urgency label.
        g: Ground-truth urgency label.

    Returns:
        0.99 for an exact match, 0.6 when one position apart, 0.3 when two
        positions apart, and 0.1 otherwise (including when either label is
        not present in ``URGENCY_ORDER``).
    """
    try:
        diff = abs(URGENCY_ORDER.index(p) - URGENCY_ORDER.index(g))
    except ValueError:
        # list.index raises ValueError for an unknown label. Catching only
        # that keeps KeyboardInterrupt/SystemExit from being swallowed.
        return 0.1

    if diff == 0:
        return 0.99
    if diff == 1:
        return 0.6
    if diff == 2:
        return 0.3
    return 0.1
19
+
20
def category_score(p: str, g: str) -> float:
    """Score a predicted category against the ground-truth category.

    Args:
        p: Predicted category label.
        g: Ground-truth category label.

    Returns:
        0.99 when the labels are equal, otherwise 0.3.
    """
    if p == g:
        return 0.99
    return 0.3
22
+
23
def action_score(p: str, g: str) -> float:
    """Score a predicted action against the ground-truth action.

    Args:
        p: Predicted action.
        g: Ground-truth action.

    Returns:
        0.99 when the actions are equal, otherwise 0.3.
    """
    if p == g:
        return 0.99
    return 0.3
25
+
26
def reply_quality_score(draft: "str | None", email: Email) -> float:
    """Score a drafted reply by its length alone.

    Length is measured on the whitespace-stripped draft, so a draft made only
    of whitespace counts as missing and padding earns no credit.

    Args:
        draft: Reply text written by the agent; may be ``None`` or empty.
        email: Email being answered. It is not read here; the parameter is
            kept so existing callers keep working.

    Returns:
        0.1 when there is no usable draft, otherwise 0.2 plus
        ``len(draft) / 200`` capped at 0.5, passed through ``safe``.
    """
    text = (draft or "").strip()
    if not text:
        return 0.1
    return safe(0.2 + min(len(text) / 200, 0.5))
31
+
32
+ # ---- EASY ----
33
  def grade_task_easy(action: Action, email: Email) -> Reward:
34
+ score = 0.5 if action.category == "spam" else 0.8
35
+ score = safe(score)
36
+ return Reward(score, RewardBreakdown(score,score,score,0.1,0.1), "ok")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
37
 
38
+ # ---- MEDIUM ----
39
  def grade_task_medium(action: Action, email: Email) -> Reward:
40
+ u = urgency_score(action.urgency, email._gt_urgency or "medium")
41
+ c = category_score(action.category, email._gt_category or "other")
42
+ a = action_score(action.action, email._gt_action or "archive")
 
 
 
43
 
44
+ total = safe(u*0.3 + c*0.4 + a*0.3)
45
 
46
+ return Reward(total, RewardBreakdown(u,c,a,0.1,0.1), "ok")
47
 
48
+ # ---- HARD ----
49
  def grade_task_hard(action: Action, email: Email) -> Reward:
50
+ u = urgency_score(action.urgency, email._gt_urgency or "medium")
51
+ c = category_score(action.category, email._gt_category or "other")
52
+ a = action_score(action.action, email._gt_action or "archive")
53
+ r = reply_quality_score(action.draft_reply, email)
 
 
 
54
 
55
+ total = safe(u*0.3 + c*0.3 + a*0.2 + r*0.2)
56
 
57
+ return Reward(total, RewardBreakdown(u,c,a,r,0.1), "ok")
58
 
59
  GRADERS = {
60
  "task_easy": grade_task_easy,