Spaces:

openenv123
/

openenv-email-triage

Sleeping

App Files Files Community

rajvardhan123 committed 8 days ago

Commit

0f54b5b

verified ·

1 Parent(s): ac45281

Update graders.py

Browse files

Files changed (1) hide show

graders.py +41 -103

graders.py CHANGED Viewed

@@ -4,119 +4,57 @@ from models import Action, Reward, RewardBreakdown, Email
 URGENCY_ORDER = ["ignore", "low", "medium", "high", "critical"]
-def urgency_score(predicted: str, ground_truth: str) -> float:
-    try:
-        p = URGENCY_ORDER.index(predicted)
-        g = URGENCY_ORDER.index(ground_truth)
-        diff = abs(p - g)
-        if diff == 0:   return 0.99
-        elif diff == 1: return 0.5
-        elif diff == 2: return 0.2
-        else:           return 0.01
-    except ValueError:
-        return 0.01
-def category_score(predicted: str, ground_truth: str) -> float:
-    if predicted == ground_truth:
-        return 0.99
-    related = {
-        frozenset({"customer_complaint", "support"}): 0.4,
-        frozenset({"sales_inquiry", "pr"}): 0.4,
-        frozenset({"hr", "legal"}): 0.3,
-        frozenset({"finance", "legal"}): 0.3,
-        frozenset({"internal_ops", "support"}): 0.4,
-    }
-    pair = frozenset({predicted, ground_truth})
-    return related.get(pair, 0.01)
-def action_score(predicted: str, ground_truth: str) -> float:
-    if predicted == ground_truth:
-        return 0.99
-    acceptable = {
-        ("reply", "forward"):   0.5,
-        ("forward", "reply"):   0.5,
-        ("escalate", "forward"): 0.5,
-        ("forward", "escalate"): 0.5,
-        ("archive", "delete"):  0.3,
-        ("delete", "archive"):  0.3,
-    }
-    return acceptable.get((predicted, ground_truth), 0.01)
-def reply_quality_score(draft: Optional[str], email: Email) -> float:
-    if not draft or len(draft.strip()) < 20:
-        return 0.01
-    score = 0.15
-    if len(draft) >= 100:
-        score += 0.15
-    required_kws = email._gt_keywords or []
-    if required_kws:
-        draft_lower = draft.lower()
-        hits = sum(1 for kw in required_kws if kw.lower() in draft_lower)
-        keyword_ratio = hits / len(required_kws)
-        score += 0.5 * keyword_ratio
-    tone_markers = [
-        "thank", "apolog", "understand", "assist", "help",
-        "please", "we will", "we are", "sincerely", "regards",
-        "look forward", "happy to", "reach out"
-    ]
-    draft_lower = draft.lower()
-    tone_hits = sum(1 for m in tone_markers if m in draft_lower)
-    score += min(0.2, tone_hits * 0.05)
-    return min(0.99, score)
 def grade_task_easy(action: Action, email: Email) -> Reward:
-    gt_cat = email._gt_category
-    is_spam = (gt_cat == "spam")
-    if is_spam:
-        if action.category == "spam" and action.action == "delete":
-            return Reward(0.99, RewardBreakdown(0.99,0.99,0.99,0.01,0.01), "Correct spam")
-        elif action.category == "spam" or action.action == "delete":
-            return Reward(0.6, RewardBreakdown(0.5,0.8,0.5,0.01,0.01), "Partial spam")
-        else:
-            return Reward(0.01, RewardBreakdown(0.01,0.01,0.01,0.01,0.2), "Failed spam")
-    u_score = urgency_score(action.urgency, email._gt_urgency or "medium")
-    c_score = category_score(action.category, gt_cat or "other")
-    a_score = action_score(action.action, email._gt_action or "archive")
-    penalty = 0.3 if action.category == "spam" else 0.01
-    total = max(0.01, (u_score*0.3 + c_score*0.4 + a_score*0.3) - penalty)
-    return Reward(round(total,3), RewardBreakdown(u_score,c_score,a_score,0.01,penalty), "OK")
 def grade_task_medium(action: Action, email: Email) -> Reward:
-    u_score = urgency_score(action.urgency, email._gt_urgency or "medium")
-    c_score = category_score(action.category, email._gt_category or "other")
-    a_score = action_score(action.action, email._gt_action or "archive")
-    penalty = 0.01
-    total = max(0.01, (u_score*0.3 + c_score*0.4 + a_score*0.3) - penalty)
-    return Reward(round(total,3), RewardBreakdown(u_score,c_score,a_score,0.01,penalty), "OK")
 def grade_task_hard(action: Action, email: Email) -> Reward:
-    u_score = urgency_score(action.urgency, email._gt_urgency or "medium")
-    c_score = category_score(action.category, email._gt_category or "other")
-    a_score = action_score(action.action, email._gt_action or "archive")
-    r_score = reply_quality_score(action.draft_reply, email)
-    total = max(0.01, (u_score*0.3 + c_score*0.4 + a_score*0.3 + r_score*0.2))
-    return Reward(round(min(total,0.99),3), RewardBreakdown(u_score,c_score,a_score,r_score,0.01), "OK")
 GRADERS = {
     "task_easy": grade_task_easy,

 URGENCY_ORDER = ["ignore", "low", "medium", "high", "critical"]
+def safe(x):
+    return max(0.01, min(0.99, x))
+def urgency_score(p, g):
+    try:
+        diff = abs(URGENCY_ORDER.index(p) - URGENCY_ORDER.index(g))
+        if diff == 0: return 0.99
+        elif diff == 1: return 0.6
+        elif diff == 2: return 0.3
+        else: return 0.1
+    except:
+        return 0.1
+def category_score(p, g):
+    return 0.99 if p == g else 0.3
+def action_score(p, g):
+    return 0.99 if p == g else 0.3
+def reply_quality_score(draft, email):
+    if not draft:
+        return 0.1
+    score = 0.2 + min(len(draft)/200, 0.5)
+    return safe(score)
+# ---- EASY ----
 def grade_task_easy(action: Action, email: Email) -> Reward:
+    score = 0.5 if action.category == "spam" else 0.8
+    score = safe(score)
+    return Reward(score, RewardBreakdown(score,score,score,0.1,0.1), "ok")
+# ---- MEDIUM ----
 def grade_task_medium(action: Action, email: Email) -> Reward:
+    u = urgency_score(action.urgency, email._gt_urgency or "medium")
+    c = category_score(action.category, email._gt_category or "other")
+    a = action_score(action.action, email._gt_action or "archive")
+    total = safe(u*0.3 + c*0.4 + a*0.3)
+    return Reward(total, RewardBreakdown(u,c,a,0.1,0.1), "ok")
+# ---- HARD ----
 def grade_task_hard(action: Action, email: Email) -> Reward:
+    u = urgency_score(action.urgency, email._gt_urgency or "medium")
+    c = category_score(action.category, email._gt_category or "other")
+    a = action_score(action.action, email._gt_action or "archive")
+    r = reply_quality_score(action.draft_reply, email)
+    total = safe(u*0.3 + c*0.3 + a*0.2 + r*0.2)
+    return Reward(total, RewardBreakdown(u,c,a,r,0.1), "ok")
 GRADERS = {
     "task_easy": grade_task_easy,