Spaces:
Sleeping
Sleeping
Vighnesh committed on
Commit ·
4744d17
1
Parent(s): 3d83a5d
Fix #2: cap _reply_quality at 0.25, add case-insensitive punctuation-stripped matching (weights now sum to exactly 1.0)
Browse files- graders.py +14 -6
graders.py
CHANGED
|
@@ -38,14 +38,22 @@ _KEYWORD_REWARDS: Dict[str, list[str]] = {
|
|
| 38 |
|
| 39 |
|
| 40 |
def _reply_quality(reply_text: str, category: str) -> float:
|
| 41 |
-
"""Return 0.0–0.
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 42 |
if not reply_text:
|
| 43 |
return 0.0
|
| 44 |
-
|
|
|
|
|
|
|
| 45 |
keywords = _KEYWORD_REWARDS.get(category, [])
|
| 46 |
-
hits = sum(1 for kw in keywords if kw in
|
| 47 |
-
# cap at 0.
|
| 48 |
-
return min(0.
|
| 49 |
|
| 50 |
|
| 51 |
# ─────────────────────────── Task 1 ────────────────────────────
|
|
@@ -103,7 +111,7 @@ def grade_task3(
|
|
| 103 |
Breakdown:
|
| 104 |
0.20 — classification correct
|
| 105 |
0.40 — action correct (0.20 if partial)
|
| 106 |
-
0.25 — reply quality (NLP keyword overlap)
|
| 107 |
0.15 — efficiency bonus (fewer steps → higher bonus)
|
| 108 |
"""
|
| 109 |
score = 0.0
|
|
|
|
| 38 |
|
| 39 |
|
| 40 |
def _reply_quality(reply_text: str, category: str) -> float:
|
| 41 |
+
"""Return 0.0–0.25 based on how relevant the reply text is.
|
| 42 |
+
|
| 43 |
+
Matching is case-insensitive and punctuation-stripped so that
|
| 44 |
+
replies like 'Resolved.' and 'resolved' score identically.
|
| 45 |
+
Each keyword hit = 0.05, capped at 0.25 (5 hits max).
|
| 46 |
+
Total grade_task3 weights: 0.20 + 0.40 + 0.25 + 0.15 = 1.00
|
| 47 |
+
"""
|
| 48 |
if not reply_text:
|
| 49 |
return 0.0
|
| 50 |
+
# Strip punctuation and lowercase for robust matching
|
| 51 |
+
import re
|
| 52 |
+
cleaned = re.sub(r'[^\w\s]', ' ', reply_text.lower())
|
| 53 |
keywords = _KEYWORD_REWARDS.get(category, [])
|
| 54 |
+
hits = sum(1 for kw in keywords if kw in cleaned)
|
| 55 |
+
# cap at 0.25 — reply quality component of grade_task3
|
| 56 |
+
return min(0.25, hits * 0.05)
|
| 57 |
|
| 58 |
|
| 59 |
# ─────────────────────────── Task 1 ────────────────────────────
|
|
|
|
| 111 |
Breakdown:
|
| 112 |
0.20 — classification correct
|
| 113 |
0.40 — action correct (0.20 if partial)
|
| 114 |
+
0.25 — reply quality (NLP keyword overlap, case-insensitive, punctuation-stripped)
|
| 115 |
0.15 — efficiency bonus (fewer steps → higher bonus)
|
| 116 |
"""
|
| 117 |
score = 0.0
|