Spaces:
Sleeping
Sleeping
Update grader.py
Browse files
grader.py
CHANGED
|
@@ -1,25 +1,57 @@
|
|
| 1 |
-
def grade_comment(comment: str, expected_keywords: list,
|
| 2 |
"""
|
| 3 |
-
Returns a score in [0,
|
| 4 |
"""
|
| 5 |
comment_lower = comment.lower()
|
| 6 |
-
# 1. Keyword coverage (primary)
|
| 7 |
matched = sum(1 for kw in expected_keywords if kw in comment_lower)
|
| 8 |
-
kw_score = min(1.0, matched / max(1, len(expected_keywords) // 2))
|
| 9 |
|
| 10 |
-
#
|
| 11 |
words = comment.split()
|
| 12 |
length_bonus = 0.1 if len(words) >= 15 else 0.0
|
| 13 |
|
| 14 |
-
#
|
| 15 |
-
if
|
| 16 |
-
return 0.0
|
| 17 |
-
|
| 18 |
-
# 4. Penalty for extremely short or generic comments
|
| 19 |
-
if len(words) < 5 or comment_lower in ["lgtm", "looks good", "good"]:
|
| 20 |
penalty = 0.2
|
| 21 |
else:
|
| 22 |
penalty = 0.0
|
| 23 |
|
| 24 |
-
|
| 25 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
def grade_comment(comment: str, expected_keywords: list, task: str) -> float:
    """Score a review comment in [0, 1].

    The score is keyword coverage plus a length bonus, minus penalties,
    clamped to the range [0, 1].

    Args:
        comment: The free-text review comment to grade.
        expected_keywords: Keywords the comment is expected to mention.
            Matching is a case-insensitive substring test.
        task: Task difficulty label. For "harder" and "hardest" an extra
            penalty applies when the comment mentions neither "lock" nor
            "thread".

    Returns:
        The clamped score as a float between 0.0 and 1.0 inclusive.
    """
    comment_lower = comment.lower()

    # Lowercase the keywords as well: the comment is lowercased, so a keyword
    # such as "Lock" could otherwise never match.
    matched = sum(1 for kw in expected_keywords if kw.lower() in comment_lower)
    # Full keyword credit is reached once half of the keywords (integer
    # division, at least one) are present.
    kw_score = min(1.0, matched / max(1, len(expected_keywords) // 2))

    # Bonus for length (>= 15 words).
    words = comment.split()
    length_bonus = 0.1 if len(words) >= 15 else 0.0

    # Penalty for very short comments (< 5 words).
    penalty = 0.2 if len(words) < 5 else 0.0

    # For hard tasks, also penalise if the comment is too vague.
    is_hard_task = task in ("harder", "hardest")
    if is_hard_task and "lock" not in comment_lower and "thread" not in comment_lower:
        penalty += 0.1

    final = kw_score + length_bonus - penalty
    return max(0.0, min(1.0, final))
|
| 25 |
+
|
| 26 |
+
def grade_question(question: str) -> float:
    """Score a clarifying question in [0, 1] with a simple heuristic.

    Longer questions that contain a question word score higher.

    Args:
        question: The question text to grade.

    Returns:
        0.0 when the text has fewer than three whitespace-separated words;
        ``min(1.0, word_count / 20)`` when it contains a question word;
        0.2 otherwise.
    """
    words = question.split()
    if len(words) < 3:
        return 0.0

    # Match question words as whole tokens. A plain substring test would treat
    # "this" or "list" as containing "is" and score statements as questions.
    # Non-letters become separators, so "what's" still yields the token "what".
    lowered = question.lower()
    tokens = set("".join(ch if ch.isalpha() else " " for ch in lowered).split())
    question_words = {"what", "how", "why", "where", "when", "does", "is"}

    if not tokens.isdisjoint(question_words):
        return min(1.0, len(words) / 20)  # up to 1.0
    return 0.2
|
| 37 |
+
|
| 38 |
+
def grade_fix(proposed_fix: str, expected_fix_keywords: list, hidden_test: callable) -> float:
    """Score a proposed fix in [0, 1] from a hidden test and keyword coverage.

    The result is a weighted average: 60% hidden-test score and 40% keyword
    score. When no test is supplied, or the test fails to produce a usable
    number, the test component is 0.0.

    Args:
        proposed_fix: The text of the proposed fix.
        expected_fix_keywords: Keywords the fix is expected to contain.
            Matching is a case-insensitive substring test.
        hidden_test: Optional callable that takes the proposed fix and
            returns a score; ``None`` skips the test. The returned value is
            clamped to [0, 1].

    Returns:
        ``0.6 * test_score + 0.4 * kw_score`` as a float.
    """
    # Lowercase once, and lowercase the keywords too, so that a keyword with
    # uppercase letters can still match.
    fix_lower = proposed_fix.lower()
    matched = sum(1 for kw in expected_fix_keywords if kw.lower() in fix_lower)
    # Full keyword credit is reached once half of the keywords (integer
    # division, at least one) are present.
    kw_score = min(1.0, matched / max(1, len(expected_fix_keywords) // 2))

    # If we have a real test function, run it.
    test_score = 0.0
    if hidden_test is not None:
        try:
            # Coerce inside the try so a non-numeric result (e.g. None) is
            # treated like a failed test instead of raising later.
            test_score = float(hidden_test(proposed_fix))
        except Exception:
            # Best-effort boundary: any failure in the supplied test counts
            # as a failed test rather than crashing the grader.
            test_score = 0.0
        # Clamp so an out-of-range result cannot push the final score
        # outside [0, 1]; NaN collapses to 0.0 here.
        test_score = min(1.0, max(0.0, test_score))

    # Weighted average: 60% tests, 40% keywords.
    return 0.6 * test_score + 0.4 * kw_score
|