Souravdanyal committed on
Commit
c21c4ee
·
1 Parent(s): d298b6d

Fix grader_hard explanation scoring - fairer partial credit

Browse files
Files changed (1) hide show
  1. server/graders/grader_hard.py +84 -1
server/graders/grader_hard.py CHANGED
@@ -10,6 +10,89 @@ def _score_explanation(explanation: Optional[str], keywords: List[str]) -> Tuple
10
  """
11
  Scores the explanation by checking for required conceptual keywords.
12
  Returns (score 0.0-1.0, feedback string).
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  """
14
  if not explanation or len(explanation.strip()) < 10:
15
  return 0.0, "❌ No explanation provided. Hard tasks require an explanation field."
@@ -67,4 +150,4 @@ def grade_hard(fixed_code: str, task: dict, explanation: Optional[str] = None) -
67
  if passed < total and not explanation:
68
  feedback += "\n💡 Tip: Fix the code bugs AND provide a clear explanation for max reward."
69
 
70
- return final_reward, passed, total, feedback, results
 
10
  """
11
  Scores the explanation by checking for required conceptual keywords.
12
  Returns (score 0.0-1.0, feedback string).
13
+
14
+ Scoring:
15
+ - No explanation → 0.0
16
+ - At least 1 keyword hit → partial credit
17
+ - Half or more keywords → full credit 1.0
18
+ """
19
+ if not explanation or len(explanation.strip()) < 10:
20
+ return 0.0, "❌ No explanation provided. Hard tasks require an explanation field."
21
+
22
+ explanation_lower = explanation.lower()
23
+ hits = [kw for kw in keywords if kw.lower() in explanation_lower]
24
+
25
+ if not keywords:
26
+ # No keywords defined — give full credit for any explanation
27
+ score = 1.0 if len(explanation.strip()) > 20 else 0.5
28
+ else:
29
+ # Need at least 1 hit for partial, half for full
30
+ needed_for_full = max(1, len(keywords) // 2)
31
+ if len(hits) == 0:
32
+ score = 0.0
33
+ elif len(hits) >= needed_for_full:
34
+ score = 1.0
35
+ else:
36
+ # Partial credit proportional to hits
37
+ score = round(len(hits) / needed_for_full, 2)
38
+
39
+ if score == 1.0:
40
+ feedback = f"✅ Explanation excellent! Mentioned key concepts: {', '.join(hits)}"
41
+ elif score > 0:
42
+ missing = [kw for kw in keywords if kw.lower() not in explanation_lower]
43
+ feedback = (
44
+ f"⚠️ Partial explanation (score={score}). Mentioned: {', '.join(hits) if hits else 'none'}. "
45
+ f"Consider also discussing: {', '.join(missing[:3])}"
46
+ )
47
+ else:
48
+ feedback = (
49
+ f"❌ Explanation missing key concepts. "
50
+ f"Try to explain: {', '.join(keywords[:3])} in your analysis."
51
+ )
52
+
53
+ return round(score, 2), feedback
54
+
55
+
56
def grade_hard(fixed_code: str, task: dict, explanation: Optional[str] = None) -> Tuple[float, int, int, str, List[dict]]:
    """
    Grade a hard task submission.

    Reward = 0.7 * test_score + 0.3 * explanation_score

    Args:
        fixed_code: Submitted code; forwarded unchanged to ``grade_easy``.
        task: Task definition. Its optional ``"explanation_keywords"`` entry
            lists the concepts the explanation is scored against; it is also
            forwarded to ``grade_easy``.
        explanation: Free-text explanation accompanying the fix, or None.

    Returns:
        reward (float): combined score, rounded to 2 decimals
        passed (int): as reported by ``grade_easy``
        total (int): as reported by ``grade_easy``
        feedback (str): code feedback, explanation feedback and tips
        results (list): as reported by ``grade_easy``
    """
    # Grade code using easy grader (same test execution logic)
    test_reward, passed, total, code_feedback, results = grade_easy(fixed_code, task)

    # Grade explanation. `or []` also covers a task that stores an explicit
    # None under the key, which would otherwise break keyword iteration.
    keywords = task.get("explanation_keywords") or []
    exp_score, exp_feedback = _score_explanation(explanation, keywords)

    # Combined reward
    final_reward = round(0.7 * test_reward + 0.3 * exp_score, 2)

    feedback = (
        f"--- Code Score (70% weight): {test_reward:.2f} ---\n"
        f"{code_feedback}\n\n"
        f"--- Explanation Score (30% weight): {exp_score:.2f} ---\n"
        f"{exp_feedback}\n\n"
        f"=== Final Reward: {final_reward:.2f} ==="
    )

    if passed < total and not explanation:
        feedback += "\n💡 Tip: Fix the code bugs AND provide a clear explanation for max reward."

    if passed == total and exp_score < 1.0:
        # Only suggest keywords the explanation does not already contain, and
        # avoid an empty "mentioning: " list when there is nothing to suggest.
        explanation_lower = (explanation or "").lower()
        missing = [kw for kw in keywords if kw.lower() not in explanation_lower]
        if missing:
            feedback += f"\n💡 Tip: Your code is correct! Improve explanation by mentioning: {', '.join(missing[:3])}"
        else:
            feedback += "\n💡 Tip: Your code is correct! Provide a more detailed explanation for max reward."

    return final_reward, passed, total, feedback, results
93
+ """
94
+ Scores the explanation by checking for required conceptual keywords.
95
+ Returns (score 0.0-1.0, feedback string).
96
  """
97
  if not explanation or len(explanation.strip()) < 10:
98
  return 0.0, "❌ No explanation provided. Hard tasks require an explanation field."
 
150
  if passed < total and not explanation:
151
  feedback += "\n💡 Tip: Fix the code bugs AND provide a clear explanation for max reward."
152
 
153
+ return final_reward, passed, total, feedback, results