100XZX001 committed on
Commit
0a5683a
·
verified ·
1 Parent(s): dcc3500

Update grader.py

Browse files
Files changed (1) hide show
  1. grader.py +45 -13
grader.py CHANGED
@@ -1,25 +1,57 @@
1
def grade_comment(comment: str, expected_keywords: list, expert_comment: str) -> float:
    """Score a review comment in [0, 1] from keyword coverage and simple heuristics.

    Args:
        comment: The comment text to grade.
        expected_keywords: Keywords a good comment should mention; matched
            case-insensitively as substrings of the comment.
        expert_comment: Accepted for interface compatibility; not used by the
            scoring.

    Returns:
        0.0 when the comment is a non-answer ("skip", "done", "no comment");
        otherwise keyword score plus length bonus minus penalty, clamped to
        [0, 1].
    """
    comment_lower = comment.lower()
    words = comment.split()

    # A non-answer is detected on whole words, so that substantive comments
    # containing e.g. "abandoned" or "skipping" are not zeroed by accident.
    tokens = {w.strip(".,!?;:'\"()[]") for w in comment_lower.split()}
    if "skip" in tokens or "done" in tokens or "no comment" in comment_lower:
        return 0.0

    # Keyword coverage: full credit once half of the keywords (rounded down,
    # but at least one) are present. Keywords are lower-cased because they are
    # compared against the lower-cased comment.
    matched = sum(1 for kw in expected_keywords if kw.lower() in comment_lower)
    kw_score = min(1.0, matched / max(1, len(expected_keywords) // 2))

    # Small bonus for comments of at least 15 words.
    length_bonus = 0.1 if len(words) >= 15 else 0.0

    # Penalty for extremely short or generic comments.
    is_generic = comment_lower.strip() in ("lgtm", "looks good", "good")
    penalty = 0.2 if len(words) < 5 or is_generic else 0.0

    final_score = kw_score + length_bonus - penalty
    return max(0.0, min(1.0, final_score))
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
def grade_comment(comment: str, expected_keywords: list, task: str) -> float:
    """Score a review comment in [0, 1] from keyword coverage and task heuristics.

    Args:
        comment: The comment text to grade.
        expected_keywords: Keywords a good comment should mention; matched
            case-insensitively as substrings of the comment.
        task: Task difficulty label. For "harder" and "hardest" an extra
            penalty applies when the comment mentions neither "lock" nor
            "thread".

    Returns:
        Keyword score plus length bonus minus penalties, clamped to [0, 1].
    """
    comment_lower = comment.lower()
    words = comment.split()

    # Full keyword credit once half of the keywords (rounded down, but at
    # least one) are present. Keywords are lower-cased because they are
    # compared against the lower-cased comment.
    matched = sum(1 for kw in expected_keywords if kw.lower() in comment_lower)
    kw_score = min(1.0, matched / max(1, len(expected_keywords) // 2))

    # Bonus for length (>= 15 words).
    length_bonus = 0.1 if len(words) >= 15 else 0.0

    # Penalty for very short comments.
    penalty = 0.2 if len(words) < 5 else 0.0

    # For hard tasks, also penalise a comment that is too vague, i.e. one that
    # mentions neither "lock" nor "thread".
    if task in ("harder", "hardest") and "lock" not in comment_lower and "thread" not in comment_lower:
        penalty += 0.1

    final = kw_score + length_bonus - penalty
    return max(0.0, min(1.0, final))
25
+
26
def grade_question(question: str) -> float:
    """Score a question in [0, 1]: longer, interrogative questions score higher.

    Args:
        question: The question text to grade.

    Returns:
        0.0 for fewer than three whitespace-separated words. Otherwise
        ``len(words) / 20`` capped at 1.0 when the text contains an
        interrogative word, and a flat 0.2 when it does not.
    """
    import re  # local import: no module-level import block is visible here

    words = question.split()
    if len(words) < 3:
        return 0.0

    # Match interrogatives as whole words. A plain substring test would find
    # "is" inside "this" or "list" and treat almost any sentence as a
    # question. The optional "n't" keeps "isn't" / "doesn't" recognised.
    has_question_word = re.search(
        r"\b(?:what|how|why|where|when|does|is)(?:n['’]t)?\b",
        question.lower(),
    )
    if has_question_word:
        return min(1.0, len(words) / 20)  # up to 1.0
    return 0.2
37
+
38
def grade_fix(proposed_fix: str, expected_fix_keywords: list, hidden_test: callable = None) -> float:
    """Score a proposed fix in [0, 1] from an optional test and a keyword check.

    Args:
        proposed_fix: The text of the proposed fix.
        expected_fix_keywords: Keywords the fix is expected to contain;
            matched case-insensitively as substrings of the fix.
        hidden_test: Optional callable taking the proposed fix and returning
            a score, expected to be in 0.0-1.0. ``None`` means no test is
            available, in which case the test part contributes 0.

    Returns:
        ``0.6 * test_score + 0.4 * keyword_score``. Both parts are confined
        to [0, 1], so the result is too.
    """
    # Keyword check: full credit once half of the keywords (rounded down, but
    # at least one) are present. Lower-case the fix once, not per keyword.
    fix_lower = proposed_fix.lower()
    matched = sum(1 for kw in expected_fix_keywords if kw.lower() in fix_lower)
    kw_score = min(1.0, matched / max(1, len(expected_fix_keywords) // 2))

    # If we have a real test function, run it.
    test_score = 0.0
    if hidden_test is not None:
        try:
            raw = float(hidden_test(proposed_fix))
        except Exception:
            # Best effort by design: a test that crashes, or that returns
            # something non-numeric, counts as a failed test.
            raw = 0.0
        # Clamp so a misbehaving test cannot push the grade outside [0, 1];
        # NaN (the only value unequal to itself) is treated as a failure.
        test_score = max(0.0, min(1.0, raw)) if raw == raw else 0.0

    # Weighted average: 60% tests, 40% keywords.
    return 0.6 * test_score + 0.4 * kw_score