100XZX001 committed on
Commit
54fac60
·
verified ·
1 Parent(s): fcbfa5f

Update grader.py

Browse files
Files changed (1) hide show
  1. grader.py +19 -34
grader.py CHANGED
@@ -1,7 +1,15 @@
1
  import numpy as np
2
  from sentence_transformers import SentenceTransformer, util
3
 
4
- # Load model once globally
 
 
 
 
 
 
 
 
5
  _model = None
6
 
7
  def _get_model():
@@ -11,30 +19,16 @@ def _get_model():
11
  return _model
12
 
13
  def grade_comment(comment: str, expected_keywords: list, expert_comment: str) -> float:
14
- """
15
- Returns a score in [0,1] based on:
16
- - semantic similarity with expert comment (70%)
17
- - keyword coverage (30%)
18
- - length bonus/penalty
19
- """
20
  if not comment:
21
- return 0.0
22
-
23
- # 1. Semantic similarity
24
  model = _get_model()
25
  emb_comment = model.encode(comment, convert_to_tensor=True)
26
  emb_expert = model.encode(expert_comment, convert_to_tensor=True)
27
- sim = util.pytorch_cos_sim(emb_comment, emb_expert).item() # in [0,1]
28
-
29
- # 2. Keyword coverage
30
  comment_lower = comment.lower()
31
  matched = sum(1 for kw in expected_keywords if kw in comment_lower)
32
  kw_score = min(1.0, matched / max(1, len(expected_keywords) // 2))
33
-
34
- # 3. Combine (70% semantic, 30% keywords)
35
  combined = 0.7 * sim + 0.3 * kw_score
36
-
37
- # 4. Length bonus/penalty
38
  words = comment.split()
39
  if len(words) >= 15:
40
  length_bonus = 0.1
@@ -42,36 +36,27 @@ def grade_comment(comment: str, expected_keywords: list, expert_comment: str) ->
42
  length_bonus = -0.2
43
  else:
44
  length_bonus = 0.0
45
-
46
- # 5. Final score, clamped
47
  final = combined + length_bonus
48
- return max(0.0, min(1.0, final))
49
-
50
 
51
  def grade_question(question: str) -> float:
52
- """Simple heuristic for question quality."""
53
  words = question.split()
54
  if len(words) < 3:
55
- return 0.0
56
- # Check for question words
57
  if any(q in question.lower() for q in ["what", "how", "why", "where", "when", "does", "is"]):
58
- return min(1.0, len(words) / 20)
59
- return 0.2
60
-
61
 
62
  def grade_fix(proposed_fix: str, expected_fix_keywords: list, hidden_test: callable) -> float:
63
- """Evaluates a code fix. Hidden_test can be a function that runs unit tests."""
64
- # Keyword check (simplified)
65
  matched = sum(1 for kw in expected_fix_keywords if kw in proposed_fix.lower())
66
  kw_score = min(1.0, matched / max(1, len(expected_fix_keywords) // 2))
67
-
68
- # If we have a test function, run it
69
  test_score = 0.0
70
  if hidden_test is not None:
71
  try:
72
  test_score = hidden_test(proposed_fix)
73
  except Exception:
74
  test_score = 0.0
75
-
76
- # Weighted: 60% test, 40% keywords
77
- return 0.6 * test_score + 0.4 * kw_score
 
1
  import numpy as np
2
  from sentence_transformers import SentenceTransformer, util
3
 
4
EPS = 0.001


def clamp_score(score: float) -> float:
    """Clamp *score* into the closed interval ``[EPS, 1 - EPS]``.

    The result is therefore never exactly 0 or 1.

    Args:
        score: Raw score; any real number, or NaN.

    Returns:
        ``score`` unchanged when it already lies in ``[EPS, 1 - EPS]``,
        ``EPS`` when it is below that range or NaN, and ``1 - EPS`` when it
        is above.
    """
    # NaN fails every ordered comparison, so it would otherwise slip through
    # the clamp untouched; treat it as the lowest score.
    if score != score:
        return EPS
    # Clamp against EPS / 1 - EPS rather than 0 / 1 so the mapping is
    # monotonic: a raw 0.0 must not end up above a raw 0.0005.
    return max(EPS, min(1.0 - EPS, score))
12
+
13
  _model = None
14
 
15
  def _get_model():
 
19
  return _model
20
 
21
  def grade_comment(comment: str, expected_keywords: list, expert_comment: str) -> float:
 
 
 
 
 
 
22
  if not comment:
23
+ return clamp_score(0.0)
 
 
24
  model = _get_model()
25
  emb_comment = model.encode(comment, convert_to_tensor=True)
26
  emb_expert = model.encode(expert_comment, convert_to_tensor=True)
27
+ sim = util.pytorch_cos_sim(emb_comment, emb_expert).item()
 
 
28
  comment_lower = comment.lower()
29
  matched = sum(1 for kw in expected_keywords if kw in comment_lower)
30
  kw_score = min(1.0, matched / max(1, len(expected_keywords) // 2))
 
 
31
  combined = 0.7 * sim + 0.3 * kw_score
 
 
32
  words = comment.split()
33
  if len(words) >= 15:
34
  length_bonus = 0.1
 
36
  length_bonus = -0.2
37
  else:
38
  length_bonus = 0.0
 
 
39
  final = combined + length_bonus
40
+ # Clamp to (0,1) using EPS
41
+ return clamp_score(final)
42
 
43
  def grade_question(question: str) -> float:
 
44
  words = question.split()
45
  if len(words) < 3:
46
+ return clamp_score(0.0)
 
47
  if any(q in question.lower() for q in ["what", "how", "why", "where", "when", "does", "is"]):
48
+ score = min(1.0, len(words) / 20)
49
+ return clamp_score(score)
50
+ return clamp_score(0.2)
51
 
52
  def grade_fix(proposed_fix: str, expected_fix_keywords: list, hidden_test: callable) -> float:
 
 
53
  matched = sum(1 for kw in expected_fix_keywords if kw in proposed_fix.lower())
54
  kw_score = min(1.0, matched / max(1, len(expected_fix_keywords) // 2))
 
 
55
  test_score = 0.0
56
  if hidden_test is not None:
57
  try:
58
  test_score = hidden_test(proposed_fix)
59
  except Exception:
60
  test_score = 0.0
61
+ score = 0.6 * test_score + 0.4 * kw_score
62
+ return clamp_score(score)