ashishbaberwal committed on
Commit
71d64d0
·
1 Parent(s): e225fd7

Final Changes

Browse files
Files changed (2) hide show
  1. environment/graders.py +10 -1
  2. tests/test_env.py +4 -2
environment/graders.py CHANGED
@@ -11,6 +11,14 @@ class TaskGrader:
11
  def _normalize(text: str) -> str:
12
  return text.lower().strip()
13
 
 
 
 
 
 
 
 
 
14
  def _match_issue(self, expected: Dict[str, Any], comment: Comment) -> bool:
15
  expected_line = expected.get("line")
16
  expected_type = self._normalize(expected.get("type", ""))
@@ -156,7 +164,8 @@ class TaskGrader:
156
  efficiency_bonus = self.grade_efficiency(steps_taken, max_steps)
157
 
158
  raw_score = (detection_score * 0.4) + (suggestion_score * 0.3) + (decision_score * 0.3)
159
- final_score = max(0.0, min(1.0, raw_score - false_positive_penalty + efficiency_bonus))
 
160
 
161
  return {
162
  "expected_issue_count": expected_count,
 
11
  def _normalize(text: str) -> str:
12
  return text.lower().strip()
13
 
14
+ @staticmethod
15
+ def _to_open_interval(score: float, epsilon: float = 1e-4) -> float:
16
+ if score <= 0.0:
17
+ return epsilon
18
+ if score >= 1.0:
19
+ return 1.0 - epsilon
20
+ return score
21
+
22
  def _match_issue(self, expected: Dict[str, Any], comment: Comment) -> bool:
23
  expected_line = expected.get("line")
24
  expected_type = self._normalize(expected.get("type", ""))
 
164
  efficiency_bonus = self.grade_efficiency(steps_taken, max_steps)
165
 
166
  raw_score = (detection_score * 0.4) + (suggestion_score * 0.3) + (decision_score * 0.3)
167
+ bounded_score = max(0.0, min(1.0, raw_score - false_positive_penalty + efficiency_bonus))
168
+ final_score = self._to_open_interval(bounded_score)
169
 
170
  return {
171
  "expected_issue_count": expected_count,
tests/test_env.py CHANGED
@@ -222,7 +222,8 @@ class TestCodeReviewEnv(unittest.TestCase):
222
 
223
  self.assertTrue(done)
224
  self.assertEqual(obs["final_decision_made"], "approved")
225
- self.assertEqual(info["task_score"], 1.0)
 
226
  self.assertIn("diagnostics", info)
227
  self.assertEqual(info["diagnostics"]["false_positive_count"], 0)
228
 
@@ -363,7 +364,8 @@ class TestCodeReviewEnv(unittest.TestCase):
363
  obs, _, done, info = self.env.step(action.model_dump())
364
  self.assertTrue(done)
365
  self.assertEqual(obs["final_decision_made"], "approved")
366
- self.assertEqual(info["task_score"], 1.0)
 
367
 
368
  def test_new_task_categories_registered(self):
369
  task_ids = {t["task_id"] for t in TaskDefinitions.get_all_tasks()}
 
222
 
223
  self.assertTrue(done)
224
  self.assertEqual(obs["final_decision_made"], "approved")
225
+ self.assertGreater(info["task_score"], 0.0)
226
+ self.assertLess(info["task_score"], 1.0)
227
  self.assertIn("diagnostics", info)
228
  self.assertEqual(info["diagnostics"]["false_positive_count"], 0)
229
 
 
364
  obs, _, done, info = self.env.step(action.model_dump())
365
  self.assertTrue(done)
366
  self.assertEqual(obs["final_decision_made"], "approved")
367
+ self.assertGreater(info["task_score"], 0.0)
368
+ self.assertLess(info["task_score"], 1.0)
369
 
370
  def test_new_task_categories_registered(self):
371
  task_ids = {t["task_id"] for t in TaskDefinitions.get_all_tasks()}