Nitish committed on
Commit
65e5ed1
·
1 Parent(s): 0b0f159

fix(grader): clamp scores to [0.01, 0.99] — platform requires scores to be strictly between 0 and 1

Browse files
Files changed (2) hide show
  1. server/environment.py +2 -2
  2. server/grader.py +2 -2
server/environment.py CHANGED
@@ -65,7 +65,7 @@ class CodeSecurityEnv:
65
  if self.done:
66
  return StepResult(
67
  observation=self._make_observation(),
68
- reward=0.0,
69
  done=True,
70
  info={"error": ERROR_EPISODE_COMPLETED},
71
  )
@@ -89,7 +89,7 @@ class CodeSecurityEnv:
89
  try:
90
  reward, breakdown = grade_action(action.model_dump(), self.current_task)
91
  except Exception as e:
92
- reward, breakdown = 0.0, {"error": f"Evaluation error: {e}"}
93
 
94
  self.step_count += 1
95
  self.total_reward += reward
 
65
  if self.done:
66
  return StepResult(
67
  observation=self._make_observation(),
68
+ reward=0.01,
69
  done=True,
70
  info={"error": ERROR_EPISODE_COMPLETED},
71
  )
 
89
  try:
90
  reward, breakdown = grade_action(action.model_dump(), self.current_task)
91
  except Exception as e:
92
+ reward, breakdown = 0.01, {"error": f"Evaluation error: {e}"}
93
 
94
  self.step_count += 1
95
  self.total_reward += reward
server/grader.py CHANGED
@@ -40,7 +40,7 @@ def grade_action(action: Dict[str, Any], task: Dict[str, Any]) -> Tuple[float, D
40
  else:
41
  breakdown["bug_identified"] = 0.00
42
  # No bug found → no partial credit for anything else
43
- return max(0.0, min(1.0, reward)), breakdown
44
 
45
  # ── Component 2: Bug type match (0.20) ──────────────────────────────────
46
  action_type = action.get("bug_type", "").lower().replace("-", " ").replace("_", " ")
@@ -109,7 +109,7 @@ def grade_action(action: Dict[str, Any], task: Dict[str, Any]) -> Tuple[float, D
109
  if k != "stuffing_penalty_multiplier":
110
  breakdown[k] = round(breakdown[k] * PENALTY_MULTIPLIER, 4)
111
 
112
- return max(0.0, min(1.0, round(reward, 4))), breakdown
113
 
114
  except KeyError as exc:
115
  raise RuntimeError(f"Missing mandatory schema key in task definition: {exc}") from exc
 
40
  else:
41
  breakdown["bug_identified"] = 0.00
42
  # No bug found → no partial credit for anything else
43
+ return max(0.01, min(0.99, reward)), breakdown
44
 
45
  # ── Component 2: Bug type match (0.20) ──────────────────────────────────
46
  action_type = action.get("bug_type", "").lower().replace("-", " ").replace("_", " ")
 
109
  if k != "stuffing_penalty_multiplier":
110
  breakdown[k] = round(breakdown[k] * PENALTY_MULTIPLIER, 4)
111
 
112
+ return max(0.01, min(0.99, round(reward, 4))), breakdown
113
 
114
  except KeyError as exc:
115
  raise RuntimeError(f"Missing mandatory schema key in task definition: {exc}") from exc