Spaces:

inmodel
/

code-review-env

Sleeping

Nitish committed on Apr 8

Commit

65e5ed1

1 Parent(s): 0b0f159

fix(grader): clamp scores to [0.01, 0.99] — platform requires scores strictly between 0 and 1

Files changed (2) hide show

server/environment.py CHANGED Viewed

@@ -65,7 +65,7 @@ class CodeSecurityEnv:
         if self.done:
             return StepResult(
                 observation=self._make_observation(),
-                reward=0.0,
                 done=True,
                 info={"error": ERROR_EPISODE_COMPLETED},
             )
@@ -89,7 +89,7 @@ class CodeSecurityEnv:
         try:
             reward, breakdown = grade_action(action.model_dump(), self.current_task)
         except Exception as e:
-            reward, breakdown = 0.0, {"error": f"Evaluation error: {e}"}
         self.step_count += 1
         self.total_reward += reward

         if self.done:
             return StepResult(
                 observation=self._make_observation(),
+                reward=0.01,
                 done=True,
                 info={"error": ERROR_EPISODE_COMPLETED},
             )
         try:
             reward, breakdown = grade_action(action.model_dump(), self.current_task)
         except Exception as e:
+            reward, breakdown = 0.01, {"error": f"Evaluation error: {e}"}
         self.step_count += 1
         self.total_reward += reward

server/grader.py CHANGED Viewed

@@ -40,7 +40,7 @@ def grade_action(action: Dict[str, Any], task: Dict[str, Any]) -> Tuple[float, D
         else:
             breakdown["bug_identified"] = 0.00
             # No bug found → no partial credit for anything else
-            return max(0.0, min(1.0, reward)), breakdown
         # ── Component 2: Bug type match (0.20) ──────────────────────────────────
         action_type = action.get("bug_type", "").lower().replace("-", " ").replace("_", " ")
@@ -109,7 +109,7 @@ def grade_action(action: Dict[str, Any], task: Dict[str, Any]) -> Tuple[float, D
                 if k != "stuffing_penalty_multiplier":
                     breakdown[k] = round(breakdown[k] * PENALTY_MULTIPLIER, 4)
-        return max(0.0, min(1.0, round(reward, 4))), breakdown
     except KeyError as exc:
         raise RuntimeError(f"Missing mandatory schema key in task definition: {exc}") from exc

         else:
             breakdown["bug_identified"] = 0.00
             # No bug found → no partial credit for anything else
+            return max(0.01, min(0.99, reward)), breakdown
         # ── Component 2: Bug type match (0.20) ──────────────────────────────────
         action_type = action.get("bug_type", "").lower().replace("-", " ").replace("_", " ")
                 if k != "stuffing_penalty_multiplier":
                     breakdown[k] = round(breakdown[k] * PENALTY_MULTIPLIER, 4)
+        return max(0.01, min(0.99, round(reward, 4))), breakdown
     except KeyError as exc:
         raise RuntimeError(f"Missing mandatory schema key in task definition: {exc}") from exc