Spaces:
Running
Running
Nitish committed on
Commit ·
65e5ed1
1
Parent(s): 0b0f159
fix(grader): clamp scores to (0.01, 0.99) — platform requires strictly between 0 and 1
Browse files
- server/environment.py +2 -2
- server/grader.py +2 -2
server/environment.py
CHANGED
|
@@ -65,7 +65,7 @@ class CodeSecurityEnv:
|
|
| 65 |
if self.done:
|
| 66 |
return StepResult(
|
| 67 |
observation=self._make_observation(),
|
| 68 |
-
reward=0.0,
|
| 69 |
done=True,
|
| 70 |
info={"error": ERROR_EPISODE_COMPLETED},
|
| 71 |
)
|
|
@@ -89,7 +89,7 @@ class CodeSecurityEnv:
|
|
| 89 |
try:
|
| 90 |
reward, breakdown = grade_action(action.model_dump(), self.current_task)
|
| 91 |
except Exception as e:
|
| 92 |
-
reward, breakdown = 0.0, {"error": f"Evaluation error: {e}"}
|
| 93 |
|
| 94 |
self.step_count += 1
|
| 95 |
self.total_reward += reward
|
|
|
|
| 65 |
if self.done:
|
| 66 |
return StepResult(
|
| 67 |
observation=self._make_observation(),
|
| 68 |
+
reward=0.01,
|
| 69 |
done=True,
|
| 70 |
info={"error": ERROR_EPISODE_COMPLETED},
|
| 71 |
)
|
|
|
|
| 89 |
try:
|
| 90 |
reward, breakdown = grade_action(action.model_dump(), self.current_task)
|
| 91 |
except Exception as e:
|
| 92 |
+
reward, breakdown = 0.01, {"error": f"Evaluation error: {e}"}
|
| 93 |
|
| 94 |
self.step_count += 1
|
| 95 |
self.total_reward += reward
|
server/grader.py
CHANGED
|
@@ -40,7 +40,7 @@ def grade_action(action: Dict[str, Any], task: Dict[str, Any]) -> Tuple[float, D
|
|
| 40 |
else:
|
| 41 |
breakdown["bug_identified"] = 0.00
|
| 42 |
# No bug found → no partial credit for anything else
|
| 43 |
-
return max(0.0, min(1.0, reward)), breakdown
|
| 44 |
|
| 45 |
# ── Component 2: Bug type match (0.20) ──────────────────────────────────
|
| 46 |
action_type = action.get("bug_type", "").lower().replace("-", " ").replace("_", " ")
|
|
@@ -109,7 +109,7 @@ def grade_action(action: Dict[str, Any], task: Dict[str, Any]) -> Tuple[float, D
|
|
| 109 |
if k != "stuffing_penalty_multiplier":
|
| 110 |
breakdown[k] = round(breakdown[k] * PENALTY_MULTIPLIER, 4)
|
| 111 |
|
| 112 |
-
return max(0.0, min(1.0, round(reward, 4))), breakdown
|
| 113 |
|
| 114 |
except KeyError as exc:
|
| 115 |
raise RuntimeError(f"Missing mandatory schema key in task definition: {exc}") from exc
|
|
|
|
| 40 |
else:
|
| 41 |
breakdown["bug_identified"] = 0.00
|
| 42 |
# No bug found → no partial credit for anything else
|
| 43 |
+
return max(0.01, min(0.99, reward)), breakdown
|
| 44 |
|
| 45 |
# ── Component 2: Bug type match (0.20) ──────────────────────────────────
|
| 46 |
action_type = action.get("bug_type", "").lower().replace("-", " ").replace("_", " ")
|
|
|
|
| 109 |
if k != "stuffing_penalty_multiplier":
|
| 110 |
breakdown[k] = round(breakdown[k] * PENALTY_MULTIPLIER, 4)
|
| 111 |
|
| 112 |
+
return max(0.01, min(0.99, round(reward, 4))), breakdown
|
| 113 |
|
| 114 |
except KeyError as exc:
|
| 115 |
raise RuntimeError(f"Missing mandatory schema key in task definition: {exc}") from exc
|