Spaces:
Running
Running
Nitish committed on
Commit ·
c04a5c5
1
Parent(s): 65e5ed1
fix: clamp ALL score outputs to the closed range [0.01, 0.99] — inference.py score + environment total_reward
Browse files
- inference.py +1 -1
- server/environment.py +1 -1
inference.py
CHANGED
|
@@ -157,7 +157,7 @@ def run_task(task_id: str, task_num: int, client: OpenAI) -> dict:
|
|
| 157 |
except Exception as exc:
|
| 158 |
print(f"[ERROR] task={task_id} exception: {exc}", flush=True)
|
| 159 |
finally:
|
| 160 |
-
clamped_score = round(min(
|
| 161 |
log_end(success=success, steps=step_num, score=clamped_score, rewards=all_rewards)
|
| 162 |
|
| 163 |
return {
|
|
|
|
| 157 |
except Exception as exc:
|
| 158 |
print(f"[ERROR] task={task_id} exception: {exc}", flush=True)
|
| 159 |
finally:
|
| 160 |
+
clamped_score = round(min(0.99, max(0.01, cumulative_reward)), 3)
|
| 161 |
log_end(success=success, steps=step_num, score=clamped_score, rewards=all_rewards)
|
| 162 |
|
| 163 |
return {
|
server/environment.py
CHANGED
|
@@ -113,7 +113,7 @@ class CodeSecurityEnv:
|
|
| 113 |
task_id=current_id,
|
| 114 |
step=self.step_count,
|
| 115 |
done=self.done,
|
| 116 |
-
total_reward=self.total_reward,
|
| 117 |
)
|
| 118 |
|
| 119 |
def _make_observation(self) -> Observation:
|
|
|
|
| 113 |
task_id=current_id,
|
| 114 |
step=self.step_count,
|
| 115 |
done=self.done,
|
| 116 |
+
total_reward=max(0.01, min(0.99, self.total_reward)),
|
| 117 |
)
|
| 118 |
|
| 119 |
def _make_observation(self) -> Observation:
|