Nitish committed on
Commit
c04a5c5
·
1 Parent(s): 65e5ed1

fix: clamp ALL score outputs to (0.01, 0.99) — inference.py score + environment total_reward

Browse files
Files changed (2) hide show
  1. inference.py +1 -1
  2. server/environment.py +1 -1
inference.py CHANGED
@@ -157,7 +157,7 @@ def run_task(task_id: str, task_num: int, client: OpenAI) -> dict:
157
  except Exception as exc:
158
  print(f"[ERROR] task={task_id} exception: {exc}", flush=True)
159
  finally:
160
- clamped_score = round(min(1.0, max(0.0, cumulative_reward)), 3)
161
  log_end(success=success, steps=step_num, score=clamped_score, rewards=all_rewards)
162
 
163
  return {
 
157
  except Exception as exc:
158
  print(f"[ERROR] task={task_id} exception: {exc}", flush=True)
159
  finally:
160
+ clamped_score = round(min(0.99, max(0.01, cumulative_reward)), 3)
161
  log_end(success=success, steps=step_num, score=clamped_score, rewards=all_rewards)
162
 
163
  return {
server/environment.py CHANGED
@@ -113,7 +113,7 @@ class CodeSecurityEnv:
113
  task_id=current_id,
114
  step=self.step_count,
115
  done=self.done,
116
- total_reward=self.total_reward,
117
  )
118
 
119
  def _make_observation(self) -> Observation:
 
113
  task_id=current_id,
114
  step=self.step_count,
115
  done=self.done,
116
+ total_reward=max(0.01, min(0.99, self.total_reward)),
117
  )
118
 
119
  def _make_observation(self) -> Observation: