Siteshcodes committed on
Commit
9fecec8
·
1 Parent(s): 02c65a9

Fix: clamp all scores/rewards to the open interval (0, 1)

Browse files
Files changed (3) hide show
  1. app.py +3 -1
  2. inference.py +4 -4
  3. server/app.py +4 -2
app.py CHANGED
@@ -65,6 +65,8 @@ def state():
65
  def step(request: ActionRequest):
66
  action = request.action
67
  observation, reward, done, info = env.step(action)
 
 
68
  return {
69
  "observation": {
70
  "step": observation.step,
@@ -75,7 +77,7 @@ def step(request: ActionRequest):
75
  "fix_applied": observation.fix_applied,
76
  "is_resolved": observation.is_resolved,
77
  },
78
- "reward": reward,
79
  "done": done,
80
  "info": info,
81
  }
 
65
  def step(request: ActionRequest):
66
  action = request.action
67
  observation, reward, done, info = env.step(action)
68
+ # Clamp reward to strictly (0, 1) for OpenEnv compliance
69
+ clamped_reward = min(max(reward / 20.5, 0.001), 0.999)
70
  return {
71
  "observation": {
72
  "step": observation.step,
 
77
  "fix_applied": observation.fix_applied,
78
  "is_resolved": observation.is_resolved,
79
  },
80
+ "reward": clamped_reward,
81
  "done": done,
82
  "info": info,
83
  }
inference.py CHANGED
@@ -117,7 +117,7 @@ def run_task(client: OpenAI, task_id: str) -> None:
117
  history: List[str] = []
118
  rewards: List[float] = []
119
  steps_taken = 0
120
- score = 0.0
121
  success = False
122
 
123
  log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
@@ -149,9 +149,9 @@ def run_task(client: OpenAI, task_id: str) -> None:
149
  success = info.get("resolution") == "success"
150
  break
151
 
152
- # Compute score from actual rewards, clamped to [0, 1]
153
- score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.0
154
- score = min(max(score, 0.0), 1.0)
155
  success = score >= SUCCESS_SCORE_THRESHOLD
156
 
157
  except Exception as e:
 
117
  history: List[str] = []
118
  rewards: List[float] = []
119
  steps_taken = 0
120
+ score = 0.001
121
  success = False
122
 
123
  log_start(task=task_id, env=BENCHMARK, model=MODEL_NAME)
 
149
  success = info.get("resolution") == "success"
150
  break
151
 
152
+ # Compute score from actual rewards, clamped strictly to (0, 1)
153
+ score = sum(rewards) / MAX_TOTAL_REWARD if MAX_TOTAL_REWARD > 0 else 0.001
154
+ score = min(max(score, 0.001), 0.999)
155
  success = score >= SUCCESS_SCORE_THRESHOLD
156
 
157
  except Exception as e:
server/app.py CHANGED
@@ -65,6 +65,8 @@ def state():
65
  def step(request: ActionRequest):
66
  action = request.action
67
  observation, reward, done, info = env.step(action)
 
 
68
  return {
69
  "observation": {
70
  "step": observation.step,
@@ -75,7 +77,7 @@ def step(request: ActionRequest):
75
  "fix_applied": observation.fix_applied,
76
  "is_resolved": observation.is_resolved,
77
  },
78
- "reward": reward,
79
  "done": done,
80
  "info": info,
81
  }
@@ -121,4 +123,4 @@ def main():
121
 
122
 
123
  if __name__ == "__main__":
124
- main()
 
65
  def step(request: ActionRequest):
66
  action = request.action
67
  observation, reward, done, info = env.step(action)
68
+ # Clamp reward to strictly (0, 1) for OpenEnv compliance
69
+ clamped_reward = min(max(reward / 20.5, 0.001), 0.999)
70
  return {
71
  "observation": {
72
  "step": observation.step,
 
77
  "fix_applied": observation.fix_applied,
78
  "is_resolved": observation.is_resolved,
79
  },
80
+ "reward": clamped_reward,
81
  "done": done,
82
  "info": info,
83
  }
 
123
 
124
 
125
  if __name__ == "__main__":
126
+ main()