junaid0600 committed on
Commit
7dff36b
·
1 Parent(s): 11dd1d6

Fix: ensure rewards are never exactly 0.0 or 1.0 by using proper normalization

Browse files
Files changed (1) hide show
  1. inference.py +3 -2
inference.py CHANGED
@@ -228,7 +228,8 @@ def run_episode(client: OpenAI, difficulty: str, task_id: str) -> dict:
228
  # Score strictly between 0 and 1 exclusive
229
  # Score strictly between 0 and 1 exclusive — never 0.0 or 1.0
230
  if rewards:
231
- raw_score = sum(rewards) / len(rewards)
 
232
  else:
233
  raw_score = 0.5
234
 
@@ -242,7 +243,7 @@ def run_episode(client: OpenAI, difficulty: str, task_id: str) -> dict:
242
 
243
  finally:
244
  # Ensure rewards list for log_end is never empty
245
- safe_rewards = rewards if rewards else [0.5]
246
  log_end(
247
  success = success,
248
  steps = steps,
 
228
  # Score strictly between 0 and 1 exclusive
229
  # Score strictly between 0 and 1 exclusive — never 0.0 or 1.0
230
  if rewards:
231
+ shifted = [max(0.01, min(0.99, (r + 1.0) / 2.0)) for r in rewards]
232
+ raw_score = sum(shifted) / len(shifted)
233
  else:
234
  raw_score = 0.5
235
 
 
243
 
244
  finally:
245
  # Ensure rewards list for log_end is never empty
246
+ safe_rewards = [max(0.01, min(0.99, (r + 1.0) / 2.0)) for r in rewards] if rewards else [0.5]
247
  log_end(
248
  success = success,
249
  steps = steps,