havinashpatil committed on
Commit
59fd9d3
·
1 Parent(s): 82e39c9

fix: clamp reward to [0.01,0.99] so .2f never rounds to 0.00 or 1.00

Browse files
Files changed (1) hide show
  1. inference.py +3 -3
inference.py CHANGED
@@ -107,8 +107,8 @@ def run_task(task_id: str, backend: str):
107
  if error_msg == "null":
108
  error_msg = str(e).replace("\n", " ").replace("\r", "")
109
 
110
- # 3e. Clamp it
111
- reward = max(0.001, min(0.999, float(raw_reward)))
112
  rewards.append(reward)
113
 
114
  # 3f. Print [STEP] line immediately
@@ -136,7 +136,7 @@ def run_task(task_id: str, backend: str):
136
  success = any(r > 0.5 for r in rewards)
137
  success_str = "true" if success else "false"
138
  rewards_str = ",".join([f"{r:.2f}" for r in rewards])
139
- score = max(0.001, min(0.999, (sum(rewards) / len(rewards)) if rewards else 0.5))
140
  print(f"[END] success={success_str} steps={step} score={score:.2f} rewards={rewards_str}")
141
 
142
  def main():
 
107
  if error_msg == "null":
108
  error_msg = str(e).replace("\n", " ").replace("\r", "")
109
 
110
+ # 3e. Clamp it — bounds chosen so :.2f never rounds to 0.00 or 1.00
111
+ reward = max(0.01, min(0.99, float(raw_reward)))
112
  rewards.append(reward)
113
 
114
  # 3f. Print [STEP] line immediately
 
136
  success = any(r > 0.5 for r in rewards)
137
  success_str = "true" if success else "false"
138
  rewards_str = ",".join([f"{r:.2f}" for r in rewards])
139
+ score = max(0.01, min(0.99, (sum(rewards) / len(rewards)) if rewards else 0.5))
140
  print(f"[END] success={success_str} steps={step} score={score:.2f} rewards={rewards_str}")
141
 
142
  def main():