3v324v23 committed on
Commit
030cdd8
·
1 Parent(s): 929006e

Clamp scores to the inclusive range [0.01, 0.99] (i.e. strictly between 0 and 1) for Scaler grader

Browse files
Files changed (1) hide show
  1. inference.py +10 -6
inference.py CHANGED
@@ -85,7 +85,8 @@ def main() -> None:
85
  res = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id})
86
  if res.status_code != 200:
87
  log_step(step=1, action="reset_failed", reward=0.0, done=True, error=f"HTTP {res.status_code}")
88
- log_end(success=False, steps=0, score=0.0, rewards=[])
 
89
  continue
90
 
91
  data = res.json()
@@ -111,15 +112,18 @@ def main() -> None:
111
  rewards.append(reward)
112
  log_step(step=steps_taken, action=action_str, reward=reward, done=done, error=None)
113
 
114
- # Calculate final score (Clamp between 0 and 1)
115
- score = min(max(sum(rewards), 0.0), 1.0)
116
- success = score > 0.0
 
 
117
 
118
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
119
 
120
  except Exception as e:
121
  log_step(step=steps_taken+1, action="exception", reward=0.0, done=True, error=str(e).replace("\n", " "))
122
- log_end(success=False, steps=steps_taken, score=0.0, rewards=rewards)
 
123
 
124
  if __name__ == "__main__":
125
- main()
 
85
  res = requests.post(f"{ENV_URL}/reset", json={"task_id": task_id})
86
  if res.status_code != 200:
87
  log_step(step=1, action="reset_failed", reward=0.0, done=True, error=f"HTTP {res.status_code}")
88
+ # Forced score to 0.01 instead of 0.0
89
+ log_end(success=False, steps=0, score=0.01, rewards=[])
90
  continue
91
 
92
  data = res.json()
 
112
  rewards.append(reward)
113
  log_step(step=steps_taken, action=action_str, reward=reward, done=done, error=None)
114
 
115
+ # --- THE FIX IS HERE ---
116
+ # Calculate final score and forcefully clamp it strictly between 0.01 and 0.99
117
+ raw_score = sum(rewards)
118
+ score = min(max(raw_score, 0.01), 0.99)
119
+ success = score > 0.01
120
 
121
  log_end(success=success, steps=steps_taken, score=score, rewards=rewards)
122
 
123
  except Exception as e:
124
  log_step(step=steps_taken+1, action="exception", reward=0.0, done=True, error=str(e).replace("\n", " "))
125
+ # Forced score to 0.01 instead of 0.0
126
+ log_end(success=False, steps=steps_taken, score=0.01, rewards=rewards)
127
 
128
  if __name__ == "__main__":
129
+ main()