rohitc1612 committed on
Commit
9604368
·
1 Parent(s): 7d727c2

fix: clamp scores strictly within open interval (0, 1) per updated evaluator rules

Browse files
Files changed (2) hide show
  1. environment.py +2 -2
  2. inference.py +5 -2
environment.py CHANGED
@@ -216,8 +216,8 @@ class VulnPatchEnv:
216
  self.done = True
217
  reward_val -= 0.2
218
 
219
- # Ensure reward is strictly between 0.0 and 1.0 per OpenEnv spec requirement
220
- reward_val = min(max(reward_val, 0.0), 1.0)
221
 
222
  return self.state(), Reward(value=reward_val), self.done, Info()
223
 
 
216
  self.done = True
217
  reward_val -= 0.2
218
 
219
+ # Clamp reward strictly within open interval (0, 1) — 0.0 and 1.0 are not allowed
220
+ reward_val = min(max(reward_val, 0.01), 0.99)
221
 
222
  return self.state(), Reward(value=reward_val), self.done, Info()
223
 
inference.py CHANGED
@@ -82,8 +82,11 @@ def run_episode(task_name: str):
82
  finally:
83
  env.close()
84
  # REQUIRED [END] line — always emitted even on exception, score to 2 decimal places
85
- score = rewards[-1] if rewards else 0.0
86
- score = min(max(score, 0.0), 1.0) # Clamp score to 0.0 - 1.0
 
 
 
87
  success_str = "true" if score >= 0.8 else "false"
88
  rewards_str = ",".join([f"{r:.2f}" for r in rewards])
89
  print(
 
82
  finally:
83
  env.close()
84
  # REQUIRED [END] line — always emitted even on exception, score to 2 decimal places
85
+ score = rewards[-1] if rewards else 0.01
86
+ score = min(
87
+ max(score, 0.01), 0.99
88
+ ) # Strictly within (0, 1) — exclusive of 0 and 1
89
+
90
  success_str = "true" if score >= 0.8 else "false"
91
  rewards_str = ",".join([f"{r:.2f}" for r in rewards])
92
  print(