eastbrick Copilot committed on
Commit
fdd0183
·
1 Parent(s): e5868d7

Enforce exclusive score bounds in grader and inference

Browse files

Co-authored-by: Copilot <223556219+Copilot@users.noreply.github.com>

Files changed (2) hide show
  1. inference.py +5 -1
  2. server/releaseops_environment.py +5 -0
inference.py CHANGED
@@ -297,12 +297,16 @@ def run_task(llm: OpenAI, task_id: str) -> dict:
297
  break
298
 
299
  score = obs_dict.get("final_score") or 0.0
 
 
 
 
300
  success = score >= 0.5
301
 
302
  except Exception as e:
303
  print(f"[DEBUG] Task {task_id} failed with error: {e}", flush=True)
304
  success = False
305
- score = 0.0
306
  finally:
307
  log_end(success, step, score, rewards)
308
 
 
297
  break
298
 
299
  score = obs_dict.get("final_score") or 0.0
300
+ if score <= 0.0:
301
+ score = 0.001
302
+ elif score >= 1.0:
303
+ score = 0.999
304
  success = score >= 0.5
305
 
306
  except Exception as e:
307
  print(f"[DEBUG] Task {task_id} failed with error: {e}", flush=True)
308
  success = False
309
+ score = 0.001
310
  finally:
311
  log_end(success, step, score, rewards)
312
 
server/releaseops_environment.py CHANGED
@@ -881,6 +881,11 @@ class ReleaseOpsEnvironment(Environment):
881
  + 0.10 * efficiency
882
  )
883
  score = max(0.0, min(1.0, raw_score - forbidden_penalty))
 
 
 
 
 
884
 
885
  return {
886
  "score": round(score, 3),
 
881
  + 0.10 * efficiency
882
  )
883
  score = max(0.0, min(1.0, raw_score - forbidden_penalty))
884
+ # Hackathon validator requires strict bounds: 0 < score < 1
885
+ if score <= 0.0:
886
+ score = 0.001
887
+ elif score >= 1.0:
888
+ score = 0.999
889
 
890
  return {
891
  "score": round(score, 3),