CreativeEngineer committed on
Commit
57cd1b0
·
1 Parent(s): a05574f

Guard best_cycles with correctness

Browse files
Files changed (1) hide show
  1. app.py +2 -1
app.py CHANGED
@@ -483,8 +483,9 @@ def perf_takehome_reward_fn(completions, prompts=None, **kwargs):
483
  if code.strip() in EXAMPLE_CODE_SET:
484
  reward = max(0.0, reward - COPY_PENALTY)
485
  cycles = result.get("cycles")
 
486
  with state_lock:
487
- if isinstance(cycles, int) and cycles < training_state["best_cycles"]:
488
  training_state["best_cycles"] = cycles
489
  training_state["best_code"] = code
490
  rewards.append(float(reward))
 
483
  if code.strip() in EXAMPLE_CODE_SET:
484
  reward = max(0.0, reward - COPY_PENALTY)
485
  cycles = result.get("cycles")
486
+ correctness = result.get("correctness", 0.0)
487
  with state_lock:
488
+ if isinstance(cycles, int) and isinstance(correctness, (int, float)) and correctness > 0 and cycles < training_state["best_cycles"]:
489
  training_state["best_cycles"] = cycles
490
  training_state["best_code"] = code
491
  rewards.append(float(reward))