Commit
·
57cd1b0
1
Parent(s):
a05574f
Guard best_cycles with correctness
Browse files
app.py
CHANGED
|
@@ -483,8 +483,9 @@ def perf_takehome_reward_fn(completions, prompts=None, **kwargs):
|
|
| 483 |
if code.strip() in EXAMPLE_CODE_SET:
|
| 484 |
reward = max(0.0, reward - COPY_PENALTY)
|
| 485 |
cycles = result.get("cycles")
|
|
|
|
| 486 |
with state_lock:
|
| 487 |
-
if isinstance(cycles, int) and cycles < training_state["best_cycles"]:
|
| 488 |
training_state["best_cycles"] = cycles
|
| 489 |
training_state["best_code"] = code
|
| 490 |
rewards.append(float(reward))
|
|
|
|
| 483 |
if code.strip() in EXAMPLE_CODE_SET:
|
| 484 |
reward = max(0.0, reward - COPY_PENALTY)
|
| 485 |
cycles = result.get("cycles")
|
| 486 |
+
correctness = result.get("correctness", 0.0)
|
| 487 |
with state_lock:
|
| 488 |
+
if isinstance(cycles, int) and isinstance(correctness, (int, float)) and correctness > 0 and cycles < training_state["best_cycles"]:
|
| 489 |
training_state["best_cycles"] = cycles
|
| 490 |
training_state["best_code"] = code
|
| 491 |
rewards.append(float(reward))
|