Commit
·
a05574f
1
Parent(s):
d9908db
Fix best_cycles update
Browse files
app.py
CHANGED
|
@@ -482,11 +482,11 @@ def perf_takehome_reward_fn(completions, prompts=None, **kwargs):
|
|
| 482 |
reward += EXEC_REWARD
|
| 483 |
if code.strip() in EXAMPLE_CODE_SET:
|
| 484 |
reward = max(0.0, reward - COPY_PENALTY)
|
| 485 |
-
|
| 486 |
-
|
| 487 |
-
|
| 488 |
-
|
| 489 |
-
|
| 490 |
rewards.append(float(reward))
|
| 491 |
return rewards
|
| 492 |
|
|
|
|
| 482 |
reward += EXEC_REWARD
|
| 483 |
if code.strip() in EXAMPLE_CODE_SET:
|
| 484 |
reward = max(0.0, reward - COPY_PENALTY)
|
| 485 |
+
cycles = result.get("cycles")
|
| 486 |
+
with state_lock:
|
| 487 |
+
if isinstance(cycles, int) and cycles < training_state["best_cycles"]:
|
| 488 |
+
training_state["best_cycles"] = cycles
|
| 489 |
+
training_state["best_code"] = code
|
| 490 |
rewards.append(float(reward))
|
| 491 |
return rewards
|
| 492 |
|