Spaces:
Running
Running
Commit · fe9aa5c
1
Parent(s): 3dfb5fe
Fix score aggregation: use max(rewards) for discriminative multi-turn scoring
Browse files
- inference.py +3 -3
- server/app.py +3 -3
inference.py
CHANGED
|
@@ -301,9 +301,9 @@ def run_task(client: OpenAI, task_id: str) -> float:
|
|
| 301 |
if done:
|
| 302 |
break
|
| 303 |
|
| 304 |
-
#
|
| 305 |
-
|
| 306 |
-
score = round(min(max(
|
| 307 |
success = score > 0.0
|
| 308 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 309 |
|
|
|
|
| 301 |
if done:
|
| 302 |
break
|
| 303 |
|
| 304 |
+
# Best single-step reward — discriminative for multi-turn tasks
|
| 305 |
+
best_reward = max(rewards) if rewards else 0.01
|
| 306 |
+
score = round(min(max(best_reward, 0.01), 0.99), 4)
|
| 307 |
success = score > 0.0
|
| 308 |
rewards_str = ",".join(f"{r:.2f}" for r in rewards)
|
| 309 |
|
server/app.py
CHANGED
|
@@ -528,9 +528,9 @@ def _run_single_task_inline(task_id, api_base, api_key, model_id, system_prompt)
|
|
| 528 |
logs.append(msg)
|
| 529 |
yield {'type': 'log', 'level': 'info', 'msg': msg}
|
| 530 |
|
| 531 |
-
#
|
| 532 |
-
|
| 533 |
-
score = round(min(max(
|
| 534 |
success = score > 0.0
|
| 535 |
rewards_str = ','.join(f'{r:.2f}' for r in rewards)
|
| 536 |
|
|
|
|
| 528 |
logs.append(msg)
|
| 529 |
yield {'type': 'log', 'level': 'info', 'msg': msg}
|
| 530 |
|
| 531 |
+
# Best single-step reward — same logic as inference.py
|
| 532 |
+
best_reward = max(rewards) if rewards else 0.01
|
| 533 |
+
score = round(min(max(best_reward, 0.01), 0.99), 4)
|
| 534 |
success = score > 0.0
|
| 535 |
rewards_str = ','.join(f'{r:.2f}' for r in rewards)
|
| 536 |
|