immortalindeed committed on
Commit
fe9aa5c
·
1 Parent(s): 3dfb5fe

Fix score aggregation: use max(rewards) for discriminative multi-turn scoring

Browse files
Files changed (2) hide show
  1. inference.py +3 -3
  2. server/app.py +3 -3
inference.py CHANGED
@@ -301,9 +301,9 @@ def run_task(client: OpenAI, task_id: str) -> float:
301
  if done:
302
  break
303
 
304
- # Clamped sum — accumulate multi-turn rewards, cap at 0.99
305
- total_reward = sum(rewards) if rewards else 0.01
306
- score = round(min(max(total_reward, 0.01), 0.99), 4)
307
  success = score > 0.0
308
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
309
 
 
301
  if done:
302
  break
303
 
304
+ # Best single-step reward — discriminative for multi-turn tasks
305
+ best_reward = max(rewards) if rewards else 0.01
306
+ score = round(min(max(best_reward, 0.01), 0.99), 4)
307
  success = score > 0.0
308
  rewards_str = ",".join(f"{r:.2f}" for r in rewards)
309
 
server/app.py CHANGED
@@ -528,9 +528,9 @@ def _run_single_task_inline(task_id, api_base, api_key, model_id, system_prompt)
528
  logs.append(msg)
529
  yield {'type': 'log', 'level': 'info', 'msg': msg}
530
 
531
- # Clamped sum — same logic as inference.py
532
- total_reward = sum(rewards) if rewards else 0.01
533
- score = round(min(max(total_reward, 0.01), 0.99), 4)
534
  success = score > 0.0
535
  rewards_str = ','.join(f'{r:.2f}' for r in rewards)
536
 
 
528
  logs.append(msg)
529
  yield {'type': 'log', 'level': 'info', 'msg': msg}
530
 
531
+ # Best single-step reward — same logic as inference.py
532
+ best_reward = max(rewards) if rewards else 0.01
533
+ score = round(min(max(best_reward, 0.01), 0.99), 4)
534
  success = score > 0.0
535
  rewards_str = ','.join(f'{r:.2f}' for r in rewards)
536