ping98k committed on
Commit
c2b0ecd
·
1 Parent(s): 82471ed

Show raw score for top players

Browse files
Files changed (1) hide show
  1. main.py +17 -6
main.py CHANGED
@@ -140,6 +140,7 @@ def run_tournament(
140
  prompt_tokens = 0
141
  completion_tokens = 0
142
  score_outputs: list[str] = []
 
143
  pairwise_outputs: list[str] = []
144
  match_cache: dict[tuple[str, str], str] = {}
145
 
@@ -216,17 +217,27 @@ def run_tournament(
216
  add_usage(usage)
217
  score_outputs.append((idx, text))
218
  data = _parse_verdict(text)
 
219
  if "scores" in data and isinstance(data["scores"], list):
220
- vals = data["scores"]
221
- return sum(vals) / len(vals) if vals else 0.0
222
- return float(data.get("score", 0))
 
 
 
 
 
 
 
223
 
224
  yield from log("Histogram generating")
225
  with ThreadPoolExecutor(max_workers=max_workers) as ex:
226
  prog = SimpleProgress(len(all_players), "Scoring")
227
  scores = {}
228
- for (idx, p), s in zip(players_with_ids, ex.map(score, players_with_ids)):
229
- scores[p] = s
 
 
230
  yield from log(prog.step())
231
  hist_fig = plt.figure()
232
  plt.hist(list(scores.values()), bins=10)
@@ -301,7 +312,7 @@ def run_tournament(
301
  for i, txt in enumerate(pairwise_outputs, 1):
302
  yield from log_completion(f"Pairwise completion {i}: ", txt)
303
  top_picks_str = "\n\n\n=====================================================\n\n\n".join(
304
- f"{p}\nElo: {rating[p]:.1f}" for p in top_k
305
  )
306
  else:
307
  top_k = top_players[:num_top_picks]
 
140
  prompt_tokens = 0
141
  completion_tokens = 0
142
  score_outputs: list[str] = []
143
+ raw_scores: dict[str, list] = {}
144
  pairwise_outputs: list[str] = []
145
  match_cache: dict[tuple[str, str], str] = {}
146
 
 
217
  add_usage(usage)
218
  score_outputs.append((idx, text))
219
  data = _parse_verdict(text)
220
+ raw_vals = None
221
  if "scores" in data and isinstance(data["scores"], list):
222
+ raw_vals = data["scores"]
223
+ avg = sum(raw_vals) / len(raw_vals) if raw_vals else 0.0
224
+ else:
225
+ try:
226
+ avg = float(data.get("score", 0))
227
+ raw_vals = [avg]
228
+ except Exception:
229
+ avg = 0.0
230
+ raw_vals = None
231
+ return avg, raw_vals
232
 
233
  yield from log("Histogram generating")
234
  with ThreadPoolExecutor(max_workers=max_workers) as ex:
235
  prog = SimpleProgress(len(all_players), "Scoring")
236
  scores = {}
237
+ for (idx, p), (s_val, raw_val) in zip(players_with_ids, ex.map(score, players_with_ids)):
238
+ scores[p] = s_val
239
+ if raw_val is not None:
240
+ raw_scores[p] = raw_val
241
  yield from log(prog.step())
242
  hist_fig = plt.figure()
243
  plt.hist(list(scores.values()), bins=10)
 
312
  for i, txt in enumerate(pairwise_outputs, 1):
313
  yield from log_completion(f"Pairwise completion {i}: ", txt)
314
  top_picks_str = "\n\n\n=====================================================\n\n\n".join(
315
+ f"{p}\nElo: {rating[p]:.1f}" + (f"\nScore: {raw_scores.get(p)}" if p in raw_scores else "") for p in top_k
316
  )
317
  else:
318
  top_k = top_players[:num_top_picks]