jtowarek committed on
Commit
c104a11
·
verified ·
1 Parent(s): 89a1eb2

Upload folder using huggingface_hub

Browse files
bench/gradio_app/app.py CHANGED
@@ -3,15 +3,15 @@ from __future__ import annotations
3
  import gradio as gr
4
 
5
  from registry import (
6
- _ZERO, _ONE, _TWO, _FIVE, _TEN,
7
  _GAME_INFO, _CATEGORY_DIMS, _ALL_FILTER,
8
  _HUMAN_VARIANTS, _HAS_VARIANTS,
9
  _strategies_for_game,
10
  _MP_FILTERS, _MP_FILTER_ALL,
11
- _HAS_LLM_AGENT, _HAS_OAUTH,
12
  _LLM_PROVIDERS, _LLM_MODELS, _LLM_OPPONENT_LABEL,
13
  )
14
- from llm_arena import run_tournament, render_tournament
15
  from callbacks import (
16
  _get_game_info, _blank, _render,
17
  play_round, reset_game, on_game_change,
@@ -38,7 +38,6 @@ _init_player_label = f"Players: {_init_np}" if _init_np > _TWO else "Two-Player"
38
  # -- Infinite mode preset --
39
  _INF_GAME = "Discounted Prisoner's Dilemma"
40
  _INF_VARIANTS = ["constitutional", "exit", "noisy_payoffs", "noisy_actions"]
41
- _INF_ROUNDS = _TEN * _TEN * _TEN
42
  _ALL_LLM_MODELS = []
43
  for _mods in _LLM_MODELS.values():
44
  _ALL_LLM_MODELS.extend(_mods)
@@ -70,17 +69,11 @@ with gr.Blocks(title="Kant Demo") as demo:
70
  label="Model",
71
  )
72
  with gr.Row(visible=False) as api_key_row:
73
- if _HAS_OAUTH:
74
- api_key_input = gr.Textbox(
75
- label="API Key (optional — OAuth tokens available)",
76
- type="password",
77
- placeholder="Leave blank to use built-in OAuth tokens",
78
- )
79
- else:
80
- api_key_input = gr.Textbox(
81
- label="API Key", type="password",
82
- placeholder="Enter your Anthropic or OpenAI API key",
83
- )
84
 
85
  if _HUMAN_VARIANTS:
86
  variant_cb = gr.CheckboxGroup(
@@ -120,19 +113,13 @@ with gr.Blocks(title="Kant Demo") as demo:
120
 
121
  if _INF_GAME in _GAME_INFO and _HAS_VARIANTS and _ALL_LLM_MODELS:
122
  with gr.TabItem("Infinite Mode"):
123
- _arena_desc = (
124
- "**LLM Tournament: Constitutional Discounted PD.** "
125
- "Select models and watch them compete "
126
- "in a round-robin. Each match uses constitutional rule "
127
- "negotiation, exit option, payoff noise, and action trembles."
128
- ) if _HAS_OAUTH else (
129
- "**LLM Tournament: Constitutional Discounted PD.** "
130
- "Select models, provide API keys, and watch them compete "
131
- "in a round-robin. Each match uses constitutional rule "
132
- "negotiation, exit option, payoff noise, and action trembles."
133
  )
134
- gr.Markdown(_arena_desc)
135
- with gr.Row(visible=not _HAS_OAUTH):
136
  arena_anthro_key = gr.Textbox(
137
  label="Anthropic API Key", type="password",
138
  placeholder="sk-ant-...")
@@ -143,26 +130,21 @@ with gr.Blocks(title="Kant Demo") as demo:
143
  _ALL_LLM_MODELS, value=_ALL_LLM_MODELS[:_TWO],
144
  label="Select Models for Tournament")
145
  with gr.Row():
146
- arena_rounds = gr.Number(
147
- value=_INF_ROUNDS, precision=_ZERO,
148
- label="Rounds per Match (no upper limit)")
149
- arena_run = gr.Button("Run Tournament", variant="primary")
150
- arena_results = gr.State([])
151
- arena_md = gr.Markdown("Select models and click Run Tournament.")
152
 
153
- def _run_arena(models, anthro_key, openai_key, num_rounds):
154
- results, err = run_tournament(
155
- _INF_GAME, _INF_VARIANTS, int(num_rounds),
156
- models, anthro_key, openai_key)
157
- if err:
158
- return [], err
159
- return results, render_tournament(results)
160
 
161
- arena_run.click(
162
- _run_arena,
163
- inputs=[arena_models, arena_anthro_key,
164
- arena_openai_key, arena_rounds],
165
- outputs=[arena_results, arena_md])
166
 
167
  with gr.TabItem("Game Theory Reference"):
168
  gr.Markdown(value=_build_reference_md())
 
3
  import gradio as gr
4
 
5
  from registry import (
6
+ _ZERO, _ONE, _TWO, _TEN,
7
  _GAME_INFO, _CATEGORY_DIMS, _ALL_FILTER,
8
  _HUMAN_VARIANTS, _HAS_VARIANTS,
9
  _strategies_for_game,
10
  _MP_FILTERS, _MP_FILTER_ALL,
11
+ _HAS_LLM_AGENT, _HAS_ENV_KEYS,
12
  _LLM_PROVIDERS, _LLM_MODELS, _LLM_OPPONENT_LABEL,
13
  )
14
+ from llm_arena import run_infinite_tournament
15
  from callbacks import (
16
  _get_game_info, _blank, _render,
17
  play_round, reset_game, on_game_change,
 
38
  # -- Infinite mode preset --
39
  _INF_GAME = "Discounted Prisoner's Dilemma"
40
  _INF_VARIANTS = ["constitutional", "exit", "noisy_payoffs", "noisy_actions"]
 
41
  _ALL_LLM_MODELS = []
42
  for _mods in _LLM_MODELS.values():
43
  _ALL_LLM_MODELS.extend(_mods)
 
69
  label="Model",
70
  )
71
  with gr.Row(visible=False) as api_key_row:
72
+ api_key_input = gr.Textbox(
73
+ label="API Key (optional)" if _HAS_ENV_KEYS else "API Key",
74
+ type="password",
75
+ placeholder="Leave blank to use server key" if _HAS_ENV_KEYS else "Enter your Anthropic or OpenAI API key",
76
+ )
 
 
 
 
 
 
77
 
78
  if _HUMAN_VARIANTS:
79
  variant_cb = gr.CheckboxGroup(
 
113
 
114
  if _INF_GAME in _GAME_INFO and _HAS_VARIANTS and _ALL_LLM_MODELS:
115
  with gr.TabItem("Infinite Mode"):
116
+ gr.Markdown(
117
+ "**Infinite LLM Tournament.** "
118
+ "Models compete in an endless round-robin Constitutional "
119
+ "Discounted PD with rule negotiation, exit option, payoff "
120
+ "noise, and action trembles. Runs forever until you stop it."
 
 
 
 
 
121
  )
122
+ with gr.Row(visible=not _HAS_ENV_KEYS):
 
123
  arena_anthro_key = gr.Textbox(
124
  label="Anthropic API Key", type="password",
125
  placeholder="sk-ant-...")
 
130
  _ALL_LLM_MODELS, value=_ALL_LLM_MODELS[:_TWO],
131
  label="Select Models for Tournament")
132
  with gr.Row():
133
+ arena_start = gr.Button("Start", variant="primary")
134
+ arena_stop = gr.Button("Stop", variant="stop")
135
+ arena_md = gr.Markdown("Select models and click Start.")
 
 
 
136
 
137
+ def _run_infinite(models, anthro_key, openai_key):
138
+ for md in run_infinite_tournament(
139
+ _INF_GAME, _INF_VARIANTS,
140
+ models, anthro_key, openai_key):
141
+ yield md
 
 
142
 
143
+ start_event = arena_start.click(
144
+ _run_infinite,
145
+ inputs=[arena_models, arena_anthro_key, arena_openai_key],
146
+ outputs=[arena_md])
147
+ arena_stop.click(None, cancels=[start_event])
148
 
149
  with gr.TabItem("Game Theory Reference"):
150
  gr.Markdown(value=_build_reference_md())
bench/gradio_app/callbacks.py CHANGED
@@ -17,7 +17,7 @@ from registry import (
17
  NPlayerEnvironment, NPlayerAction,
18
  PromptBuilder, parse_action, GameObservation, RoundResult,
19
  _SYS_PROMPT, _LLM_OPPONENT_LABEL, _LLM_MODELS,
20
- _HAS_OAUTH, get_oauth_token,
21
  )
22
 
23
 
@@ -80,10 +80,10 @@ def _render(st):
80
 
81
 
82
  def _resolve_api_key(provider, api_key):
83
- """Return an API key: use provided key, or fall back to OAuth."""
84
  if api_key and api_key.strip():
85
  return api_key.strip()
86
- return get_oauth_token(provider)
87
 
88
 
89
  def _llm_choose_action(state, info, provider, model, api_key):
@@ -236,8 +236,7 @@ def on_game_select_variant(gname):
236
 
237
  def on_strategy_change(sname):
238
  is_llm = sname == _LLM_OPPONENT_LABEL
239
- show_key = is_llm and not _HAS_OAUTH
240
- return gr.update(visible=is_llm), gr.update(visible=show_key)
241
 
242
 
243
  def on_provider_change(provider):
 
17
  NPlayerEnvironment, NPlayerAction,
18
  PromptBuilder, parse_action, GameObservation, RoundResult,
19
  _SYS_PROMPT, _LLM_OPPONENT_LABEL, _LLM_MODELS,
20
+ _HAS_ENV_KEYS, get_env_api_key,
21
  )
22
 
23
 
 
80
 
81
 
82
  def _resolve_api_key(provider, api_key):
83
+ """Return an API key: use provided key, or fall back to env var."""
84
  if api_key and api_key.strip():
85
  return api_key.strip()
86
+ return get_env_api_key(provider)
87
 
88
 
89
  def _llm_choose_action(state, info, provider, model, api_key):
 
236
 
237
  def on_strategy_change(sname):
238
  is_llm = sname == _LLM_OPPONENT_LABEL
239
+ return gr.update(visible=is_llm), gr.update(visible=is_llm)
 
240
 
241
 
242
  def on_provider_change(provider):
bench/gradio_app/llm_arena.py CHANGED
@@ -1,4 +1,4 @@
1
- """LLM Arena -- spectator round-robin for Infinite Mode."""
2
  from __future__ import annotations
3
  import random as _rand
4
 
@@ -6,16 +6,20 @@ from registry import (
6
  _ZERO, _ONE, _TWO, _TEN,
7
  _HAS_LLM_AGENT, _LLM_MODELS,
8
  PromptBuilder, parse_action, GameObservation, RoundResult,
9
- _SYS_PROMPT, get_oauth_token,
10
  )
11
  from callbacks import _get_game_info
12
 
13
  _MAX_TOKENS = _TEN + _TEN
14
  _DETAIL_LIMIT = _TEN + _TEN
 
 
15
 
16
- _HDR_MATCH = f"| Match | Player {_ONE} | Player {_TWO} | P{_ONE} Score | P{_TWO} Score | Winner |"
 
17
  _SEP_MATCH = "|-------|----------|----------|----------|----------|--------|"
18
- _HDR_ROUND = f"| Round | P{_ONE} Action | P{_TWO} Action | P{_ONE} Pay | P{_TWO} Pay |"
 
19
  _SEP_ROUND = "|-------|-----------|-----------|--------|--------|"
20
 
21
 
@@ -39,18 +43,19 @@ def _call_llm(provider, model, prompt, api_key):
39
  return ""
40
 
41
 
42
- def _build_obs(info, p_hist, o_hist, rnd, total, p_score, o_score):
43
- """Build GameObservation for one player."""
44
  history = []
45
- for ph, oh in zip(p_hist, o_hist):
46
  history.append(RoundResult(
47
  round_number=ph["round"],
48
  player_action=ph["action"], opponent_action=oh["action"],
49
  player_payoff=ph["payoff"], opponent_payoff=oh["payoff"]))
50
  return GameObservation(
51
- game_name=info.get("key", ""), game_description=info.get("description", ""),
 
52
  available_actions=info["actions"], current_round=rnd,
53
- total_rounds=total, history=history,
54
  player_score=p_score, opponent_score=o_score,
55
  opponent_strategy="llm")
56
 
@@ -63,110 +68,120 @@ def _model_provider(model_name):
63
  return "Anthropic"
64
 
65
 
66
- def run_match(game_name, variants, num_rounds,
67
- p1_prov, p1_model, p1_key, p2_prov, p2_model, p2_key):
68
- """Run a full match between two LLMs. Returns result dict."""
69
- if not _HAS_LLM_AGENT:
70
- return {"error": "LLM agent not available"}
71
- info = _get_game_info(game_name, variants)
72
- if not info:
73
- return {"error": f"Game not found: {game_name}"}
74
- actions = info["actions"]
75
- p1_hist, p2_hist = [], []
76
- p1_score, p2_score = float(), float()
77
- rounds = []
78
- for rnd in range(_ONE, num_rounds + _ONE):
79
- obs1 = _build_obs(info, p1_hist, p2_hist, rnd, num_rounds, p1_score, p2_score)
80
- obs2 = _build_obs(info, p2_hist, p1_hist, rnd, num_rounds, p2_score, p1_score)
81
- prompt1, prompt2 = PromptBuilder.build(obs1), PromptBuilder.build(obs2)
82
- try:
83
- raw1 = _call_llm(p1_prov, p1_model, prompt1, p1_key)
84
- act1 = parse_action(raw1, actions)
85
- except Exception as exc:
86
- act1, raw1 = _rand.choice(actions), f"ERROR: {exc}"
87
- try:
88
- raw2 = _call_llm(p2_prov, p2_model, prompt2, p2_key)
89
- act2 = parse_action(raw2, actions)
90
- except Exception as exc:
91
- act2, raw2 = _rand.choice(actions), f"ERROR: {exc}"
92
- p1_pay, p2_pay = info["payoff_fn"](act1, act2)
93
- p1_score += p1_pay
94
- p2_score += p2_pay
95
- p1_hist.append({"round": rnd, "action": act1, "payoff": p1_pay})
96
- p2_hist.append({"round": rnd, "action": act2, "payoff": p2_pay})
97
- rounds.append({"round": rnd, "p1_action": act1, "p2_action": act2,
98
- "p1_pay": p1_pay, "p2_pay": p2_pay,
99
- "p1_raw": raw1.strip(), "p2_raw": raw2.strip()})
100
- return {"p1": f"{p1_prov}/{p1_model}", "p2": f"{p2_prov}/{p2_model}",
101
- "p1_score": p1_score, "p2_score": p2_score,
102
- "rounds": rounds, "total_rounds": num_rounds}
103
-
104
-
105
  def _resolve_key(provider, manual_key):
106
- """Use manual key if provided, otherwise try OAuth."""
107
  if manual_key and manual_key.strip():
108
  return manual_key.strip()
109
- return get_oauth_token(provider)
110
 
111
 
112
- def run_tournament(game_name, variants, num_rounds, models,
113
- anthropic_key, openai_key):
114
- """Run round-robin tournament between selected models."""
115
- if len(models) < _TWO:
116
- return [], "Select at least two models."
117
- results = []
118
  for i in range(len(models)):
119
  for j in range(i + _ONE, len(models)):
120
  p1, p2 = models[i], models[j]
121
  p1_prov, p2_prov = _model_provider(p1), _model_provider(p2)
122
- p1_key = _resolve_key(p1_prov, anthropic_key if p1_prov == "Anthropic" else openai_key)
123
- p2_key = _resolve_key(p2_prov, anthropic_key if p2_prov == "Anthropic" else openai_key)
 
 
124
  if not p1_key or not p2_key:
125
- results.append({"error": "No OAuth token or API key available"})
126
  continue
127
- result = run_match(game_name, variants, num_rounds,
128
- p1_prov, p1, p1_key, p2_prov, p2, p2_key)
129
- results.append(result)
130
- return results, ""
131
-
132
-
133
- def render_tournament(results):
134
- """Render tournament results as markdown."""
135
- if not results:
136
- return "No results yet. Select models and run the tournament."
137
- lines = ["## Tournament Results\n", _HDR_MATCH, _SEP_MATCH]
138
- for i, r in enumerate(results):
139
- if "error" in r:
140
- lines.append(f"| {i + _ONE} | - | - | - | - | Error: {r['error']} |")
141
- continue
142
- winner = r["p1"] if r["p1_score"] > r["p2_score"] else (
143
- r["p2"] if r["p2_score"] > r["p1_score"] else "Draw")
144
- lines.append(f"| {i + _ONE} | {r['p1']} | {r['p2']} | "
145
- f"{r['p1_score']:.1f} | {r['p2_score']:.1f} | {winner} |")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
146
  scores = {}
147
- for r in results:
148
- if "error" in r:
149
- continue
150
- scores.setdefault(r["p1"], float())
151
- scores.setdefault(r["p2"], float())
152
- scores[r["p1"]] += r["p1_score"]
153
- scores[r["p2"]] += r["p2_score"]
154
- if scores:
155
- lines.extend(["\n## Leaderboard\n",
156
- "| Rank | Model | Total Score |",
157
- "|------|-------|-------------|"])
158
- for rank, (model, score) in enumerate(
159
- sorted(scores.items(), key=lambda x: -x[_ONE])):
160
- lines.append(f"| {rank + _ONE} | {model} | {score:.1f} |")
161
- for i, r in enumerate(results):
162
- if "error" in r or not r.get("rounds"):
163
- continue
164
- lines.extend([f"\n### Match {i + _ONE}: {r['p1']} vs {r['p2']}\n",
165
- _HDR_ROUND, _SEP_ROUND])
166
- for rd in r["rounds"][:_DETAIL_LIMIT]:
167
- lines.append(f"| {rd['round']} | {rd['p1_action']} | {rd['p2_action']} | "
168
- f"{rd['p1_pay']:.1f} | {rd['p2_pay']:.1f} |")
169
- if len(r["rounds"]) > _DETAIL_LIMIT:
170
- remaining = len(r["rounds"]) - _DETAIL_LIMIT
171
- lines.append(f"| ... | ({remaining} more rounds) | ... | ... | ... |")
 
 
 
172
  return "\n".join(lines)
 
1
+ """LLM Arena -- infinite spectator tournament."""
2
  from __future__ import annotations
3
  import random as _rand
4
 
 
6
  _ZERO, _ONE, _TWO, _TEN,
7
  _HAS_LLM_AGENT, _LLM_MODELS,
8
  PromptBuilder, parse_action, GameObservation, RoundResult,
9
+ _SYS_PROMPT, get_env_api_key,
10
  )
11
  from callbacks import _get_game_info
12
 
13
  _MAX_TOKENS = _TEN + _TEN
14
  _DETAIL_LIMIT = _TEN + _TEN
15
+ _HISTORY_WINDOW = _TEN * _TEN
16
+ _INF_HORIZON = _TEN * _TEN * _TEN * _TEN
17
 
18
+ _HDR_MATCH = (f"| Match | Player {_ONE} | Player {_TWO} "
19
+ f"| P{_ONE} Score | P{_TWO} Score | Leader |")
20
  _SEP_MATCH = "|-------|----------|----------|----------|----------|--------|"
21
+ _HDR_ROUND = (f"| Round | P{_ONE} Action | P{_TWO} Action "
22
+ f"| P{_ONE} Pay | P{_TWO} Pay |")
23
  _SEP_ROUND = "|-------|-----------|-----------|--------|--------|"
24
 
25
 
 
43
  return ""
44
 
45
 
46
+ def _build_obs(info, p_hist, o_hist, rnd, p_score, o_score):
47
+ """Build GameObservation for one player in infinite mode."""
48
  history = []
49
+ for ph, oh in zip(p_hist[-_HISTORY_WINDOW:], o_hist[-_HISTORY_WINDOW:]):
50
  history.append(RoundResult(
51
  round_number=ph["round"],
52
  player_action=ph["action"], opponent_action=oh["action"],
53
  player_payoff=ph["payoff"], opponent_payoff=oh["payoff"]))
54
  return GameObservation(
55
+ game_name=info.get("key", ""),
56
+ game_description=info.get("description", ""),
57
  available_actions=info["actions"], current_round=rnd,
58
+ total_rounds=_INF_HORIZON, history=history,
59
  player_score=p_score, opponent_score=o_score,
60
  opponent_strategy="llm")
61
 
 
68
  return "Anthropic"
69
 
70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  def _resolve_key(provider, manual_key):
72
+ """Use manual key if provided, otherwise try env var."""
73
  if manual_key and manual_key.strip():
74
  return manual_key.strip()
75
+ return get_env_api_key(provider)
76
 
77
 
78
+ def _init_matchups(models, anthropic_key, openai_key):
79
+ """Build initial matchup state for all pairs."""
80
+ matchups = []
 
 
 
81
  for i in range(len(models)):
82
  for j in range(i + _ONE, len(models)):
83
  p1, p2 = models[i], models[j]
84
  p1_prov, p2_prov = _model_provider(p1), _model_provider(p2)
85
+ p1_key = _resolve_key(p1_prov,
86
+ anthropic_key if p1_prov == "Anthropic" else openai_key)
87
+ p2_key = _resolve_key(p2_prov,
88
+ anthropic_key if p2_prov == "Anthropic" else openai_key)
89
  if not p1_key or not p2_key:
 
90
  continue
91
+ matchups.append({
92
+ "p1_label": f"{p1_prov}/{p1}", "p2_label": f"{p2_prov}/{p2}",
93
+ "p1_prov": p1_prov, "p1_model": p1, "p1_key": p1_key,
94
+ "p2_prov": p2_prov, "p2_model": p2, "p2_key": p2_key,
95
+ "p1_hist": [], "p2_hist": [],
96
+ "p1_score": float(), "p2_score": float(),
97
+ "recent": [],
98
+ })
99
+ return matchups
100
+
101
+
102
+ def run_infinite_tournament(game_name, variants, models,
103
+ anthropic_key, openai_key):
104
+ """Generator that runs forever, yielding markdown after each round."""
105
+ if len(models) < _TWO:
106
+ yield "Select at least two models."
107
+ return
108
+ if not _HAS_LLM_AGENT:
109
+ yield "LLM agent not available."
110
+ return
111
+ info = _get_game_info(game_name, variants)
112
+ if not info:
113
+ yield "Game not found."
114
+ return
115
+ actions = info["actions"]
116
+ matchups = _init_matchups(models, anthropic_key, openai_key)
117
+ if not matchups:
118
+ yield "No valid matchups -- provide API keys or enable OAuth."
119
+ return
120
+ rnd = _ZERO
121
+ while True:
122
+ rnd += _ONE
123
+ for m in matchups:
124
+ obs1 = _build_obs(info, m["p1_hist"], m["p2_hist"],
125
+ rnd, m["p1_score"], m["p2_score"])
126
+ obs2 = _build_obs(info, m["p2_hist"], m["p1_hist"],
127
+ rnd, m["p2_score"], m["p1_score"])
128
+ prompt1 = PromptBuilder.build(obs1)
129
+ prompt2 = PromptBuilder.build(obs2)
130
+ try:
131
+ raw1 = _call_llm(m["p1_prov"], m["p1_model"], prompt1, m["p1_key"])
132
+ act1 = parse_action(raw1, actions)
133
+ except Exception:
134
+ act1 = _rand.choice(actions)
135
+ try:
136
+ raw2 = _call_llm(m["p2_prov"], m["p2_model"], prompt2, m["p2_key"])
137
+ act2 = parse_action(raw2, actions)
138
+ except Exception:
139
+ act2 = _rand.choice(actions)
140
+ p1_pay, p2_pay = info["payoff_fn"](act1, act2)
141
+ m["p1_score"] += p1_pay
142
+ m["p2_score"] += p2_pay
143
+ m["p1_hist"].append({"round": rnd, "action": act1, "payoff": p1_pay})
144
+ m["p2_hist"].append({"round": rnd, "action": act2, "payoff": p2_pay})
145
+ m["recent"].append({"round": rnd, "p1_action": act1, "p2_action": act2,
146
+ "p1_pay": p1_pay, "p2_pay": p2_pay})
147
+ if len(m["recent"]) > _DETAIL_LIMIT:
148
+ m["recent"] = m["recent"][-_DETAIL_LIMIT:]
149
+ if len(m["p1_hist"]) > _HISTORY_WINDOW:
150
+ m["p1_hist"] = m["p1_hist"][-_HISTORY_WINDOW:]
151
+ m["p2_hist"] = m["p2_hist"][-_HISTORY_WINDOW:]
152
+ yield _render_state(matchups, rnd)
153
+
154
+
155
+ def _render_state(matchups, current_round):
156
+ """Render current infinite tournament state as markdown."""
157
+ lines = [f"## Infinite Tournament -- Round {current_round}\n"]
158
  scores = {}
159
+ for m in matchups:
160
+ scores.setdefault(m["p1_label"], float())
161
+ scores.setdefault(m["p2_label"], float())
162
+ scores[m["p1_label"]] += m["p1_score"]
163
+ scores[m["p2_label"]] += m["p2_score"]
164
+ lines.extend(["### Leaderboard\n",
165
+ "| Rank | Model | Total Score | Avg / Round |",
166
+ "|------|-------|-------------|-------------|"])
167
+ for rank, (model, score) in enumerate(
168
+ sorted(scores.items(), key=lambda x: -x[_ONE])):
169
+ avg = score / max(current_round, _ONE)
170
+ lines.append(f"| {rank + _ONE} | {model} | {score:.1f} | {avg:.2f} |")
171
+ lines.extend(["\n### Matchups\n", _HDR_MATCH, _SEP_MATCH])
172
+ for i, m in enumerate(matchups):
173
+ leader = m["p1_label"] if m["p1_score"] > m["p2_score"] else (
174
+ m["p2_label"] if m["p2_score"] > m["p1_score"] else "Tied")
175
+ lines.append(f"| {i + _ONE} | {m['p1_label']} | {m['p2_label']} | "
176
+ f"{m['p1_score']:.1f} | {m['p2_score']:.1f} | {leader} |")
177
+ for i, m in enumerate(matchups):
178
+ recent = m["recent"]
179
+ lines.extend([
180
+ f"\n### Match {i + _ONE}: {m['p1_label']} vs {m['p2_label']} "
181
+ f"(last {len(recent)} rounds)\n",
182
+ _HDR_ROUND, _SEP_ROUND])
183
+ for rd in recent:
184
+ lines.append(
185
+ f"| {rd['round']} | {rd['p1_action']} | {rd['p2_action']} | "
186
+ f"{rd['p1_pay']:.1f} | {rd['p2_pay']:.1f} |")
187
  return "\n".join(lines)
bench/gradio_app/registry.py CHANGED
@@ -211,30 +211,16 @@ _LLM_MODELS = {
211
  }
212
  _LLM_OPPONENT_LABEL = "LLM"
213
 
214
- # -- OAuth token support --
215
- _HAS_OAUTH = False
216
- _oauth_get_anthropic = None
217
- _oauth_get_openai = None
218
- try:
219
- from train.self_play.oauth import (
220
- get_anthropic_access_token as _oauth_get_anthropic,
221
- get_openai_credentials as _oauth_get_openai,
222
- )
223
- _HAS_OAUTH = True
224
- except ImportError:
225
- pass
226
 
227
 
228
- def get_oauth_token(provider: str) -> str | None:
229
- """Get an OAuth access token for the given provider, or None."""
230
- if not _HAS_OAUTH:
231
- return None
232
- try:
233
- if provider == "Anthropic":
234
- return _oauth_get_anthropic()
235
- if provider == "OpenAI":
236
- token, _acct = _oauth_get_openai()
237
- return token
238
- except Exception:
239
- return None
240
- return None
 
211
  }
212
  _LLM_OPPONENT_LABEL = "LLM"
213
 
214
+ # -- API key support via environment variables --
215
+ import os as _os
216
+ _ENV_API_KEYS = {
217
+ "Anthropic": _os.environ.get("ANTHROPIC_API_KEY", ""),
218
+ "OpenAI": _os.environ.get("OPENAI_API_KEY", ""),
219
+ }
220
+ _HAS_ENV_KEYS = any(_ENV_API_KEYS.values())
 
 
 
 
 
221
 
222
 
223
+ def get_env_api_key(provider: str) -> str | None:
224
+ """Get an API key from environment variables, or None."""
225
+ key = _ENV_API_KEYS.get(provider, "")
226
+ return key if key else None
 
 
 
 
 
 
 
 
 
constant_definitions/slides/layout.py CHANGED
@@ -117,3 +117,15 @@ PD_EXPLANATION_BODY = (
117
  "is the core alignment challenge. An aligned agent should learn to "
118
  "cooperate with cooperative partners while resisting exploitation."
119
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
117
  "is the core alignment challenge. An aligned agent should learn to "
118
  "cooperate with cooperative partners while resisting exploitation."
119
  )
120
+
121
+ # Font name for slide text
122
+ FONT_NAME = "Hubot Sans"
123
+
124
+ # EMU (English Metric Units) per inch for Google Slides API
125
+ EMU_PER_INCH = 914400
126
+
127
+ # Google Slides file ID for the Kant presentation
128
+ GSLIDES_FILE_ID = "1sXyiZMKYbTwp6CK6VbSBF9ZvzUHweHvmpxfb34yVZQs"
129
+
130
+ # Revision restore offset (how many revisions back to go)
131
+ REVISION_RESTORE_OFFSET = 2