viirii committed
Commit 467a609 · verified · 1 Parent(s): bcf9268

Upload folder using huggingface_hub

examples/example_usage.py CHANGED
@@ -4,12 +4,8 @@ import argparse
 import json
 
 from glass_bridge.client import OpenEnvGlassBridgeClient
-from glass_bridge.models import AgentAction, ResetRequest, StepRequest, StrategyProfile
-from glass_bridge.policies import (
-    assign_tournament_strategy_profiles,
-    build_tournament_glass_bridge_population,
-)
-from glass_bridge.tournament_env import GlassBridgeTournamentEnv
+from glass_bridge.models import AgentAction, ResetRequest, StepRequest
+from glass_bridge.policies import build_tournament_glass_bridge_population
 
 
 def main() -> None:
@@ -27,23 +23,6 @@ def main() -> None:
     )
     args = parser.parse_args()
 
-    agent_names = [GlassBridgeTournamentEnv.agent_name(i) for i in range(args.initial_players)]
-    raw_profiles = assign_tournament_strategy_profiles(
-        agent_names=agent_names,
-        seed=args.seed,
-        share_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
-        truth_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
-    )
-    profiles = {
-        agent_name: StrategyProfile.model_validate(profile)
-        for agent_name, profile in raw_profiles.items()
-    }
-    policies = build_tournament_glass_bridge_population(
-        raw_profiles,
-        seed=args.seed,
-        adaptation_config={"kind": args.adaptation_kind},
-    )
-
     client = OpenEnvGlassBridgeClient(base_url=args.base_url)
     try:
         reset_response = client.reset(
@@ -52,10 +31,17 @@ def main() -> None:
             initial_players=args.initial_players,
             first_round_num_steps=args.first_round_steps,
             max_rounds=args.max_rounds,
-            strategy_profiles=profiles,
+            share_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
+            truth_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
+            llm_model_pool=["qwen3.5"],
             )
         )
         result = reset_response.result
+        policies = build_tournament_glass_bridge_population(
+            result.info.strategy_profiles,
+            seed=args.seed,
+            adaptation_config={"kind": args.adaptation_kind},
+        )
         turn_idx = 0
 
         while not result.done and turn_idx < args.max_turns:
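With this change, profile assignment moves server-side: reset takes the rate/model pools and echoes the sampled profiles back in result.info. A minimal smoke-test sketch of the new round trip, assuming a server at http://localhost:8000 (the base_url default is not shown in this commit); passing llm_model_pool=["none"] keeps every sampled profile on the heuristic path, so get_llm_backend returns None and transformers/torch are never loaded:

from glass_bridge.client import OpenEnvGlassBridgeClient
from glass_bridge.models import ResetRequest
from glass_bridge.policies import build_tournament_glass_bridge_population

client = OpenEnvGlassBridgeClient(base_url="http://localhost:8000")  # assumed local server
reset_response = client.reset(
    ResetRequest(
        seed=0,
        initial_players=8,
        llm_model_pool=["none"],  # "none" keeps agents on the heuristic policy
    )
)
result = reset_response.result
# Profiles are sampled server-side and echoed back in result.info.
policies = build_tournament_glass_bridge_population(
    result.info.strategy_profiles,
    seed=0,
)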
llm_decision_backend.py ADDED
@@ -0,0 +1,257 @@
+"""LLM-based decision backends for Glass Bridge tournament agents.
+
+When an agent's strategy_profile has model_name not in (None, "none", ""),
+the policy delegates to an LLM backend instead of the heuristic.
+"""
+
+from __future__ import annotations
+
+import json
+import re
+from abc import ABC, abstractmethod
+from typing import Any
+
+_transformers_available: bool | None = None
+
+
+def _check_transformers() -> bool:
+    global _transformers_available
+    if _transformers_available is not None:
+        return _transformers_available
+    try:
+        import torch  # noqa: F401
+        import transformers  # noqa: F401
+        _transformers_available = True
+    except ImportError:
+        _transformers_available = False
+    return _transformers_available
+
+
+class LLMDecisionBackend(ABC):
+    @abstractmethod
+    def select_action(
+        self,
+        observation: dict[str, Any],
+        strategy_profile: dict[str, Any],
+        legal_actions: list[Any],
+        fallback_fn: Any,
+    ) -> Any:
+        ...
+
+
+def _observation_to_prompt(observation: dict[str, Any]) -> str:
+    """No reputation—model infers trust from game state."""
+    parts = [
+        "=== Round structure ===",
+        "Each round has two phases. (1) Negotiation: first each agent may make offers (communication_offer), "
+        "then each may respond to offers (communication_response). (2) Play: agents step onto the bridge one at a time "
+        "in current_order. You may only step when it is your turn (current_actor); all agents before you in the order "
+        "have already acted (fell or crossed). If it is not your turn or you are already done, your only legal action is NOOP.",
+        "",
+        f"Phase: {observation.get('phase')}",
+        f"Round: {observation.get('round_idx')}",
+        f"You are agent {observation.get('agent_name')}",
+        f"Active agents: {observation.get('active_agents', [])}",
+        f"Current order (stepping order this round): {observation.get('current_order', [])}",
+    ]
+    profile = observation.get("strategy_profile") or {}
+    share = profile.get("share_rate")
+    truth = profile.get("truth_rate")
+    if share is not None or truth is not None:
+        parts.append(
+            f"Your initial tendencies: share_rate={share}, truth_rate={truth}. "
+            "These are upfront settings; you may choose to share more/less or be more/less truthful as the game goes."
+        )
+    round_history = observation.get("round_history", [])
+    if round_history:
+        parts.append("Past rounds (order, survivors, eliminated, progress, trade_summary):")
+        for r in round_history:
+            parts.append(f" Round {r.get('round_idx')}: order={r.get('order')}, survivors={r.get('survivors')}, eliminated={r.get('eliminated')}, progress={r.get('progress')}, trades={r.get('trade_summary', {})}")
+    if observation.get("phase", "").startswith("communication"):
+        parts.append(f"Negotiable partners: {observation.get('negotiable_partners', [])}")
+        parts.append(f"Your private known steps: {observation.get('private_known_steps', {})}")
+        parts.append(f"Assignment by agent: {observation.get('assignment_by_agent', {})}")
+        inc = observation.get("incoming_offers", [])
+        if inc:
+            inc_serial = [{"offer_id": o.get("offer_id"), "proposer": o.get("proposer"), "request_steps": o.get("request_steps", []), "claims": o.get("claims", [])} for o in inc]
+            parts.append(f"Incoming offers: {inc_serial}")
+    else:
+        parts.append(f"Current actor (who steps now): {observation.get('current_actor')}")
+        parts.append(f"Current step index: {observation.get('current_step_idx')}")
+        parts.append(f"Verified public: {observation.get('verified_public', [])}")
+        parts.append(f"Your private known steps: {observation.get('private_known_steps', {})}")
+    parts.append(f"Legal actions: {observation.get('legal_actions', [])}")
+    return "\n".join(parts)
+
+
+def _movement_legal_step_actions(legal_actions: list[Any]) -> list[str]:
+    """Return list of legal step actions (LEFT, RIGHT) in movement phase. Empty if only NOOP."""
+    return [a for a in legal_actions if a in ("LEFT", "RIGHT")]
+
+
+def _parse_llm_action(raw: str, phase: str, legal_actions: list[Any]) -> Any | None:
+    raw = raw.strip()
+    json_match = re.search(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", raw, re.DOTALL)
+    if json_match:
+        try:
+            parsed = json.loads(json_match.group())
+            action_type = str(parsed.get("type", "")).upper()
+            if action_type == "OFFERS":
+                offers = parsed.get("offers", [])
+                if not isinstance(offers, list):
+                    return None
+                valid_offers = []
+                for o in offers:
+                    if not isinstance(o, dict):
+                        continue
+                    r = o.get("recipient")
+                    g = o.get("give_steps", [])
+                    req = o.get("request_steps", [])
+                    mode = o.get("claim_mode", "truth")
+                    if r and isinstance(g, list) and isinstance(req, list):
+                        valid_offers.append({
+                            "recipient": str(r),
+                            "give_steps": [int(x) for x in g if isinstance(x, (int, float))],
+                            "request_steps": [int(x) for x in req if isinstance(x, (int, float))],
+                            "claim_mode": "truth" if str(mode).lower() == "truth" else "lie",
+                        })
+                if valid_offers:
+                    return {"type": "OFFERS", "offers": valid_offers}
+                return {"type": "NOOP"}
+            if action_type == "RESPONSES":
+                ids = parsed.get("accept_offer_ids", [])
+                if isinstance(ids, list):
+                    return {"type": "RESPONSES", "accept_offer_ids": [int(x) for x in ids if isinstance(x, (int, float))]}
+                return {"type": "NOOP"}
+            if action_type == "NOOP":
+                return {"type": "NOOP"}
+        except (json.JSONDecodeError, TypeError, ValueError):
+            pass
+    # Movement: only return LEFT/RIGHT if legal; otherwise accept NOOP or return None
+    step_legal = _movement_legal_step_actions(legal_actions)
+    if not step_legal:
+        if re.search(r"\bNOOP\b", raw, re.IGNORECASE):
+            return {"type": "NOOP"}
+        return None
+    if "LEFT" in raw.upper() and "RIGHT" not in raw.upper().split("LEFT")[0] and "LEFT" in step_legal:
+        return "LEFT"
+    if "RIGHT" in raw.upper() and "RIGHT" in step_legal:
+        return "RIGHT"
+    return None
+
+
+class QwenBackend(LLMDecisionBackend):
+    def __init__(self, model_path: str = "unsloth/Qwen2.5-3B-Instruct", device: str | None = None):
+        if not _check_transformers():
+            raise ImportError("LLM backends require transformers and torch. Install with: pip install transformers torch")
+        self._model_path = model_path
+        self._device = device
+        self._model = None
+        self._tokenizer = None
+
+    def _ensure_loaded(self) -> None:
+        if self._model is not None:
+            return
+        import torch
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+
+        self._tokenizer = AutoTokenizer.from_pretrained(self._model_path, trust_remote_code=True)
+        device = self._device if self._device else ("cuda" if torch.cuda.is_available() else "cpu")
+        self._model = AutoModelForCausalLM.from_pretrained(
+            self._model_path,
+            torch_dtype="auto",
+            trust_remote_code=True,
+        )
+        self._model = self._model.to(device)
+        self._model.eval()
+        self._device = device
+
+    def select_action(
+        self,
+        observation: dict[str, Any],
+        strategy_profile: dict[str, Any],
+        legal_actions: list[Any],
+        fallback_fn: Any,
+    ) -> Any:
+        phase = observation.get("phase", "")
+        if phase not in ("communication_offer", "communication_response", "movement"):
+            return fallback_fn()
+
+        self._ensure_loaded()
+        import torch
+
+        prompt = _observation_to_prompt(observation)
+        if phase == "communication_offer":
+            output_format = (
+                "NEGOTIATION PHASE (offers). Output exactly one option from Legal actions. "
+                "If Legal actions includes {\"type\":\"OFFERS\"}, you may output {\"type\":\"OFFERS\",\"offers\":[...]} or {\"type\":\"NOOP\"}. "
+                "If only {\"type\":\"NOOP\"} is legal, output {\"type\":\"NOOP\"}. No other text."
+            )
+        elif phase == "communication_response":
+            output_format = (
+                "NEGOTIATION PHASE (responses). Output exactly one option from Legal actions. "
+                "Either {\"type\":\"RESPONSES\",\"accept_offer_ids\":[...]} or {\"type\":\"NOOP\"}. "
+                "If only {\"type\":\"NOOP\"} is legal, output {\"type\":\"NOOP\"}. No other text."
+            )
+        else:
+            if not _movement_legal_step_actions(legal_actions):
+                output_format = (
+                    "PLAY PHASE (movement). It is not your turn to step (or you are already done). "
+                    "Your only legal action is NOOP. Output exactly: {\"type\":\"NOOP\"}. No other text."
+                )
+            else:
+                output_format = (
+                    "PLAY PHASE (movement). It is your turn to step. Output exactly one word: \"LEFT\" or \"RIGHT\". No other text."
+                )
+
+        user_content = f"{prompt}\n\n=== Your response (must be exactly one of Legal actions) ===\n{output_format}"
+        messages = [
+            {"role": "system", "content": (
+                "You are an agent in a glass bridge game. Each round has a NEGOTIATION phase (offers, then responses) "
+                "and a PLAY phase (stepping onto the bridge in turn order). You are given initial share_rate and truth_rate; "
+                "you may update your own behavior as you go (e.g. share more or less, be more or less truthful). "
+                "You may only step when it is your turn—when all agents before you in the round order have already stepped (fell or crossed). "
+                "Maximize your survival; infer trust from past rounds and trades. "
+                "CRITICAL: Output only a valid action. Check Legal actions in the observation; your response must be exactly one of those options. "
+                "Invalid actions (e.g. LEFT or RIGHT when only NOOP is legal) are rejected. No prose, no explanation."
+            )},
+            {"role": "user", "content": user_content},
+        ]
+        text = self._tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        inputs = self._tokenizer([text], return_tensors="pt").to(self._device)
+        with torch.no_grad():
+            out = self._model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.3, pad_token_id=self._tokenizer.eos_token_id)
+        response = self._tokenizer.batch_decode(out[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
+        parsed = _parse_llm_action(response, phase, legal_actions)
+        return parsed if parsed is not None else fallback_fn()
+
+
+_LLM_BACKEND_REGISTRY: dict[str, tuple[type[LLMDecisionBackend], dict[str, Any]]] = {
+    "qwen3.5": (QwenBackend, {"model_path": "unsloth/Qwen2.5-3B-Instruct"}),
+    "qwen2.5": (QwenBackend, {"model_path": "unsloth/Qwen2.5-3B-Instruct"}),
+    "qwen2.5-7b": (QwenBackend, {"model_path": "Qwen/Qwen2.5-7B-Instruct"}),
+    "smollm2-1.7b": (QwenBackend, {"model_path": "HuggingFaceTB/SmolLM2-1.7B-Instruct"}),
+    "smollm2-360m": (QwenBackend, {"model_path": "HuggingFaceTB/SmolLM2-360M-Instruct"}),
+    "smollm2-135m": (QwenBackend, {"model_path": "HuggingFaceTB/SmolLM2-135M-Instruct"}),
+}
+_backend_cache: dict[str, LLMDecisionBackend] = {}
+
+
+def get_llm_backend(model_name: str, model_path_override: str | None = None) -> LLMDecisionBackend | None:
+    if not model_name or str(model_name).lower() in ("none", "null", ""):
+        return None
+    key = str(model_name).lower()
+    if key not in _LLM_BACKEND_REGISTRY:
+        return None
+    cache_key = f"{key}:{model_path_override or ''}"
+    if cache_key in _backend_cache:
+        return _backend_cache[cache_key]
+    cls, kwargs = _LLM_BACKEND_REGISTRY[key]
+    if model_path_override:
+        kwargs = {**kwargs, "model_path": model_path_override}
+    try:
+        backend = cls(**kwargs)
+        _backend_cache[cache_key] = backend
+        return backend
+    except Exception:
+        return None
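For reference, a small sketch of how the parser gates raw model text. _parse_llm_action is an internal helper; the import below assumes the module sits inside the glass_bridge package, matching the relative import added to policies.py in this commit. Note the JSON branch extracts the first JSON object in the reply without consulting legal_actions; only bare LEFT/RIGHT words are checked against the legal step actions.

from glass_bridge.llm_decision_backend import _parse_llm_action

# Movement phase, not this agent's turn: only NOOP is legal, so a stray
# "LEFT" parses to None and the caller falls back to the heuristic policy.
assert _parse_llm_action("LEFT", "movement", [{"type": "NOOP"}]) is None

# Movement phase on the agent's turn: a bare step word is accepted.
assert _parse_llm_action("Step RIGHT", "movement", ["LEFT", "RIGHT"]) == "RIGHT"

# Negotiation phase: the first JSON object in the reply is extracted.
raw = 'I accept offer 3. {"type": "RESPONSES", "accept_offer_ids": [3]}'
parsed = _parse_llm_action(raw, "communication_response", [{"type": "NOOP"}])
assert parsed == {"type": "RESPONSES", "accept_offer_ids": [3]}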
models.py CHANGED
@@ -144,9 +144,10 @@ class StrategyProfile(BaseModel):
     model_config = ConfigDict(extra="allow")
 
     kind: str = "share_profile"
+    model_name: str = "qwen3.5"
     share_rate: float = 0.5
     truth_rate: float = 0.5
-    label: str = "share_0.50_truth_0.50"
+    label: str = "model_qwen3.5_share_0.50_truth_0.50"
 
 
 class ResetRequest(BaseModel):
@@ -155,6 +156,9 @@ class ResetRequest(BaseModel):
     max_rounds: int = 25
     initial_players: int = 16
     first_round_num_steps: int = 18
+    share_rates: list[float] | None = None
+    truth_rates: list[float] | None = None
+    llm_model_pool: list[str] | None = None
     strategy_profiles: dict[str, StrategyProfile] | None = None
 
 
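The new fields compose as follows; a minimal sketch assuming pydantic v2 (implied by ConfigDict and model_dump elsewhere in this commit). Per-agent profiles remain optional; when they are omitted, the pool fields steer server-side sampling instead:

from glass_bridge.models import ResetRequest, StrategyProfile

request = ResetRequest(
    initial_players=4,
    share_rates=[0.0, 1.0],
    truth_rates=[0.5],
    llm_model_pool=["qwen2.5", "smollm2-360m"],
)
assert request.strategy_profiles is None  # env will sample from the pools

# Explicit profiles still win; model_name="none" opts an agent out of the LLM path.
profile = StrategyProfile(model_name="none", share_rate=0.25, truth_rate=0.75)
assert profile.kind == "share_profile"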
 
policies.py CHANGED
@@ -3,6 +3,7 @@ from __future__ import annotations
 import random
 from typing import Any
 
+from .llm_decision_backend import get_llm_backend
 from .tournament_env import GlassBridgeTournamentEnv
 
 
@@ -14,10 +15,12 @@ class TournamentGlassBridgePolicy:
         strategy_profile: dict[str, Any],
         seed: int = 0,
         adaptation_config: dict[str, Any] | None = None,
+        llm_model_paths: dict[str, str] | None = None,
     ):
         self.strategy_profile = dict(strategy_profile)
         self._rng = random.Random(seed)
         self.adaptation = build_tournament_adaptation_strategy(adaptation_config or {})
+        self.llm_model_paths = dict(llm_model_paths or {})
 
     def select_action(self, observation: dict) -> Any:
         legal = observation.get("legal_actions", [])
@@ -25,6 +28,27 @@ class TournamentGlassBridgePolicy:
             raise RuntimeError("No legal actions available")
 
         phase = observation.get("phase")
+        model_name = self.strategy_profile.get("model_name")
+        if model_name and str(model_name).lower() not in ("none", "null", ""):
+            backend = get_llm_backend(
+                str(model_name),
+                model_path_override=self.llm_model_paths.get(str(model_name)),
+            )
+            if backend is not None:
+                def fallback() -> Any:
+                    if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_OFFER:
+                        return self._offer_action(observation)
+                    if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_RESPONSE:
+                        return self._response_action(observation)
+                    return self._movement_action(observation, legal)
+
+                return backend.select_action(
+                    observation=observation,
+                    strategy_profile=self.strategy_profile,
+                    legal_actions=legal,
+                    fallback_fn=fallback,
+                )
+
         if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_OFFER:
             return self._offer_action(observation)
         if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_RESPONSE:
@@ -249,14 +273,20 @@ def build_tournament_adaptation_strategy(config: dict[str, Any]) -> TournamentAd
 def build_tournament_strategy_grid(
     share_rates: list[float],
     truth_rates: list[float],
+    llm_model_pool: list[str] | None = None,
 ) -> list[dict[str, Any]]:
     return [
         {
            "kind": "share_profile",
+           "model_name": model_name,
            "share_rate": float(share_rate),
            "truth_rate": float(truth_rate),
-           "label": f"share_{float(share_rate):.2f}_truth_{float(truth_rate):.2f}",
+           "label": (
+               f"model_{model_name}_share_{float(share_rate):.2f}"
+               f"_truth_{float(truth_rate):.2f}"
+           ),
        }
+       for model_name in [str(name) for name in (llm_model_pool or ["qwen3.5"])]
        for share_rate in share_rates
        for truth_rate in truth_rates
    ]
@@ -267,9 +297,14 @@ def assign_tournament_strategy_profiles(
     seed: int,
     share_rates: list[float],
     truth_rates: list[float],
+    llm_model_pool: list[str] | None = None,
 ) -> dict[str, dict[str, Any]]:
     rng = random.Random(seed)
-    grid = build_tournament_strategy_grid(share_rates=share_rates, truth_rates=truth_rates)
+    grid = build_tournament_strategy_grid(
+        share_rates=share_rates,
+        truth_rates=truth_rates,
+        llm_model_pool=llm_model_pool,
+    )
     return {agent_name: dict(rng.choice(grid)) for agent_name in agent_names}
 
 
@@ -277,6 +312,7 @@ def build_tournament_glass_bridge_population(
     strategy_profiles: dict[str, dict[str, Any]],
     seed: int,
     adaptation_config: dict[str, Any] | None = None,
+    llm_model_paths: dict[str, str] | None = None,
 ) -> dict[str, TournamentGlassBridgePolicy]:
     population: dict[str, TournamentGlassBridgePolicy] = {}
     for offset, agent_name in enumerate(sorted(strategy_profiles.keys())):
@@ -284,5 +320,6 @@ def build_tournament_glass_bridge_population(
             strategy_profile=strategy_profiles[agent_name],
             seed=(seed * 1000) + 50_000 + offset,
             adaptation_config=adaptation_config,
+            llm_model_paths=llm_model_paths,
         )
     return population
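A short sketch of the extended grid and population builders, using only functions shown in this diff. The grid is the Cartesian product of models, share rates, and truth rates; the llm_model_paths override dict is a hypothetical local path, shown only to illustrate the parameter (it is never consulted here because the pool is ["none"], which keeps every policy on the heuristic path):

from glass_bridge.policies import (
    assign_tournament_strategy_profiles,
    build_tournament_glass_bridge_population,
    build_tournament_strategy_grid,
)

grid = build_tournament_strategy_grid(
    share_rates=[0.0, 1.0],
    truth_rates=[0.0, 1.0],
    llm_model_pool=["qwen2.5"],
)
assert len(grid) == 4  # |models| x |share_rates| x |truth_rates|
assert grid[0]["label"] == "model_qwen2.5_share_0.00_truth_0.00"

profiles = assign_tournament_strategy_profiles(
    agent_names=["agent_0", "agent_1"],  # any names work; these are illustrative
    seed=7,
    share_rates=[0.0, 1.0],
    truth_rates=[0.0, 1.0],
    llm_model_pool=["none"],
)
policies = build_tournament_glass_bridge_population(
    profiles,
    seed=7,
    llm_model_paths={"qwen2.5": "/local/models/qwen2.5-3b"},  # hypothetical override
)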
server/glass_bridge_environment.py CHANGED
@@ -17,7 +17,6 @@ from glass_bridge.models import (
     ResetResponse,
     StepRequest,
     StepResponse,
-    StrategyProfile,
 )
 from glass_bridge.tournament_env import GlassBridgeTournamentEnv
 
@@ -29,13 +28,15 @@ class GlassBridgeOpenEnvSession:
 
     def reset(self, request: ResetRequest) -> ResetResponse:
         seed = 0 if request.seed is None else int(request.seed)
-        strategy_profiles = self._normalize_strategy_profiles(request)
         self.env = GlassBridgeTournamentEnv(
             seed=seed,
             max_rounds=int(request.max_rounds),
             initial_players=int(request.initial_players),
             first_round_num_steps=int(request.first_round_num_steps),
-            strategy_profiles=strategy_profiles,
+            strategy_profiles=self._normalize_strategy_profiles(request),
+            share_rates=request.share_rates,
+            truth_rates=request.truth_rates,
+            llm_model_pool=request.llm_model_pool,
         )
         raw = self.env.reset(seed=seed)
         return ResetResponse(session_id=self.session_id, result=self._build_result(raw))
@@ -64,12 +65,7 @@ class GlassBridgeOpenEnvSession:
             agent_name: profile.model_dump(mode="python")
             for agent_name, profile in request.strategy_profiles.items()
         }
-
-        profiles: dict[str, dict] = {}
-        for agent_idx in range(int(request.initial_players)):
-            agent_name = GlassBridgeTournamentEnv.agent_name(agent_idx)
-            profiles[agent_name] = StrategyProfile().model_dump(mode="python")
-        return profiles
+        return {}
 
     @staticmethod
     def _build_result(raw: dict) -> EnvironmentResult:
tournament_env.py CHANGED
@@ -29,12 +29,19 @@ class GlassBridgeTournamentEnv:
         initial_players: int = DEFAULT_INITIAL_PLAYERS,
         first_round_num_steps: int = DEFAULT_FIRST_ROUND_NUM_STEPS,
         strategy_profiles: dict[str, dict[str, Any]] | None = None,
+        share_rates: list[float] | None = None,
+        truth_rates: list[float] | None = None,
+        llm_model_pool: list[str] | None = None,
     ):
         self.rng = random.Random(seed)
         self.max_rounds = max_rounds
         self.initial_players = initial_players
         self.first_round_num_steps = first_round_num_steps
+        self._explicit_strategy_profiles = bool(strategy_profiles)  # an empty dict (e.g. from the server) also falls back to sampling
         self.strategy_profiles = strategy_profiles or {}
+        self.share_rates = list(share_rates or [0.0, 0.25, 0.5, 0.75, 1.0])
+        self.truth_rates = list(truth_rates or [0.0, 0.25, 0.5, 0.75, 1.0])
+        self.llm_model_pool = [str(model_name) for model_name in (llm_model_pool or ["qwen3.5"])]
 
         self.all_agents = [self.agent_name(i) for i in range(self.initial_players)]
         self.phase = self.PHASE_TERMINAL
@@ -67,6 +74,8 @@ class GlassBridgeTournamentEnv:
     def reset(self, seed: int | None = None) -> dict[str, Any]:
         if seed is not None:
             self.rng.seed(seed)
+        if not self._explicit_strategy_profiles:
+            self.strategy_profiles = self._assign_strategy_profiles()
 
         self.phase = self.PHASE_COMMUNICATION_OFFER
         self.round_idx = 0
@@ -108,6 +117,27 @@ class GlassBridgeTournamentEnv:
         events = self._start_new_round()
         return self._result(self._zero_rewards(), done=False, events=events)
 
+    def _assign_strategy_profiles(self) -> dict[str, dict[str, Any]]:
+        strategy_grid = [
+            {
+                "kind": "share_profile",
+                "model_name": model_name,
+                "share_rate": float(share_rate),
+                "truth_rate": float(truth_rate),
+                "label": (
+                    f"model_{model_name}_share_{float(share_rate):.2f}"
+                    f"_truth_{float(truth_rate):.2f}"
+                ),
+            }
+            for model_name in self.llm_model_pool
+            for share_rate in self.share_rates
+            for truth_rate in self.truth_rates
+        ]
+        return {
+            agent_name: dict(self.rng.choice(strategy_grid))
+            for agent_name in self.all_agents
+        }
+
     def step(self, action_dict: dict[Any, str]) -> dict[str, Any]:
         normalized_actions = self._normalize_action_dict(action_dict)
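Constructed directly, the env now owns profile assignment. A minimal sketch using only the constructor parameters added above; no model is loaded here, since profiles stay plain dicts until a policy actually consults a backend:

from glass_bridge.tournament_env import GlassBridgeTournamentEnv

env = GlassBridgeTournamentEnv(
    seed=0,
    initial_players=4,
    share_rates=[0.0, 0.5, 1.0],
    truth_rates=[0.0, 1.0],
    llm_model_pool=["qwen2.5"],
)
env.reset(seed=0)  # samples one grid profile per agent before the first round
for agent_name, profile in env.strategy_profiles.items():
    print(agent_name, profile["label"])  # e.g. model_qwen2.5_share_0.50_truth_1.00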