Spaces:

viirii
/

glass_bridge

Runtime error

App Files Files Community

viirii committed on Mar 8

Commit

467a609

verified ·

1 Parent(s): bcf9268

Upload folder using huggingface_hub

Browse files

Files changed (6) hide show

examples/example_usage.py +10 -24
llm_decision_backend.py +257 -0
models.py +5 -1
policies.py +39 -2
server/glass_bridge_environment.py +5 -9
tournament_env.py +30 -0

examples/example_usage.py CHANGED Viewed

@@ -4,12 +4,8 @@ import argparse
 import json
 from glass_bridge.client import OpenEnvGlassBridgeClient
-from glass_bridge.models import AgentAction, ResetRequest, StepRequest, StrategyProfile
-from glass_bridge.policies import (
-    assign_tournament_strategy_profiles,
-    build_tournament_glass_bridge_population,
-)
-from glass_bridge.tournament_env import GlassBridgeTournamentEnv
 def main() -> None:
@@ -27,23 +23,6 @@ def main() -> None:
     )
     args = parser.parse_args()
-    agent_names = [GlassBridgeTournamentEnv.agent_name(i) for i in range(args.initial_players)]
-    raw_profiles = assign_tournament_strategy_profiles(
-        agent_names=agent_names,
-        seed=args.seed,
-        share_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
-        truth_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
-    )
-    profiles = {
-        agent_name: StrategyProfile.model_validate(profile)
-        for agent_name, profile in raw_profiles.items()
-    }
-    policies = build_tournament_glass_bridge_population(
-        raw_profiles,
-        seed=args.seed,
-        adaptation_config={"kind": args.adaptation_kind},
-    )
     client = OpenEnvGlassBridgeClient(base_url=args.base_url)
     try:
         reset_response = client.reset(
@@ -52,10 +31,17 @@ def main() -> None:
                 initial_players=args.initial_players,
                 first_round_num_steps=args.first_round_steps,
                 max_rounds=args.max_rounds,
-                strategy_profiles=profiles,
             )
         )
         result = reset_response.result
         turn_idx = 0
         while not result.done and turn_idx < args.max_turns:

 import json
 from glass_bridge.client import OpenEnvGlassBridgeClient
+from glass_bridge.models import AgentAction, ResetRequest, StepRequest
+from glass_bridge.policies import build_tournament_glass_bridge_population
 def main() -> None:
     )
     args = parser.parse_args()
     client = OpenEnvGlassBridgeClient(base_url=args.base_url)
     try:
         reset_response = client.reset(
                 initial_players=args.initial_players,
                 first_round_num_steps=args.first_round_steps,
                 max_rounds=args.max_rounds,
+                share_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
+                truth_rates=[0.0, 0.25, 0.5, 0.75, 1.0],
+                llm_model_pool=["qwen3.5"],
             )
         )
         result = reset_response.result
+        policies = build_tournament_glass_bridge_population(
+            result.info.strategy_profiles,
+            seed=args.seed,
+            adaptation_config={"kind": args.adaptation_kind},
+        )
         turn_idx = 0
         while not result.done and turn_idx < args.max_turns:

llm_decision_backend.py ADDED Viewed

	@@ -0,0 +1,257 @@

+"""LLM-based decision backends for Glass Bridge tournament agents.
+When an agent's strategy_profile has model_name not in (None, "none", ""),
+the policy delegates to an LLM backend instead of the heuristic.
+"""
+from __future__ import annotations
+import json
+import re
+from abc import ABC, abstractmethod
+from typing import Any
+_transformers_available: bool | None = None
+def _check_transformers() -> bool:
+    """Report whether both ``torch`` and ``transformers`` can be imported.
+
+    The import probe runs only while the module-level
+    ``_transformers_available`` flag is still ``None``; the outcome is stored
+    in that flag and returned directly on later calls.
+    """
+    global _transformers_available
+    if _transformers_available is None:
+        try:
+            import torch  # noqa: F401
+            import transformers  # noqa: F401
+        except ImportError:
+            _transformers_available = False
+        else:
+            _transformers_available = True
+    return _transformers_available
+class LLMDecisionBackend(ABC):
+    """Abstract interface for an LLM-driven action selector."""
+    @abstractmethod
+    def select_action(
+        self,
+        observation: dict[str, Any],
+        strategy_profile: dict[str, Any],
+        legal_actions: list[Any],
+        fallback_fn: Any,
+    ) -> Any:
+        """Choose an action for the observing agent.
+
+        Args:
+            observation: The agent's observation dictionary.
+            strategy_profile: The agent's strategy profile dictionary.
+            legal_actions: The actions currently legal for the agent.
+            fallback_fn: Zero-argument callable producing a substitute action
+                when the backend cannot supply one itself.
+
+        Returns:
+            The chosen action. This abstract stub itself returns ``None``.
+        """
+def _observation_to_prompt(observation: dict[str, Any]) -> str:
+    """Render an agent observation as the plain-text prompt given to the LLM.
+
+    The prompt starts with a fixed description of the round structure, then
+    lists the phase, round, agent name, active agents and stepping order.
+    Optional sections follow: the agent's share/truth tendencies (when either
+    is set) and a line per past round (when ``round_history`` is non-empty).
+    The remainder depends on the phase: phases whose name starts with
+    ``"communication"`` get negotiation details, anything else gets movement
+    details. The legal actions are always listed last.
+
+    No reputation score is included; the model is expected to infer trust
+    from the game state it is shown.
+
+    Args:
+        observation: Observation dictionary; every key is optional and is
+            read with ``.get``.
+
+    Returns:
+        The prompt sections joined with newlines.
+    """
+    phase = observation.get("phase")
+    parts = [
+        "=== Round structure ===",
+        "Each round has two phases. (1) Negotiation: first each agent may make offers (communication_offer), "
+        "then each may respond to offers (communication_response). (2) Play: agents step onto the bridge one at a time "
+        "in current_order. You may only step when it is your turn (current_actor); all agents before you in the order "
+        "have already acted (fell or crossed). If it is not your turn or you are already done, your only legal action is NOOP.",
+        "",
+        f"Phase: {phase}",
+        f"Round: {observation.get('round_idx')}",
+        f"You are agent {observation.get('agent_name')}",
+        f"Active agents: {observation.get('active_agents', [])}",
+        f"Current order (stepping order this round): {observation.get('current_order', [])}",
+    ]
+    profile = observation.get("strategy_profile") or {}
+    share = profile.get("share_rate")
+    truth = profile.get("truth_rate")
+    if share is not None or truth is not None:
+        parts.append(
+            f"Your initial tendencies: share_rate={share}, truth_rate={truth}. "
+            "These are upfront settings; you may choose to share more/less or be more/less truthful as the game goes."
+        )
+    round_history = observation.get("round_history", [])
+    if round_history:
+        parts.append("Past rounds (order, survivors, eliminated, progress, trade_summary):")
+        for r in round_history:
+            parts.append(
+                f"  Round {r.get('round_idx')}: order={r.get('order')}, survivors={r.get('survivors')}, "
+                f"eliminated={r.get('eliminated')}, progress={r.get('progress')}, trades={r.get('trade_summary', {})}"
+            )
+    # A "phase" key that is present but None must not crash the prefix test;
+    # .get("phase", "") only covers the key being absent.
+    if str(phase or "").startswith("communication"):
+        parts.append(f"Negotiable partners: {observation.get('negotiable_partners', [])}")
+        parts.append(f"Your private known steps: {observation.get('private_known_steps', {})}")
+        parts.append(f"Assignment by agent: {observation.get('assignment_by_agent', {})}")
+        inc = observation.get("incoming_offers", [])
+        if inc:
+            # Only these four fields of each incoming offer are shown to the model.
+            inc_serial = [
+                {
+                    "offer_id": o.get("offer_id"),
+                    "proposer": o.get("proposer"),
+                    "request_steps": o.get("request_steps", []),
+                    "claims": o.get("claims", []),
+                }
+                for o in inc
+            ]
+            parts.append(f"Incoming offers: {inc_serial}")
+    else:
+        parts.append(f"Current actor (who steps now): {observation.get('current_actor')}")
+        parts.append(f"Current step index: {observation.get('current_step_idx')}")
+        parts.append(f"Verified public: {observation.get('verified_public', [])}")
+        parts.append(f"Your private known steps: {observation.get('private_known_steps', {})}")
+    parts.append(f"Legal actions: {observation.get('legal_actions', [])}")
+    return "\n".join(parts)
+def _movement_legal_step_actions(legal_actions: list[Any]) -> list[str]:
+    """Return the legal step actions ("LEFT", "RIGHT") in their original order.
+
+    The result is empty when neither string appears in *legal_actions*.
+    """
+    return [a for a in legal_actions if a in ("LEFT", "RIGHT")]
+def _finite_ints(values: list[Any]) -> list[int]:
+    """Return the real, finite numbers in *values* truncated to ``int``.
+
+    Non-numeric entries, booleans, NaN and +/-Infinity are dropped instead of
+    aborting the whole parse.
+    """
+    kept: list[int] = []
+    for value in values:
+        # bool is a subclass of int; a JSON true/false is not a step index or offer id.
+        if isinstance(value, bool) or not isinstance(value, (int, float)):
+            continue
+        try:
+            kept.append(int(value))
+        except (ValueError, OverflowError):
+            # json.loads accepts NaN/Infinity; int() raises on both.
+            continue
+    return kept
+def _parse_llm_action(raw: str, phase: str, legal_actions: list[Any]) -> Any | None:
+    """Convert raw LLM text into an action, or ``None`` if nothing usable is found.
+
+    Parsing happens in two stages:
+
+    1. The first JSON object in *raw* (at most one level of nested braces) is
+       decoded. ``OFFERS`` yields the sanitised offers, or NOOP when no offer
+       survives validation. ``RESPONSES`` yields the accepted offer ids, or
+       NOOP when ``accept_offer_ids`` is not a list. ``NOOP`` yields NOOP. Any
+       other type, or a decode error, falls through to stage 2.
+    2. If neither "LEFT" nor "RIGHT" is legal, a standalone "NOOP" token
+       yields NOOP and anything else yields ``None``. Otherwise the text is
+       searched for LEFT/RIGHT as whole words (case-insensitive): LEFT wins
+       when it occurs before any RIGHT and is legal, else RIGHT when it
+       occurs and is legal.
+
+    Args:
+        raw: Text generated by the model.
+        phase: Current phase name. Not consulted by the parser; kept so the
+            call signature stays stable.
+        legal_actions: Actions currently legal for the agent.
+
+    Returns:
+        ``"LEFT"``, ``"RIGHT"``, an action dictionary, or ``None``.
+    """
+    raw = raw.strip()
+    json_match = re.search(r"\{[^{}]*(?:\{[^{}]*\}[^{}]*)*\}", raw, re.DOTALL)
+    if json_match:
+        try:
+            parsed = json.loads(json_match.group())
+            action_type = str(parsed.get("type", "")).upper()
+            if action_type == "OFFERS":
+                offers = parsed.get("offers", [])
+                if not isinstance(offers, list):
+                    return None
+                valid_offers = []
+                for o in offers:
+                    if not isinstance(o, dict):
+                        continue
+                    r = o.get("recipient")
+                    g = o.get("give_steps", [])
+                    req = o.get("request_steps", [])
+                    mode = o.get("claim_mode", "truth")
+                    if r and isinstance(g, list) and isinstance(req, list):
+                        valid_offers.append({
+                            "recipient": str(r),
+                            "give_steps": _finite_ints(g),
+                            "request_steps": _finite_ints(req),
+                            # Anything other than "truth" is treated as a lie.
+                            "claim_mode": "truth" if str(mode).lower() == "truth" else "lie",
+                        })
+                if valid_offers:
+                    return {"type": "OFFERS", "offers": valid_offers}
+                return {"type": "NOOP"}
+            if action_type == "RESPONSES":
+                ids = parsed.get("accept_offer_ids", [])
+                if isinstance(ids, list):
+                    return {"type": "RESPONSES", "accept_offer_ids": _finite_ints(ids)}
+                return {"type": "NOOP"}
+            if action_type == "NOOP":
+                return {"type": "NOOP"}
+        except (json.JSONDecodeError, TypeError, ValueError):
+            pass
+    # Movement: only return LEFT/RIGHT if legal; otherwise accept NOOP or return None
+    step_legal = _movement_legal_step_actions(legal_actions)
+    if not step_legal:
+        if re.search(r"\bNOOP\b", raw, re.IGNORECASE):
+            return {"type": "NOOP"}
+        return None
+    # Require a non-letter on both sides so "BRIGHT" or "LEFTOVER" are not
+    # mistaken for a direction.
+    left_hit = re.search(r"(?<![A-Za-z])LEFT(?![A-Za-z])", raw, re.IGNORECASE)
+    right_hit = re.search(r"(?<![A-Za-z])RIGHT(?![A-Za-z])", raw, re.IGNORECASE)
+    left_first = left_hit is not None and (
+        right_hit is None or left_hit.start() < right_hit.start()
+    )
+    if left_first and "LEFT" in step_legal:
+        return "LEFT"
+    if right_hit is not None and "RIGHT" in step_legal:
+        return "RIGHT"
+    return None
+class QwenBackend(LLMDecisionBackend):
+    """Decision backend that prompts a causal language model loaded via ``transformers``.
+
+    The tokenizer and model are not loaded in the constructor; they are loaded
+    by ``_ensure_loaded`` the first time ``select_action`` reaches generation.
+    """
+    def __init__(self, model_path: str = "unsloth/Qwen2.5-3B-Instruct", device: str | None = None):
+        """Record the model location and target device.
+
+        Args:
+            model_path: Identifier handed to ``from_pretrained`` at load time.
+            device: Device to move the model to. When falsy, "cuda" is used
+                if ``torch.cuda.is_available()`` and "cpu" otherwise.
+
+        Raises:
+            ImportError: If ``_check_transformers()`` reports that torch or
+                transformers cannot be imported.
+        """
+        if not _check_transformers():
+            raise ImportError("LLM backends require transformers and torch. Install with: pip install transformers torch")
+        self._model_path = model_path
+        self._device = device
+        self._model = None
+        self._tokenizer = None
+    def _ensure_loaded(self) -> None:
+        """Load the tokenizer and model once; do nothing if a model is already set."""
+        if self._model is not None:
+            return
+        import torch
+        from transformers import AutoModelForCausalLM, AutoTokenizer
+        # NOTE(review): trust_remote_code=True is understood to let code shipped
+        # with the model repository run locally; confirm every model_path is trusted.
+        self._tokenizer = AutoTokenizer.from_pretrained(self._model_path, trust_remote_code=True)
+        device = self._device if self._device else ("cuda" if torch.cuda.is_available() else "cpu")
+        self._model = AutoModelForCausalLM.from_pretrained(
+            self._model_path,
+            torch_dtype="auto",
+            trust_remote_code=True,
+        )
+        self._model = self._model.to(device)
+        self._model.eval()
+        # Remember the resolved device so inputs are moved to the same place.
+        self._device = device
+    def select_action(
+        self,
+        observation: dict[str, Any],
+        strategy_profile: dict[str, Any],
+        legal_actions: list[Any],
+        fallback_fn: Any,
+    ) -> Any:
+        """Ask the model for an action and parse its reply.
+
+        Args:
+            observation: Observation dictionary rendered into the prompt.
+            strategy_profile: Accepted for interface compatibility; this
+                method does not read it.
+            legal_actions: Actions currently legal for the agent; used to pick
+                the movement instructions and to validate the parsed reply.
+            fallback_fn: Zero-argument callable used when the phase is not
+                one of the three handled phases or the reply cannot be parsed.
+
+        Returns:
+            The parsed action, or the result of ``fallback_fn()``.
+        """
+        phase = observation.get("phase", "")
+        if phase not in ("communication_offer", "communication_response", "movement"):
+            return fallback_fn()
+        self._ensure_loaded()
+        import torch
+        prompt = _observation_to_prompt(observation)
+        # Phase-specific instructions describing the exact output format expected.
+        if phase == "communication_offer":
+            output_format = (
+                "NEGOTIATION PHASE (offers). Output exactly one option from Legal actions. "
+                "If Legal actions includes {\"type\":\"OFFERS\"}, you may output {\"type\":\"OFFERS\",\"offers\":[...]} or {\"type\":\"NOOP\"}. "
+                "If only {\"type\":\"NOOP\"} is legal, output {\"type\":\"NOOP\"}. No other text."
+            )
+        elif phase == "communication_response":
+            output_format = (
+                "NEGOTIATION PHASE (responses). Output exactly one option from Legal actions. "
+                "Either {\"type\":\"RESPONSES\",\"accept_offer_ids\":[...]} or {\"type\":\"NOOP\"}. "
+                "If only {\"type\":\"NOOP\"} is legal, output {\"type\":\"NOOP\"}. No other text."
+            )
+        else:
+            # Movement: the instructions depend on whether LEFT/RIGHT is legal right now.
+            if not _movement_legal_step_actions(legal_actions):
+                output_format = (
+                    "PLAY PHASE (movement). It is not your turn to step (or you are already done). "
+                    "Your only legal action is NOOP. Output exactly: {\"type\":\"NOOP\"}. No other text."
+                )
+            else:
+                output_format = (
+                    "PLAY PHASE (movement). It is your turn to step. Output exactly one word: \"LEFT\" or \"RIGHT\". No other text."
+                )
+        user_content = f"{prompt}\n\n=== Your response (must be exactly one of Legal actions) ===\n{output_format}"
+        messages = [
+            {"role": "system", "content": (
+                "You are an agent in a glass bridge game. Each round has a NEGOTIATION phase (offers, then responses) "
+                "and a PLAY phase (stepping onto the bridge in turn order). You are given initial share_rate and truth_rate; "
+                "you may update your own behavior as you go (e.g. share more or less, be more or less truthful). "
+                "You may only step when it is your turn—when all agents before you in the round order have already stepped (fell or crossed). "
+                "Maximize your survival; infer trust from past rounds and trades. "
+                "CRITICAL: Output only a valid action. Check Legal actions in the observation; your response must be exactly one of those options. "
+                "Invalid actions (e.g. LEFT or RIGHT when only NOOP is legal) are rejected. No prose, no explanation."
+            )},
+            {"role": "user", "content": user_content},
+        ]
+        text = self._tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
+        inputs = self._tokenizer([text], return_tensors="pt").to(self._device)
+        # Sampling is enabled (do_sample=True, temperature=0.3), so replies may differ between calls.
+        with torch.no_grad():
+            out = self._model.generate(**inputs, max_new_tokens=256, do_sample=True, temperature=0.3, pad_token_id=self._tokenizer.eos_token_id)
+        # Decode only the tokens after the prompt length, i.e. the newly generated part.
+        response = self._tokenizer.batch_decode(out[:, inputs["input_ids"].shape[1]:], skip_special_tokens=True)[0]
+        parsed = _parse_llm_action(response, phase, legal_actions)
+        return parsed if parsed is not None else fallback_fn()
+# Maps a model name to (backend class, default constructor kwargs).
+# Every entry, including the SmolLM2 ones, is served by QwenBackend.
+# NOTE(review): "qwen3.5" and "qwen2.5" both point at the same
+# unsloth/Qwen2.5-3B-Instruct checkpoint; confirm this is intended.
+_LLM_BACKEND_REGISTRY: dict[str, tuple[type[LLMDecisionBackend], dict[str, Any]]] = {
+    "qwen3.5": (QwenBackend, {"model_path": "unsloth/Qwen2.5-3B-Instruct"}),
+    "qwen2.5": (QwenBackend, {"model_path": "unsloth/Qwen2.5-3B-Instruct"}),
+    "qwen2.5-7b": (QwenBackend, {"model_path": "Qwen/Qwen2.5-7B-Instruct"}),
+    "smollm2-1.7b": (QwenBackend, {"model_path": "HuggingFaceTB/SmolLM2-1.7B-Instruct"}),
+    "smollm2-360m": (QwenBackend, {"model_path": "HuggingFaceTB/SmolLM2-360M-Instruct"}),
+    "smollm2-135m": (QwenBackend, {"model_path": "HuggingFaceTB/SmolLM2-135M-Instruct"}),
+}
+# Backend instances that have already been constructed, keyed by string.
+_backend_cache: dict[str, LLMDecisionBackend] = {}
+def get_llm_backend(model_name: str, model_path_override: str | None = None) -> LLMDecisionBackend | None:
+    """Return the (cached) backend registered for *model_name*, if any.
+
+    Args:
+        model_name: Registry key, matched case-insensitively. Falsy values and
+            the strings "none", "null" and "" mean "no LLM backend".
+        model_path_override: When truthy, replaces the registry's default
+            ``model_path`` for this backend. The override is part of the
+            cache key, so different overrides give different instances.
+
+    Returns:
+        The backend instance, or ``None`` when no backend is requested, the
+        name is not registered, or constructing the backend fails.
+        Construction failures are not cached, so a later call tries again.
+    """
+    if not model_name or str(model_name).lower() in ("none", "null", ""):
+        return None
+    key = str(model_name).lower()
+    if key not in _LLM_BACKEND_REGISTRY:
+        return None
+    cache_key = f"{key}:{model_path_override or ''}"
+    if cache_key in _backend_cache:
+        return _backend_cache[cache_key]
+    cls, kwargs = _LLM_BACKEND_REGISTRY[key]
+    if model_path_override:
+        # Build a new dict so the registry's default kwargs are never mutated.
+        kwargs = {**kwargs, "model_path": model_path_override}
+    try:
+        backend = cls(**kwargs)
+    except Exception:
+        # Best effort: the caller treats None as "no backend available".
+        # Record the reason at debug level so the failure is not invisible.
+        import logging
+        logging.getLogger(__name__).debug(
+            "Could not construct LLM backend %r", key, exc_info=True
+        )
+        return None
+    _backend_cache[cache_key] = backend
+    return backend

models.py CHANGED Viewed

@@ -144,9 +144,10 @@ class StrategyProfile(BaseModel):
     model_config = ConfigDict(extra="allow")
     kind: str = "share_profile"
     share_rate: float = 0.5
     truth_rate: float = 0.5
-    label: str = "share_0.50_truth_0.50"
 class ResetRequest(BaseModel):
@@ -155,6 +156,9 @@ class ResetRequest(BaseModel):
     max_rounds: int = 25
     initial_players: int = 16
     first_round_num_steps: int = 18
     strategy_profiles: dict[str, StrategyProfile] | None = None

     model_config = ConfigDict(extra="allow")
     kind: str = "share_profile"
+    model_name: str = "qwen3.5"
     share_rate: float = 0.5
     truth_rate: float = 0.5
+    label: str = "model_qwen3.5_share_0.50_truth_0.50"
 class ResetRequest(BaseModel):
     max_rounds: int = 25
     initial_players: int = 16
     first_round_num_steps: int = 18
+    share_rates: list[float] | None = None
+    truth_rates: list[float] | None = None
+    llm_model_pool: list[str] | None = None
     strategy_profiles: dict[str, StrategyProfile] | None = None

policies.py CHANGED Viewed

@@ -3,6 +3,7 @@ from __future__ import annotations
 import random
 from typing import Any
 from .tournament_env import GlassBridgeTournamentEnv
@@ -14,10 +15,12 @@ class TournamentGlassBridgePolicy:
         strategy_profile: dict[str, Any],
         seed: int = 0,
         adaptation_config: dict[str, Any] | None = None,
     ):
         self.strategy_profile = dict(strategy_profile)
         self._rng = random.Random(seed)
         self.adaptation = build_tournament_adaptation_strategy(adaptation_config or {})
     def select_action(self, observation: dict) -> Any:
         legal = observation.get("legal_actions", [])
@@ -25,6 +28,27 @@ class TournamentGlassBridgePolicy:
             raise RuntimeError("No legal actions available")
         phase = observation.get("phase")
         if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_OFFER:
             return self._offer_action(observation)
         if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_RESPONSE:
@@ -249,14 +273,20 @@ def build_tournament_adaptation_strategy(config: dict[str, Any]) -> TournamentAd
 def build_tournament_strategy_grid(
     share_rates: list[float],
     truth_rates: list[float],
 ) -> list[dict[str, Any]]:
     return [
         {
             "kind": "share_profile",
             "share_rate": float(share_rate),
             "truth_rate": float(truth_rate),
-            "label": f"share_{float(share_rate):.2f}_truth_{float(truth_rate):.2f}",
         }
         for share_rate in share_rates
         for truth_rate in truth_rates
     ]
@@ -267,9 +297,14 @@ def assign_tournament_strategy_profiles(
     seed: int,
     share_rates: list[float],
     truth_rates: list[float],
 ) -> dict[str, dict[str, Any]]:
     rng = random.Random(seed)
-    grid = build_tournament_strategy_grid(share_rates=share_rates, truth_rates=truth_rates)
     return {agent_name: dict(rng.choice(grid)) for agent_name in agent_names}
@@ -277,6 +312,7 @@ def build_tournament_glass_bridge_population(
     strategy_profiles: dict[str, dict[str, Any]],
     seed: int,
     adaptation_config: dict[str, Any] | None = None,
 ) -> dict[str, TournamentGlassBridgePolicy]:
     population: dict[str, TournamentGlassBridgePolicy] = {}
     for offset, agent_name in enumerate(sorted(strategy_profiles.keys())):
@@ -284,5 +320,6 @@ def build_tournament_glass_bridge_population(
             strategy_profile=strategy_profiles[agent_name],
             seed=(seed * 1000) + 50_000 + offset,
             adaptation_config=adaptation_config,
         )
     return population

 import random
 from typing import Any
+from .llm_decision_backend import get_llm_backend
 from .tournament_env import GlassBridgeTournamentEnv
         strategy_profile: dict[str, Any],
         seed: int = 0,
         adaptation_config: dict[str, Any] | None = None,
+        llm_model_paths: dict[str, str] | None = None,
     ):
         self.strategy_profile = dict(strategy_profile)
         self._rng = random.Random(seed)
         self.adaptation = build_tournament_adaptation_strategy(adaptation_config or {})
+        self.llm_model_paths = dict(llm_model_paths or {})
     def select_action(self, observation: dict) -> Any:
         legal = observation.get("legal_actions", [])
             raise RuntimeError("No legal actions available")
         phase = observation.get("phase")
+        model_name = self.strategy_profile.get("model_name")
+        if model_name and str(model_name).lower() not in ("none", "null", ""):
+            backend = get_llm_backend(
+                str(model_name),
+                model_path_override=self.llm_model_paths.get(str(model_name)),
+            )
+            if backend is not None:
+                def fallback() -> Any:
+                    if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_OFFER:
+                        return self._offer_action(observation)
+                    if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_RESPONSE:
+                        return self._response_action(observation)
+                    return self._movement_action(observation, legal)
+                return backend.select_action(
+                    observation=observation,
+                    strategy_profile=self.strategy_profile,
+                    legal_actions=legal,
+                    fallback_fn=fallback,
+                )
         if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_OFFER:
             return self._offer_action(observation)
         if phase == GlassBridgeTournamentEnv.PHASE_COMMUNICATION_RESPONSE:
 def build_tournament_strategy_grid(
     share_rates: list[float],
     truth_rates: list[float],
+    llm_model_pool: list[str] | None = None,
 ) -> list[dict[str, Any]]:
     return [
         {
             "kind": "share_profile",
+            "model_name": model_name,
             "share_rate": float(share_rate),
             "truth_rate": float(truth_rate),
+            "label": (
+                f"model_{model_name}_share_{float(share_rate):.2f}"
+                f"_truth_{float(truth_rate):.2f}"
+            ),
         }
+        for model_name in [str(name) for name in (llm_model_pool or ["qwen3.5"])]
         for share_rate in share_rates
         for truth_rate in truth_rates
     ]
     seed: int,
     share_rates: list[float],
     truth_rates: list[float],
+    llm_model_pool: list[str] | None = None,
 ) -> dict[str, dict[str, Any]]:
     rng = random.Random(seed)
+    grid = build_tournament_strategy_grid(
+        share_rates=share_rates,
+        truth_rates=truth_rates,
+        llm_model_pool=llm_model_pool,
+    )
     return {agent_name: dict(rng.choice(grid)) for agent_name in agent_names}
     strategy_profiles: dict[str, dict[str, Any]],
     seed: int,
     adaptation_config: dict[str, Any] | None = None,
+    llm_model_paths: dict[str, str] | None = None,
 ) -> dict[str, TournamentGlassBridgePolicy]:
     population: dict[str, TournamentGlassBridgePolicy] = {}
     for offset, agent_name in enumerate(sorted(strategy_profiles.keys())):
             strategy_profile=strategy_profiles[agent_name],
             seed=(seed * 1000) + 50_000 + offset,
             adaptation_config=adaptation_config,
+            llm_model_paths=llm_model_paths,
         )
     return population

server/glass_bridge_environment.py CHANGED Viewed

@@ -17,7 +17,6 @@ from glass_bridge.models import (
     ResetResponse,
     StepRequest,
     StepResponse,
-    StrategyProfile,
 )
 from glass_bridge.tournament_env import GlassBridgeTournamentEnv
@@ -29,13 +28,15 @@ class GlassBridgeOpenEnvSession:
     def reset(self, request: ResetRequest) -> ResetResponse:
         seed = 0 if request.seed is None else int(request.seed)
-        strategy_profiles = self._normalize_strategy_profiles(request)
         self.env = GlassBridgeTournamentEnv(
             seed=seed,
             max_rounds=int(request.max_rounds),
             initial_players=int(request.initial_players),
             first_round_num_steps=int(request.first_round_num_steps),
-            strategy_profiles=strategy_profiles,
         )
         raw = self.env.reset(seed=seed)
         return ResetResponse(session_id=self.session_id, result=self._build_result(raw))
@@ -64,12 +65,7 @@ class GlassBridgeOpenEnvSession:
                 agent_name: profile.model_dump(mode="python")
                 for agent_name, profile in request.strategy_profiles.items()
             }
-        profiles: dict[str, dict] = {}
-        for agent_idx in range(int(request.initial_players)):
-            agent_name = GlassBridgeTournamentEnv.agent_name(agent_idx)
-            profiles[agent_name] = StrategyProfile().model_dump(mode="python")
-        return profiles
     @staticmethod
     def _build_result(raw: dict) -> EnvironmentResult:

     ResetResponse,
     StepRequest,
     StepResponse,
 )
 from glass_bridge.tournament_env import GlassBridgeTournamentEnv
     def reset(self, request: ResetRequest) -> ResetResponse:
         seed = 0 if request.seed is None else int(request.seed)
         self.env = GlassBridgeTournamentEnv(
             seed=seed,
             max_rounds=int(request.max_rounds),
             initial_players=int(request.initial_players),
             first_round_num_steps=int(request.first_round_num_steps),
+            strategy_profiles=self._normalize_strategy_profiles(request),
+            share_rates=request.share_rates,
+            truth_rates=request.truth_rates,
+            llm_model_pool=request.llm_model_pool,
         )
         raw = self.env.reset(seed=seed)
         return ResetResponse(session_id=self.session_id, result=self._build_result(raw))
                 agent_name: profile.model_dump(mode="python")
                 for agent_name, profile in request.strategy_profiles.items()
             }
+        return {}
     @staticmethod
     def _build_result(raw: dict) -> EnvironmentResult:

tournament_env.py CHANGED Viewed

@@ -29,12 +29,19 @@ class GlassBridgeTournamentEnv:
         initial_players: int = DEFAULT_INITIAL_PLAYERS,
         first_round_num_steps: int = DEFAULT_FIRST_ROUND_NUM_STEPS,
         strategy_profiles: dict[str, dict[str, Any]] | None = None,
     ):
         self.rng = random.Random(seed)
         self.max_rounds = max_rounds
         self.initial_players = initial_players
         self.first_round_num_steps = first_round_num_steps
         self.strategy_profiles = strategy_profiles or {}
         self.all_agents = [self.agent_name(i) for i in range(self.initial_players)]
         self.phase = self.PHASE_TERMINAL
@@ -67,6 +74,8 @@ class GlassBridgeTournamentEnv:
     def reset(self, seed: int | None = None) -> dict[str, Any]:
         if seed is not None:
             self.rng.seed(seed)
         self.phase = self.PHASE_COMMUNICATION_OFFER
         self.round_idx = 0
@@ -108,6 +117,27 @@ class GlassBridgeTournamentEnv:
         events = self._start_new_round()
         return self._result(self._zero_rewards(), done=False, events=events)
     def step(self, action_dict: dict[Any, str]) -> dict[str, Any]:
         normalized_actions = self._normalize_action_dict(action_dict)

         initial_players: int = DEFAULT_INITIAL_PLAYERS,
         first_round_num_steps: int = DEFAULT_FIRST_ROUND_NUM_STEPS,
         strategy_profiles: dict[str, dict[str, Any]] | None = None,
+        share_rates: list[float] | None = None,
+        truth_rates: list[float] | None = None,
+        llm_model_pool: list[str] | None = None,
     ):
         self.rng = random.Random(seed)
         self.max_rounds = max_rounds
         self.initial_players = initial_players
         self.first_round_num_steps = first_round_num_steps
+        self._explicit_strategy_profiles = strategy_profiles is not None
         self.strategy_profiles = strategy_profiles or {}
+        self.share_rates = list(share_rates or [0.0, 0.25, 0.5, 0.75, 1.0])
+        self.truth_rates = list(truth_rates or [0.0, 0.25, 0.5, 0.75, 1.0])
+        self.llm_model_pool = [str(model_name) for model_name in (llm_model_pool or ["qwen3.5"])]
         self.all_agents = [self.agent_name(i) for i in range(self.initial_players)]
         self.phase = self.PHASE_TERMINAL
     def reset(self, seed: int | None = None) -> dict[str, Any]:
         if seed is not None:
             self.rng.seed(seed)
+        if not self._explicit_strategy_profiles:
+            self.strategy_profiles = self._assign_strategy_profiles()
         self.phase = self.PHASE_COMMUNICATION_OFFER
         self.round_idx = 0
         events = self._start_new_round()
         return self._result(self._zero_rewards(), done=False, events=events)
+    def _assign_strategy_profiles(self) -> dict[str, dict[str, Any]]:
+        strategy_grid = [
+            {
+                "kind": "share_profile",
+                "model_name": model_name,
+                "share_rate": float(share_rate),
+                "truth_rate": float(truth_rate),
+                "label": (
+                    f"model_{model_name}_share_{float(share_rate):.2f}"
+                    f"_truth_{float(truth_rate):.2f}"
+                ),
+            }
+            for model_name in self.llm_model_pool
+            for share_rate in self.share_rates
+            for truth_rate in self.truth_rates
+        ]
+        return {
+            agent_name: dict(self.rng.choice(strategy_grid))
+            for agent_name in self.all_agents
+        }
     def step(self, action_dict: dict[Any, str]) -> dict[str, Any]:
         normalized_actions = self._normalize_action_dict(action_dict)