# glass_bridge/models.py
# Hugging Face Hub page residue: uploader avatar "viirii's picture";
# commit "Upload folder using huggingface_hub" (467a609, verified).
# Copyright (c) Meta Platforms, Inc. and affiliates.
# All rights reserved.
#
# This source code is licensed under the BSD-style license found in the
# LICENSE file in the root directory of this source tree.
from __future__ import annotations
from typing import Any, Literal
from pydantic import BaseModel, ConfigDict, Field
class ReputationMemory(BaseModel):
    """Three integer counters, each starting at zero.

    NOTE(review): judging by the names these tally truthful claims, lies and
    shares attributed to a peer -- confirm against the code that updates them.
    """

    truth_count: int = 0
    lie_count: int = 0
    share_count: int = 0
class NegotiationClaim(BaseModel):
    """A single claim pairing a step index with a claimed side.

    Both fields are required integers.
    """

    # Index of the step the claim refers to.
    step_idx: int
    # Side asserted for that step.
    # NOTE(review): the integer encoding of a side is not visible here.
    claimed_side: int
class IncomingOffer(BaseModel):
    """An offer as presented to the agent receiving it.

    ``offer_id`` and ``proposer`` are required; the remaining fields default
    to empty lists / zero.
    """

    # Identifier of the offer; ``AgentAction.accept_offer_ids`` holds values
    # of the same type (presumably these ids -- confirm with the environment).
    offer_id: int
    # Name of the agent that made the offer.
    proposer: str

    # Claims attached to the offer.
    claims: list[NegotiationClaim] = Field(default_factory=list)
    # Step indices the proposer asks for in return.
    request_steps: list[int] = Field(default_factory=list)

    # NOTE(review): presumably the number of rejections recorded between this
    # proposer/recipient pair -- verify against the producer of this payload.
    pair_rejections: int = 0
class NegotiationOfferProposal(BaseModel):
    """An offer an agent wants to send to another agent.

    Only ``recipient`` is required.
    """

    # Name of the agent the offer is addressed to.
    recipient: str

    # Step indices offered / step indices asked for in exchange.
    give_steps: list[int] = Field(default_factory=list)
    request_steps: list[int] = Field(default_factory=list)

    # Restricted to "truth" or "lie"; defaults to "truth".
    # NOTE(review): how the environment turns this into concrete claims is not
    # visible in this file.
    claim_mode: Literal["truth", "lie"] = "truth"
class AgentAction(BaseModel):
    """Validated form of one agent's action.

    ``action_type`` selects which of the other fields is meaningful:
    ``offers`` for ``"OFFERS"`` and ``accept_offer_ids`` for ``"RESPONSES"``.
    ``"NOOP"``, ``"LEFT"`` and ``"RIGHT"`` carry no payload.
    """

    action_type: Literal["NOOP", "LEFT", "RIGHT", "OFFERS", "RESPONSES"] = "NOOP"
    offers: list[NegotiationOfferProposal] = Field(default_factory=list)
    accept_offer_ids: list[int] = Field(default_factory=list)

    @classmethod
    def from_policy_output(cls, action: Any) -> "AgentAction":
        """Build an ``AgentAction`` from a loosely-typed policy output.

        Accepted payloads:

        * an ``AgentAction`` instance -- returned unchanged;
        * a string -- exactly ``"LEFT"`` or ``"RIGHT"`` becomes that move,
          any other string becomes ``NOOP``;
        * a dict -- its ``"type"`` entry is stringified and upper-cased;
          ``"OFFERS"`` and ``"RESPONSES"`` are parsed, anything else
          (including a missing ``"type"``) becomes ``NOOP``.

        A missing *or null* ``"offers"`` / ``"accept_offer_ids"`` entry is
        treated as an empty list.

        NOTE(review): string payloads are matched case-sensitively while the
        dict ``"type"`` is upper-cased first, and a dict with type ``"LEFT"``
        or ``"RIGHT"`` maps to ``NOOP``. Left as-is; confirm it is intended.

        Raises:
            TypeError: if ``action`` is not an ``AgentAction``, ``str`` or
                ``dict``.
            pydantic.ValidationError: if an offer entry does not validate.
            ValueError: if an accepted offer id cannot be converted by
                ``int()`` (``int()`` may also raise ``TypeError``).
        """
        if isinstance(action, cls):
            return action

        if isinstance(action, str):
            if action in {"LEFT", "RIGHT"}:
                return cls(action_type=action)
            return cls(action_type="NOOP")

        if isinstance(action, dict):
            action_type = str(action.get("type", "NOOP")).upper()

            if action_type == "OFFERS":
                # ``or []`` (rather than a ``.get`` default) also covers an
                # explicit ``None`` value, which would otherwise fail with
                # "'NoneType' object is not iterable".
                raw_offers = action.get("offers") or []
                offers = [
                    NegotiationOfferProposal.model_validate(offer)
                    for offer in raw_offers
                ]
                return cls(action_type="OFFERS", offers=offers)

            if action_type == "RESPONSES":
                raw_ids = action.get("accept_offer_ids") or []
                accept_offer_ids = [int(offer_id) for offer_id in raw_ids]
                return cls(action_type="RESPONSES", accept_offer_ids=accept_offer_ids)

            return cls(action_type="NOOP")

        raise TypeError(f"Unsupported action payload: {action!r}")

    def to_env_action(self) -> Any:
        """Convert this action to the plain-Python form built below.

        Returns:
            ``"LEFT"`` / ``"RIGHT"`` as a bare string for moves; otherwise a
            dict with a ``"type"`` key plus ``"offers"`` (dumped to plain
            Python objects) or ``"accept_offer_ids"`` as appropriate.
            ``NOOP`` yields ``{"type": "NOOP"}``.
        """
        if self.action_type in {"LEFT", "RIGHT"}:
            return self.action_type
        if self.action_type == "OFFERS":
            return {
                "type": "OFFERS",
                "offers": [offer.model_dump(mode="python") for offer in self.offers],
            }
        if self.action_type == "RESPONSES":
            return {
                "type": "RESPONSES",
                # Shallow copy so the returned list is not the model's own.
                "accept_offer_ids": self.accept_offer_ids[:],
            }
        return {"type": "NOOP"}
class AgentObservation(BaseModel):
    """Observation payload for a single agent.

    ``agent_name``, ``self_id``, ``phase``, ``round_idx``, ``initial_players``
    and ``round_num_steps`` are required; every other field defaults to an
    empty container or ``None``. ``extra="allow"`` keeps keys that are not
    declared here instead of rejecting them.

    NOTE(review): the grouping comments below are inferred from field names
    and types only -- confirm against the environment that fills this model.
    """

    model_config = ConfigDict(extra="allow")

    # --- Required identity and round context -------------------------------
    agent_name: str
    self_id: int
    phase: str
    round_idx: int
    initial_players: int
    round_num_steps: int

    # --- Roster and ordering, keyed by agent name ---------------------------
    active_agents: list[str] = Field(default_factory=list)
    current_order: list[str] = Field(default_factory=list)
    position_map: dict[str, int] = Field(default_factory=dict)
    assignment_by_agent: dict[str, list[int]] = Field(default_factory=dict)
    current_actor: str | None = None
    current_position: int | None = None

    # --- Per-agent round status, keyed by agent name ------------------------
    round_alive: dict[str, bool] = Field(default_factory=dict)
    round_finished: dict[str, bool] = Field(default_factory=dict)
    round_progress: dict[str, int] = Field(default_factory=dict)

    # --- Step knowledge ------------------------------------------------------
    verified_public: list[int] = Field(default_factory=list)
    # Integer-to-integer mapping; presumably step index -> side (TODO confirm).
    private_known_steps: dict[int, int] = Field(default_factory=dict)
    # NOTE(review): looks like parallel lists (step, side) -- verify.
    owned_steps: list[int] = Field(default_factory=list)
    owned_sides: list[int] = Field(default_factory=list)
    current_step_idx: int | None = None

    # --- Negotiation state ---------------------------------------------------
    reputation: dict[str, ReputationMemory] = Field(default_factory=dict)
    pair_rejections: dict[str, int] = Field(default_factory=dict)
    negotiable_partners: list[str] = Field(default_factory=list)
    incoming_offers: list[IncomingOffer] = Field(default_factory=list)

    # --- Free-form data (schemas are not declared in this file) -------------
    strategy_profile: dict[str, Any] = Field(default_factory=dict)
    round_history: list[dict[str, Any]] = Field(default_factory=list)
    legal_actions: list[Any] = Field(default_factory=list)
class EnvironmentInfo(BaseModel):
    """Environment-level information accompanying a result.

    ``phase``, ``round_idx``, ``initial_players`` and ``round_num_steps`` are
    required; all other fields default to empty containers, ``0`` or ``None``.
    ``extra="allow"`` keeps keys that are not declared here.

    NOTE(review): the grouping comments below are inferred from field names
    and types only -- confirm against the environment that fills this model.
    """

    model_config = ConfigDict(extra="allow")

    # --- Required round context ---------------------------------------------
    phase: str
    round_idx: int
    initial_players: int
    round_num_steps: int

    # --- Roster and turn state ----------------------------------------------
    active_agents: list[str] = Field(default_factory=list)
    current_order: list[str] = Field(default_factory=list)
    current_actor: str | None = None
    verified_public_count: int = 0

    # --- Negotiation counters -----------------------------------------------
    # String-keyed; the key format for a "pair" is not visible in this file.
    pair_offer_counts: dict[str, int] = Field(default_factory=dict)
    pair_rejections: dict[str, int] = Field(default_factory=dict)

    # --- History, strategies and statistics (free-form schemas) -------------
    round_history: list[dict[str, Any]] = Field(default_factory=list)
    strategy_profiles: dict[str, dict[str, Any]] = Field(default_factory=dict)
    cumulative_stats: dict[str, dict[str, int]] = Field(default_factory=dict)

    # --- Outcome ------------------------------------------------------------
    winner: str | None = None
    winner_strategy: dict[str, Any] = Field(default_factory=dict)
    events: list[dict[str, Any]] = Field(default_factory=list)
class EnvironmentResult(BaseModel):
    """Bundle of observations, rewards, a done flag and environment info.

    All four fields are required.
    """

    # String-keyed (presumably by agent name -- confirm with the producer).
    observations: dict[str, AgentObservation]
    rewards: dict[str, float]

    done: bool
    info: EnvironmentInfo
class StrategyProfile(BaseModel):
    """Strategy description with defaults for every field.

    ``extra="allow"`` keeps keys that are not declared here.
    """

    model_config = ConfigDict(extra="allow")

    kind: str = "share_profile"
    model_name: str = "qwen3.5"

    # NOTE(review): presumably probabilities in [0, 1]; no range validation
    # is declared on these fields.
    share_rate: float = 0.5
    truth_rate: float = 0.5

    # Static default that happens to match the defaults above. It is NOT
    # recomputed when model_name / share_rate / truth_rate are overridden.
    label: str = "model_qwen3.5_share_0.50_truth_0.50"
class ResetRequest(BaseModel):
    """Request body for a reset; every field is optional.

    NOTE(review): how ``None`` values are interpreted (for example whether a
    missing ``session_id`` creates a new session) is decided by the handler,
    which is not visible in this file.
    """

    session_id: str | None = None
    seed: int | None = None

    # Game-size parameters and their defaults.
    max_rounds: int = 25
    initial_players: int = 16
    first_round_num_steps: int = 18

    # Optional strategy configuration; ``None`` means "not supplied".
    share_rates: list[float] | None = None
    truth_rates: list[float] | None = None
    llm_model_pool: list[str] | None = None
    strategy_profiles: dict[str, StrategyProfile] | None = None
class ResetResponse(BaseModel):
    """Response to a reset: a session id plus an environment result."""

    session_id: str
    result: EnvironmentResult
class StepRequest(BaseModel):
    """Request body for a step within an existing session."""

    session_id: str
    # String-keyed actions (presumably by agent name -- confirm); defaults to
    # an empty mapping. Values are validated as ``AgentAction`` models, so
    # dict entries use the ``action_type`` field name here.
    actions: dict[str, AgentAction] = Field(default_factory=dict)
class StepResponse(BaseModel):
    """Response to a step: a session id plus an environment result."""

    session_id: str
    result: EnvironmentResult
class CloseResponse(BaseModel):
    """Response to a close request: a session id plus a ``closed`` flag."""

    session_id: str
    closed: bool