File size: 8,836 Bytes
"""End-to-end environment tests for the BoardSim OpenEnv environment.

Covers:
  * deterministic reset/step contract
  * observation schema and required fields
  * 10-round episode termination
  * reward bounds (per-step dense, terminal spikes)
  * vote resolution on weighted CEO + NPC tally
  * format penalty fires for invalid decisions
  * pitch bootstrap fires for non-empty pitch
  * runway-exhaustion bankruptcy path
  * trust dynamics persist and update bidirectionally
  * event order is shuffled per seed (no trajectory memorisation)
"""
from __future__ import annotations
import statistics
import pytest
from board_sim_env.models import BoardSimAction
from board_sim_env.server.board_sim_env_environment import (
BoardSimEnvironment,
EVENTS,
NPC_AGENDAS,
ROLE_WEIGHT,
compute_profitability_score,
)
# ─── Determinism ──────────────────────────────────────────────────────────
def test_reset_is_deterministic_per_seed():
    """Two fresh environments reset with the same seed expose the same opening round."""
    first_env = BoardSimEnvironment()
    second_env = BoardSimEnvironment()
    first_obs = first_env.reset(seed=7)
    second_obs = second_env.reset(seed=7)

    assert first_obs.event == second_obs.event
    assert first_obs.options == second_obs.options

    # Compare only the statement text of each NPC entry, in order.
    first_lines = [entry["statement"] for entry in first_obs.npc_statements]
    second_lines = [entry["statement"] for entry in second_obs.npc_statements]
    assert first_lines == second_lines
def test_different_seeds_produce_different_event_orders():
    """Opening events collected over seeds 0..19 must show at least four distinct values."""
    opening_events = {
        BoardSimEnvironment().reset(seed=seed).event
        for seed in range(20)
    }
    assert len(opening_events) >= 4, "event shuffling should produce variety across seeds"
# ─── Schema ───────────────────────────────────────────────────────────────
def test_observation_schema():
    """The first observation carries the expected state keys, options and NPC statements."""
    required_state_keys = ("revenue", "burn_rate", "runway_months", "profitability_score", "trust")
    required_statement_fields = {"role", "vote", "confidence", "statement"}

    observation = BoardSimEnvironment().reset(seed=0)

    for key in required_state_keys:
        assert key in observation.state, f"observation.state missing {key}"

    assert observation.round == 1
    assert len(observation.options) == 3
    assert len(observation.npc_statements) == 4

    # Each NPC entry must be fully populated, come from a known role,
    # and vote for one of the options on offer this round.
    for statement in observation.npc_statements:
        assert required_statement_fields.issubset(statement.keys())
        assert statement["role"] in NPC_AGENDAS
        assert statement["vote"] in observation.options
def test_npc_role_set_and_weights():
    """NPC_AGENDAS lists exactly the four board roles; every role has a positive vote weight."""
    expected_roles = {"CTO", "CFO", "Investor Rep", "Independent"}
    assert set(NPC_AGENDAS) == expected_roles
    assert ROLE_WEIGHT["CEO"] == 2.5
    assert all(ROLE_WEIGHT[npc_role] > 0 for npc_role in NPC_AGENDAS)
# ─── Episode lifecycle ───────────────────────────────────────────────────
def test_episode_terminates_at_or_before_ten_rounds():
    """Always picking the first option ends the episode within ten steps with a known reason."""
    known_reasons = {"runway_exhausted", "acquisition", "ipo", "stay_private", "finished_10"}
    env = BoardSimEnvironment()
    obs = env.reset(seed=42)

    steps_taken = 0
    # Hard cap of 15 iterations so a non-terminating environment fails instead of hanging.
    for _ in range(15):
        if obs.done:
            break
        action = BoardSimAction(decision=obs.options[0], coalition_pitch="")
        obs = env.step(action)
        steps_taken += 1

    assert obs.done
    assert steps_taken <= 10
    assert obs.state["done_reason"] in known_reasons
def test_step_returns_required_fields():
    """The object returned by step() exposes reward, done and state attributes."""
    env = BoardSimEnvironment()
    first_obs = env.reset(seed=1)
    action = BoardSimAction(decision=first_obs.options[0], coalition_pitch="")
    result = env.step(action)
    for attribute in ("reward", "done", "state"):
        assert hasattr(result, attribute)
# ─── Reward bounds ───────────────────────────────────────────────────────
def test_per_step_reward_dense_and_bounded_until_terminal():
    """Non-terminal step rewards live in roughly [-3, +3]; terminal step can spike (+/-30 ish).

    Plays one seeded episode, always choosing the first option with a fixed
    pitch, and checks every reward except the last against the dense band.
    The final reward is excluded because it may include a terminal bonus or
    penalty.
    """
    # Cap the rollout so a non-terminating environment fails the test
    # instead of hanging the suite (same guard value as the lifecycle test).
    max_steps = 15

    env = BoardSimEnvironment()
    obs = env.reset(seed=11)
    rewards = []
    for _ in range(max_steps):
        if obs.done:
            break
        obs = env.step(BoardSimAction(decision=obs.options[0], coalition_pitch="runway and morale matter"))
        # A missing (None) reward is treated as 0.0.
        rewards.append(float(obs.reward or 0.0))

    assert obs.done, f"episode did not terminate within {max_steps} steps"
    assert len(rewards) >= 1

    # Slicing off the last element already yields [] for a one-step episode.
    for r in rewards[:-1]:
        assert -3.0 <= r <= 3.0, f"per-step reward {r} outside dense band"
def test_format_penalty_for_invalid_decision():
    """An out-of-options decision should usually score below a valid one on the same seed.

    For seeds 0..39, two environments are reset identically; one receives the
    first listed option and the other an unknown decision string. Pairs where
    either first step ends the episode are skipped so terminal bonuses do not
    distort the comparison. At least 70% of the remaining pairs must show a
    strictly lower reward for the invalid decision.
    """
    comparisons = []  # one bool per usable seed: did the invalid action score lower?
    for seed in range(40):
        valid_env = BoardSimEnvironment()
        invalid_env = BoardSimEnvironment()
        valid_obs = valid_env.reset(seed=seed)
        invalid_env.reset(seed=seed)

        valid_result = valid_env.step(
            BoardSimAction(decision=valid_obs.options[0], coalition_pitch="")
        )
        invalid_result = invalid_env.step(
            BoardSimAction(decision="NOT_A_VALID_OPTION", coalition_pitch="")
        )

        if not (valid_result.done or invalid_result.done):
            valid_reward = valid_result.reward or 0.0
            invalid_reward = invalid_result.reward or 0.0
            comparisons.append(invalid_reward < valid_reward)

    n = len(comparisons)
    invalid_drops = sum(comparisons)
    assert n >= 5, "needed enough non-terminal first rounds to compare"
    assert invalid_drops / n >= 0.7, "invalid decisions should reduce reward most of the time (format penalty)"
def test_pitch_bootstrap_increases_reward_vs_empty_pitch():
    """A non-empty pitch should not lower the first-step reward on most seeds.

    For seeds 0..19, the same decision is submitted to two identically seeded
    environments, once with an empty pitch and once with a fixed pitch. The
    pitched reward must be greater than or equal to the silent one on at
    least 14 of the 20 seeds.
    """
    pitch_text = "runway discipline and engineering quality argue for this"

    def pitch_does_not_hurt(seed):
        silent_env, pitched_env = BoardSimEnvironment(), BoardSimEnvironment()
        silent_obs, pitched_obs = silent_env.reset(seed=seed), pitched_env.reset(seed=seed)
        silent_result = silent_env.step(
            BoardSimAction(decision=silent_obs.options[0], coalition_pitch="")
        )
        pitched_result = pitched_env.step(
            BoardSimAction(decision=pitched_obs.options[0], coalition_pitch=pitch_text)
        )
        return (pitched_result.reward or 0.0) >= (silent_result.reward or 0.0)

    seeds_with_lift = sum(1 for seed in range(20) if pitch_does_not_hurt(seed))
    assert seeds_with_lift >= 14, "pitch should generally not hurt reward (>=70% of seeds non-decreasing)"
# ─── Profitability score ─────────────────────────────────────────────────
def test_profitability_score_in_range():
    """The stored profitability score lies in [0, 100] and matches a fresh recomputation."""
    env = BoardSimEnvironment()
    env.reset(seed=0)

    stored_score = env.state.state_dict["profitability_score"]
    assert 0.0 <= stored_score <= 100.0

    # Recompute from the same state dict; the two values must agree to within 1e-6.
    recomputed_score = compute_profitability_score(env.state.state_dict)
    assert abs(stored_score - recomputed_score) < 1e-6
# ─── Trust dynamics ──────────────────────────────────────────────────────
def test_trust_persists_and_updates():
    """After one step the trust map keeps its keys, changes somewhere, and stays in [0.1, 1.0]."""
    env = BoardSimEnvironment()
    opening_obs = env.reset(seed=3)
    # Snapshot the mapping so the comparison does not depend on the original object.
    trust_before = dict(opening_obs.state["trust"])

    action = BoardSimAction(decision=opening_obs.options[0], coalition_pitch="strong product readiness")
    trust_after = env.step(action).state["trust"]

    assert set(trust_after.keys()) == set(trust_before.keys())

    shifted_roles = [
        role
        for role in trust_before
        if abs(trust_after[role] - trust_before[role]) > 1e-6
    ]
    assert shifted_roles, "at least one NPC's trust should move"

    for level in trust_after.values():
        assert 0.1 <= level <= 1.0
# ─── Vote resolution ────────────────────────────────────────────────────
def test_ceo_weight_dominates_when_no_persuasion():
    """With an empty pitch, the CEO's chosen option should win at least 60% of first votes.

    Runs seeds 0..49, submits the first listed option each time, and reads the
    winning decision from the last entry of the environment's history.
    """

    def ceo_choice_won(seed):
        env = BoardSimEnvironment()
        chosen = env.reset(seed=seed).options[0]
        env.step(BoardSimAction(decision=chosen, coalition_pitch=""))
        history = env.state.state_dict["history"]
        # An empty history counts as a loss rather than raising.
        return bool(history) and history[-1]["winning_decision"] == chosen

    wins = sum(1 for seed in range(50) if ceo_choice_won(seed))
    assert wins / 50 >= 0.6, "CEO weight should win >=60% of single-step votes without pitch"
# ─── Sanity smoke ────────────────────────────────────────────────────────
def test_random_policy_survives_majority_of_episodes():
    """A uniformly random policy should avoid bankruptcy in at least 60% of 30 episodes.

    Each episode uses its index as the environment seed; decisions are drawn
    from a single ``random.Random(123)`` stream with an empty pitch. An
    episode counts as survived when its ``done_reason`` is anything other
    than ``"runway_exhausted"``.
    """
    import random

    # Cap each rollout so a non-terminating environment fails the test
    # instead of hanging the suite (same guard value as the lifecycle test).
    max_steps = 15

    rng = random.Random(123)
    survived = 0
    n = 30
    for ep in range(n):
        env = BoardSimEnvironment()
        obs = env.reset(seed=ep)
        for _ in range(max_steps):
            if obs.done:
                break
            obs = env.step(BoardSimAction(decision=rng.choice(obs.options), coalition_pitch=""))
        assert obs.done, f"episode {ep} did not terminate within {max_steps} steps"
        if env.state.state_dict.get("done_reason") != "runway_exhausted":
            survived += 1
    assert survived / n >= 0.6, "random policy should survive >=60% of episodes (env-health floor)"
|