| """End-to-end environment tests for the BoardSim OpenEnv environment. |
| |
| Covers: |
| * deterministic reset/step contract |
| * observation schema and required fields |
| * 10-round episode termination |
| * reward bounds (per-step dense, terminal spikes) |
| * vote resolution on weighted CEO + NPC tally |
| * format penalty fires for invalid decisions |
| * pitch bootstrap fires for non-empty pitch |
| * runway-exhaustion bankruptcy path |
| * trust dynamics persist and update bidirectionally |
| * event order is shuffled per seed (no trajectory memorisation) |
| """ |
| from __future__ import annotations |
|
|
| import statistics |
|
|
| import pytest |
|
|
| from board_sim_env.models import BoardSimAction |
| from board_sim_env.server.board_sim_env_environment import ( |
| BoardSimEnvironment, |
| EVENTS, |
| NPC_AGENDAS, |
| ROLE_WEIGHT, |
| compute_profitability_score, |
| ) |
|
|
|
|
| |
|
|
def test_reset_is_deterministic_per_seed():
    """Two fresh environments reset with the same seed must agree.

    Compares the event, the offered options, and the NPC statement texts
    of both initial observations.
    """
    first_env = BoardSimEnvironment()
    second_env = BoardSimEnvironment()
    first_obs = first_env.reset(seed=7)
    second_obs = second_env.reset(seed=7)

    assert first_obs.event == second_obs.event
    assert first_obs.options == second_obs.options

    # Only the statement text is compared, not the full NPC record.
    first_texts = [entry["statement"] for entry in first_obs.npc_statements]
    second_texts = [entry["statement"] for entry in second_obs.npc_statements]
    assert first_texts == second_texts
|
|
|
|
def test_different_seeds_produce_different_event_orders():
    """Seeds 0..19 must yield at least four distinct opening events."""
    opening_events = {
        BoardSimEnvironment().reset(seed=seed).event for seed in range(20)
    }
    assert len(opening_events) >= 4, "event shuffling should produce variety across seeds"
|
|
|
|
| |
|
|
def test_observation_schema():
    """Check the shape of the initial observation returned by ``reset``.

    Verifies the required ``state`` keys, that the episode starts at
    round 1 with three options and four NPC statements, and that every
    NPC statement carries the expected fields with a known role and a
    vote drawn from the offered options.
    """
    env = BoardSimEnvironment()
    obs = env.reset(seed=0)

    required_state_keys = (
        "revenue",
        "burn_rate",
        "runway_months",
        "profitability_score",
        "trust",
    )
    for key in required_state_keys:
        assert key in obs.state, f"observation.state missing {key}"

    assert obs.round == 1
    assert len(obs.options) == 3
    assert len(obs.npc_statements) == 4

    required_statement_fields = {"role", "vote", "confidence", "statement"}
    for npc in obs.npc_statements:
        assert required_statement_fields.issubset(npc.keys())
        assert npc["role"] in NPC_AGENDAS
        assert npc["vote"] in obs.options
|
|
|
|
def test_npc_role_set_and_weights():
    """Pin the NPC roster and check the vote weights.

    The CEO weight must be exactly 2.5 and every NPC role listed in
    ``NPC_AGENDAS`` must have a strictly positive weight.
    """
    expected_roles = {"CTO", "CFO", "Investor Rep", "Independent"}
    assert set(NPC_AGENDAS.keys()) == expected_roles

    assert ROLE_WEIGHT["CEO"] == 2.5

    for npc_role in NPC_AGENDAS:
        weight = ROLE_WEIGHT[npc_role]
        assert weight > 0
|
|
|
|
| |
|
|
def test_episode_terminates_at_or_before_ten_rounds():
    """Always picking the first option must end the episode within 10 steps.

    The loop is capped at 15 steps so a non-terminating environment fails
    the assertions below instead of hanging the test run.
    """
    env = BoardSimEnvironment()
    obs = env.reset(seed=42)

    steps_taken = 0
    for _ in range(15):
        if obs.done:
            break
        action = BoardSimAction(decision=obs.options[0], coalition_pitch="")
        obs = env.step(action)
        steps_taken += 1

    assert obs.done
    assert steps_taken <= 10

    known_reasons = {
        "runway_exhausted",
        "acquisition",
        "ipo",
        "stay_private",
        "finished_10",
    }
    assert obs.state["done_reason"] in known_reasons
|
|
|
|
def test_step_returns_required_fields():
    """The object returned by ``step`` must expose reward, done and state."""
    env = BoardSimEnvironment()
    initial = env.reset(seed=1)

    action = BoardSimAction(decision=initial.options[0], coalition_pitch="")
    result = env.step(action)

    for field_name in ("reward", "done", "state"):
        assert hasattr(result, field_name)
|
|
|
|
| |
|
|
def test_per_step_reward_dense_and_bounded_until_terminal():
    """Every non-terminal step reward must lie in the dense band [-3, +3].

    Plays one episode (seed 11), always choosing the first option with a
    fixed pitch, and records the reward of each step. The last recorded
    reward belongs to the terminal step and is excluded from the band
    check (the original author notes it can spike to roughly +/-30).

    The rollout is capped at 15 steps, matching the guard used by the
    termination test, so an environment that stops terminating makes this
    test fail rather than hang.
    """
    max_steps = 15
    env = BoardSimEnvironment()
    obs = env.reset(seed=11)

    rewards = []
    while not obs.done and len(rewards) < max_steps:
        obs = env.step(
            BoardSimAction(decision=obs.options[0], coalition_pitch="runway and morale matter")
        )
        # A missing/None reward is treated as 0.0.
        rewards.append(float(obs.reward or 0.0))

    assert obs.done, f"episode did not terminate within {max_steps} steps"
    assert len(rewards) >= 1

    # rewards[:-1] is empty for a one-step episode, so no special case is needed.
    for r in rewards[:-1]:
        assert -3.0 <= r <= 3.0, f"per-step reward {r} outside dense band"
|
|
|
|
def test_format_penalty_for_invalid_decision():
    """An out-of-options decision should usually score below a valid one.

    For seeds 0..39, two environments are reset with the same seed; one
    is stepped with its first offered option, the other with a string
    that is not an option. Pairs where either step ended the episode are
    skipped so terminal bonuses don't dominate the measurement. The test
    needs at least five comparable pairs and requires the invalid action
    to earn strictly less reward in at least 70% of them. The size of the
    penalty itself is not checked here.
    """
    compared = 0
    penalised = 0

    for seed in range(40):
        valid_env = BoardSimEnvironment()
        invalid_env = BoardSimEnvironment()
        valid_obs = valid_env.reset(seed=seed)
        invalid_env.reset(seed=seed)

        valid_step = valid_env.step(
            BoardSimAction(decision=valid_obs.options[0], coalition_pitch="")
        )
        invalid_step = invalid_env.step(
            BoardSimAction(decision="NOT_A_VALID_OPTION", coalition_pitch="")
        )

        if not (valid_step.done or invalid_step.done):
            compared += 1
            # A missing/None reward is treated as 0.0 on both sides.
            invalid_reward = invalid_step.reward or 0.0
            valid_reward = valid_step.reward or 0.0
            if invalid_reward < valid_reward:
                penalised += 1

    assert compared >= 5, "needed enough non-terminal first rounds to compare"
    assert penalised / compared >= 0.7, "invalid decisions should reduce reward most of the time (format penalty)"
|
|
|
|
def test_pitch_bootstrap_increases_reward_vs_empty_pitch():
    """A non-empty pitch should generally not lower the first-step reward.

    For seeds 0..19, two same-seed environments take the same first
    option, one with an empty pitch and one with a non-empty pitch. The
    test counts seeds where the pitched reward is greater than or equal
    to the silent reward and requires at least 14 of 20 (70%).

    Note: the comparison is non-strict, so equal rewards also count.
    """
    pitch_text = "runway discipline and engineering quality argue for this"
    non_decreasing = 0

    for seed in range(20):
        silent_env = BoardSimEnvironment()
        pitched_env = BoardSimEnvironment()
        silent_obs = silent_env.reset(seed=seed)
        pitched_obs = pitched_env.reset(seed=seed)

        silent_step = silent_env.step(
            BoardSimAction(decision=silent_obs.options[0], coalition_pitch="")
        )
        pitched_step = pitched_env.step(
            BoardSimAction(decision=pitched_obs.options[0], coalition_pitch=pitch_text)
        )

        # A missing/None reward is treated as 0.0 on both sides.
        pitched_reward = pitched_step.reward or 0.0
        silent_reward = silent_step.reward or 0.0
        if pitched_reward >= silent_reward:
            non_decreasing += 1

    assert non_decreasing >= 14, "pitch should generally not hurt reward (>=70% of seeds non-decreasing)"
|
|
|
|
| |
|
|
def test_profitability_score_in_range():
    """The stored profitability score is within [0, 100] and reproducible.

    After a reset, the score held in the environment's state dict must
    match a fresh ``compute_profitability_score`` call on that same state
    to within 1e-6.
    """
    env = BoardSimEnvironment()
    env.reset(seed=0)

    stored_score = env.state.state_dict["profitability_score"]
    assert 0.0 <= stored_score <= 100.0

    recomputed_score = compute_profitability_score(env.state.state_dict)
    difference = abs(stored_score - recomputed_score)
    assert difference < 1e-6
|
|
|
|
| |
|
|
def test_trust_persists_and_updates():
    """Trust keeps the same roles after a step, moves, and stays in range.

    After one step with a non-empty pitch, the trust mapping must have
    the same keys as before, at least one value must differ by more than
    1e-6, and every value must lie in [0.1, 1.0].
    """
    env = BoardSimEnvironment()
    start_obs = env.reset(seed=3)
    # Copy the mapping so later state changes cannot alter the baseline.
    trust_before = dict(start_obs.state["trust"])

    action = BoardSimAction(
        decision=start_obs.options[0],
        coalition_pitch="strong product readiness",
    )
    next_obs = env.step(action)
    trust_after = next_obs.state["trust"]

    assert set(trust_after.keys()) == set(trust_before.keys())

    some_trust_moved = any(
        abs(trust_after[role] - trust_before[role]) > 1e-6 for role in trust_before
    )
    assert some_trust_moved, "at least one NPC's trust should move"

    for level in trust_after.values():
        assert 0.1 <= level <= 1.0
|
|
|
|
| |
|
|
def test_ceo_weight_dominates_when_no_persuasion():
    """The CEO's pick should win at least 60% of first-round votes.

    For seeds 0..49, the CEO backs the first option with an empty pitch.
    A seed counts as a win when the latest history entry records that
    option as the winning decision. Seeds with an empty history count as
    losses.
    """
    trials = 50
    wins = 0

    for seed in range(trials):
        env = BoardSimEnvironment()
        ceo_pick = env.reset(seed=seed).options[0]
        env.step(BoardSimAction(decision=ceo_pick, coalition_pitch=""))

        history = env.state.state_dict["history"]
        if not history:
            continue
        if history[-1]["winning_decision"] == ceo_pick:
            wins += 1

    assert wins / trials >= 0.6, "CEO weight should win >=60% of single-step votes without pitch"
|
|
|
|
| |
|
|
def test_random_policy_survives_majority_of_episodes():
    """A uniformly random policy should avoid bankruptcy in >=60% of runs.

    Plays 30 episodes (environment seeds 0..29) with decisions drawn from
    one ``random.Random(123)`` stream and an empty pitch. An episode
    counts as survived when its ``done_reason`` is anything other than
    ``"runway_exhausted"``.

    Each episode is capped at 15 steps, matching the guard used by the
    termination test, so an environment that stops terminating makes this
    test fail with a clear message rather than hang. For episodes that do
    terminate, the random stream is consumed exactly as before.
    """
    import random

    rng = random.Random(123)
    episodes = 30
    max_steps = 15
    survived = 0

    for ep in range(episodes):
        env = BoardSimEnvironment()
        obs = env.reset(seed=ep)

        for _ in range(max_steps):
            if obs.done:
                break
            obs = env.step(
                BoardSimAction(decision=rng.choice(obs.options), coalition_pitch="")
            )
        assert obs.done, f"episode {ep} did not terminate within {max_steps} steps"

        if env.state.state_dict.get("done_reason") != "runway_exhausted":
            survived += 1

    assert survived / episodes >= 0.6, "random policy should survive >=60% of episodes (env-health floor)"
|
|