Spaces:

StavanKhobare
/

SST-MetaxPyTorch-Hackathon

Sleeping

App Files Files Community

SST-MetaxPyTorch-Hackathon / tests /test_environment.py

StavanKhobare

Update documentation, add blog, and simplify inference script

312c390 about 1 month ago

raw

history blame contribute delete

8.84 kB

	"""End-to-end environment tests for the BoardSim OpenEnv environment.

	Covers:
	* deterministic reset/step contract
	* observation schema and required fields
	* 10-round episode termination
	* reward bounds (per-step dense, terminal spikes)
	* vote resolution on weighted CEO + NPC tally
	* format penalty fires for invalid decisions
	* pitch bootstrap fires for non-empty pitch
	* runway-exhaustion bankruptcy path
	* trust dynamics persist and update bidirectionally
	* event order is shuffled per seed (no trajectory memorisation)
	"""
	from __future__ import annotations

	import statistics

	import pytest

	from board_sim_env.models import BoardSimAction
	from board_sim_env.server.board_sim_env_environment import (
	BoardSimEnvironment,
	EVENTS,
	NPC_AGENDAS,
	ROLE_WEIGHT,
	compute_profitability_score,
	)


	# ─── Determinism ──────────────────────────────────────────────────────────

	def test_reset_is_deterministic_per_seed():
	    """Two fresh environments reset with the same seed must agree.

	    Compares the opening event, the option list, and the text of every NPC
	    statement (in order) for seed 7.
	    """
	    first_env = BoardSimEnvironment()
	    second_env = BoardSimEnvironment()
	    first_obs = first_env.reset(seed=7)
	    second_obs = second_env.reset(seed=7)

	    def statement_texts(observation):
	        # Only the "statement" text is compared, not the full NPC record.
	        return [npc["statement"] for npc in observation.npc_statements]

	    assert first_obs.event == second_obs.event
	    assert first_obs.options == second_obs.options
	    assert statement_texts(first_obs) == statement_texts(second_obs)


	def test_different_seeds_produce_different_event_orders():
	    """Seeds 0-19 must yield at least four distinct opening events."""
	    # One fresh environment per seed; only the first event of each is kept.
	    opening_events = {
	        BoardSimEnvironment().reset(seed=seed).event for seed in range(20)
	    }
	    assert len(opening_events) >= 4, "event shuffling should produce variety across seeds"


	# ─── Schema ───────────────────────────────────────────────────────────────

	def test_observation_schema():
	    """Check the shape of the first observation returned by ``reset(seed=0)``.

	    Verifies the required ``state`` keys, that the round counter starts at 1,
	    that there are 3 options and 4 NPC statements, and that every NPC
	    statement has the expected fields, a known role, and a vote that is one
	    of the offered options.
	    """
	    required_state_keys = ("revenue", "burn_rate", "runway_months", "profitability_score", "trust")
	    required_npc_fields = {"role", "vote", "confidence", "statement"}
	    obs = BoardSimEnvironment().reset(seed=0)
	    for key in required_state_keys:
	        assert key in obs.state, f"observation.state missing {key}"
	    assert obs.round == 1
	    assert len(obs.options) == 3
	    assert len(obs.npc_statements) == 4
	    for npc in obs.npc_statements:
	        # No required field may be absent from the NPC record.
	        assert required_npc_fields <= set(npc.keys())
	        assert npc["role"] in NPC_AGENDAS
	        assert npc["vote"] in obs.options


	def test_npc_role_set_and_weights():
	    """The NPC roster is the four expected roles; CEO weighs 2.5; NPC weights are positive."""
	    expected_roles = {"CTO", "CFO", "Investor Rep", "Independent"}
	    assert expected_roles == set(NPC_AGENDAS.keys())
	    assert ROLE_WEIGHT["CEO"] == 2.5
	    # Every NPC role must have a weight entry, and it must be strictly positive.
	    assert all(ROLE_WEIGHT[role] > 0 for role in NPC_AGENDAS)


	# ─── Episode lifecycle ───────────────────────────────────────────────────

	def test_episode_terminates_at_or_before_ten_rounds():
	    """Always picking the first option must end the episode within 10 steps.

	    The loop is capped at 15 steps so a non-terminating environment fails the
	    assertions instead of hanging the test run.
	    """
	    known_reasons = {"runway_exhausted", "acquisition", "ipo", "stay_private", "finished_10"}
	    env = BoardSimEnvironment()
	    obs = env.reset(seed=42)
	    steps_taken = 0
	    for _ in range(15):
	        if obs.done:
	            break
	        action = BoardSimAction(decision=obs.options[0], coalition_pitch="")
	        obs = env.step(action)
	        steps_taken += 1
	    assert obs.done
	    assert steps_taken <= 10
	    assert obs.state["done_reason"] in known_reasons


	def test_step_returns_required_fields():
	    """The object returned by ``step`` must expose ``reward``, ``done`` and ``state``."""
	    env = BoardSimEnvironment()
	    first_obs = env.reset(seed=1)
	    action = BoardSimAction(decision=first_obs.options[0], coalition_pitch="")
	    result = env.step(action)
	    for attribute in ("reward", "done", "state"):
	        assert hasattr(result, attribute)


	# ─── Reward bounds ───────────────────────────────────────────────────────

	def test_per_step_reward_dense_and_bounded_until_terminal():
	    """Non-terminal step rewards must stay within [-3, +3].

	    Plays seed 11 to completion, always choosing the first option with a
	    fixed non-empty pitch. The final (terminal) reward is excluded from the
	    bound because terminal steps are allowed to spike (roughly +/-30
	    according to the original note on this test).

	    The rollout is capped so that an environment which never sets ``done``
	    fails this test instead of hanging the whole suite.
	    """
	    # Same safety cap as the episode-termination test; episodes are expected
	    # to finish in at most 10 rounds.
	    max_steps = 15
	    env = BoardSimEnvironment()
	    obs = env.reset(seed=11)
	    rewards = []
	    while not obs.done and len(rewards) < max_steps:
	        obs = env.step(BoardSimAction(decision=obs.options[0], coalition_pitch="runway and morale matter"))
	        # A missing (None) reward is treated as 0.0.
	        rewards.append(float(obs.reward or 0.0))
	    assert obs.done, f"episode did not terminate within {max_steps} steps"
	    assert len(rewards) >= 1
	    # rewards[:-1] drops the terminal step; it is empty for a one-step episode.
	    for r in rewards[:-1]:
	        assert -3.0 <= r <= 3.0, f"per-step reward {r} outside dense band"


	def test_format_penalty_for_invalid_decision():
	    """An invalid decision should usually score below a valid one.

	    For seeds 0-39, two same-seed environments take one step each: one with
	    the first offered option, one with a decision string that is not among
	    the options. Seeds where either first step is terminal are skipped so
	    terminal bonuses do not dominate the comparison. The format penalty is
	    described as -0.5 in the original note; this test checks only the
	    ordering of the two rewards, not the magnitude.
	    """
	    compared = 0
	    penalised = 0
	    for seed in range(40):
	        valid_env = BoardSimEnvironment()
	        invalid_env = BoardSimEnvironment()
	        valid_obs = valid_env.reset(seed=seed)
	        invalid_env.reset(seed=seed)
	        valid_action = BoardSimAction(decision=valid_obs.options[0], coalition_pitch="")
	        valid_result = valid_env.step(valid_action)
	        invalid_action = BoardSimAction(decision="NOT_A_VALID_OPTION", coalition_pitch="")
	        invalid_result = invalid_env.step(invalid_action)
	        if not valid_result.done and not invalid_result.done:
	            compared += 1
	            # A missing (None) reward is treated as 0.0 on both sides.
	            valid_reward = valid_result.reward or 0.0
	            invalid_reward = invalid_result.reward or 0.0
	            if invalid_reward < valid_reward:
	                penalised += 1
	    assert compared >= 5, "needed enough non-terminal first rounds to compare"
	    assert penalised / compared >= 0.7, "invalid decisions should reduce reward most of the time (format penalty)"


	def test_pitch_bootstrap_increases_reward_vs_empty_pitch():
	    """A non-empty pitch should not lower the first-step reward on most seeds.

	    For seeds 0-19, two same-seed environments take the first option, one
	    with an empty pitch and one with a fixed non-empty pitch. A seed counts
	    when the pitched reward is greater than or equal to the unpitched one;
	    at least 14 of 20 seeds must count. Equal rewards count too, so this
	    checks "does not hurt", not a strict increase.
	    """
	    pitch_text = "runway discipline and engineering quality argue for this"
	    non_decreasing = 0
	    for seed in range(20):
	        silent_env = BoardSimEnvironment()
	        pitched_env = BoardSimEnvironment()
	        silent_obs = silent_env.reset(seed=seed)
	        pitched_obs = pitched_env.reset(seed=seed)
	        silent_result = silent_env.step(
	            BoardSimAction(decision=silent_obs.options[0], coalition_pitch="")
	        )
	        pitched_result = pitched_env.step(
	            BoardSimAction(decision=pitched_obs.options[0], coalition_pitch=pitch_text)
	        )
	        # A missing (None) reward is treated as 0.0 on both sides.
	        silent_reward = silent_result.reward or 0.0
	        pitched_reward = pitched_result.reward or 0.0
	        if pitched_reward >= silent_reward:
	            non_decreasing += 1
	    assert non_decreasing >= 14, "pitch should generally not hurt reward (>=70% of seeds non-decreasing)"


	# ─── Profitability score ─────────────────────────────────────────────────

	def test_profitability_score_in_range():
	    """The stored profitability score is within [0, 100] and matches a recomputation."""
	    env = BoardSimEnvironment()
	    env.reset(seed=0)
	    stored = env.state.state_dict["profitability_score"]
	    assert 0.0 <= stored <= 100.0
	    # Recompute from the same state dict; the two values must agree to 1e-6.
	    recomputed = compute_profitability_score(env.state.state_dict)
	    difference = stored - recomputed
	    assert -1e-6 < difference < 1e-6


	# ─── Trust dynamics ──────────────────────────────────────────────────────

	def test_trust_persists_and_updates():
	    """After one step the trust map keeps its keys, changes somewhere, and stays in [0.1, 1.0]."""
	    env = BoardSimEnvironment()
	    start_obs = env.reset(seed=3)
	    # Snapshot the initial trust so later changes to the env cannot alias it.
	    trust_before = dict(start_obs.state["trust"])
	    action = BoardSimAction(decision=start_obs.options[0], coalition_pitch="strong product readiness")
	    next_obs = env.step(action)
	    trust_after = next_obs.state["trust"]
	    assert set(trust_after.keys()) == set(trust_before.keys())
	    moved = (abs(trust_after[role] - trust_before[role]) > 1e-6 for role in trust_before)
	    assert any(moved), "at least one NPC's trust should move"
	    for level in trust_after.values():
	        assert 0.1 <= level <= 1.0


	# ─── Vote resolution ────────────────────────────────────────────────────

	def test_ceo_weight_dominates_when_no_persuasion():
	    """Without a pitch, the CEO's chosen option should win at least 60% of first votes.

	    For seeds 0-49 the CEO picks the first option with an empty pitch; a seed
	    counts as a win when the last history entry records that option as the
	    winning decision. Seeds with an empty history do not count as wins.
	    """

	    def ceo_choice_won(seed):
	        env = BoardSimEnvironment()
	        obs = env.reset(seed=seed)
	        chosen = obs.options[0]
	        env.step(BoardSimAction(decision=chosen, coalition_pitch=""))
	        history = env.state.state_dict["history"]
	        if not history:
	            return False
	        return history[-1]["winning_decision"] == chosen

	    wins = sum(1 for seed in range(50) if ceo_choice_won(seed))
	    assert wins / 50 >= 0.6, "CEO weight should win >=60% of single-step votes without pitch"


	# ─── Sanity smoke ────────────────────────────────────────────────────────

	def test_random_policy_survives_majority_of_episodes():
	    """A uniformly random policy should avoid bankruptcy in at least 60% of episodes.

	    Runs 30 episodes (env seeds 0-29) with decisions drawn from a single
	    ``random.Random(123)`` stream and empty pitches. An episode counts as
	    survived when its ``done_reason`` is anything other than
	    ``"runway_exhausted"``.

	    Each rollout is capped so that an environment which never sets ``done``
	    fails this test instead of hanging the whole suite.
	    """
	    import random

	    # Same safety cap as the episode-termination test; episodes are expected
	    # to finish in at most 10 rounds.
	    max_steps = 15
	    rng = random.Random(123)
	    survived = 0
	    n = 30
	    for ep in range(n):
	        env = BoardSimEnvironment()
	        obs = env.reset(seed=ep)
	        steps = 0
	        while not obs.done and steps < max_steps:
	            # One rng draw per step keeps the decision sequence reproducible.
	            obs = env.step(BoardSimAction(decision=rng.choice(obs.options), coalition_pitch=""))
	            steps += 1
	        assert obs.done, f"episode {ep} did not terminate within {max_steps} steps"
	        if env.state.state_dict.get("done_reason") != "runway_exhausted":
	            survived += 1
	    assert survived / n >= 0.6, "random policy should survive >=60% of episodes (env-health floor)"