world-simulator / scripts /playtest_agent.py
DeltaZN
feat: rename god -> world
c58d3eb
Raw
History Blame Contribute Delete
25.9 kB
from __future__ import annotations
import argparse
import json
import os
import sys
import time
from collections import Counter
from dataclasses import dataclass, field
from pathlib import Path
from typing import Any
from urllib.error import URLError
from urllib.parse import urlparse
from urllib.request import Request, urlopen
# Put the checkout's ``src`` directory at the front of ``sys.path`` so the
# ``world_simulator`` imports that follow resolve when this script is run
# straight from the repository.
REPO_ROOT = Path(__file__).resolve().parents[1]
SRC_ROOT = REPO_ROOT.joinpath("src")
if str(SRC_ROOT) not in sys.path:
    sys.path.insert(0, str(SRC_ROOT))
from world_simulator.config import ( # noqa: E402
ConnectorConfig,
GameConfig,
NpcConfig,
OverseerConfig,
ServerConfig,
SimulationConfig,
WorldConfig,
apply_runtime_env_overrides,
load_game_config,
)
from world_simulator.domain import WorldLogEvent, WorldState # noqa: E402
from world_simulator.simulation.chaos import apply_chaos_action # noqa: E402
from world_simulator.simulation.connectors.deterministic import ( # noqa: E402
DeterministicWorldSimulator,
)
from world_simulator.simulation.connectors.factory import create_world_simulator # noqa: E402
from world_simulator.simulation.connectors.openai_compatible import ( # noqa: E402
OpenAICompatibleWorldSimulator,
)
from world_simulator.simulation.mechanics import is_alive # noqa: E402
from world_simulator.simulation.overseer import ( # noqa: E402
OverseerController,
create_overseer,
scripted_overseer_controller,
)
from world_simulator.simulation.roles import normalize_role # noqa: E402
from world_simulator.simulation.spawning import create_world # noqa: E402
from world_simulator.simulation.tick import advance_world # noqa: E402
# Scheduled chaos: world tick -> names of the chaos actions that main() hands
# to apply_chaos_action once the world has reached that tick.
CHAOS_SCHEDULE: dict[int, list[str]] = {
    50: ["spawn_beast"],
    120: ["famine"],
    200: ["beast_pack"],
    280: ["maniac"],
}
# Event types that count as "major": they feed the report timeline and the
# chain-evidence filter. Grouped by theme for readability only.
MAJOR_EVENT_TYPES = {
    # beasts
    "beast_spawned",
    "beast_attack",
    "beast_killed",
    "beast_retreat",
    # NPC life cycle and combat
    "npc_attack",
    "npc_died",
    "npc_born",
    # housing
    "build_started",
    "build_completed",
    "house_damaged",
    "house_destroyed",
    # economy and care
    "transfer",
    "heal",
    "consume",
    # chaos / overseer / end of game
    "chaos_event",
    "directive_issued",
    "overseer_skipped",
    "game_over",
}
@dataclass(frozen=True, slots=True)
class Snapshot:
tick: int
population: int
living_beasts: int
completed_houses: int
total_food: int
avg_hunger: float
max_hunger: float
max_fear: float
@dataclass(frozen=True, slots=True)
class ActionTrace:
tick: int
npc_id: str
role: str
goal: str
requested_action: str
action: str
summary: str
@dataclass(frozen=True, slots=True)
class TrustDelta:
tick: int
npc_id: str
target_id: str
before: float
after: float
@dataclass(slots=True)
class ChainResult:
name: str
occurred: bool
ticks: dict[str, int | None]
evidence: list[str] = field(default_factory=list)
@dataclass(slots=True)
class PlaytestResult:
world: WorldState
report_path: Path
ticks_requested: int
elapsed_seconds: float
simulator_label: str
overseer_label: str
snapshots: list[Snapshot]
action_traces: list[ActionTrace]
trust_deltas: list[TrustDelta]
chains: list[ChainResult]
verdict: str
def main() -> None:
    """Run one scheduled-chaos playtest from the command line.

    Builds the config, world, simulator and overseer from the CLI arguments,
    advances the world until it reaches ``--ticks`` while applying the actions
    in ``CHAOS_SCHEDULE``, then analyses the run, writes the Markdown report
    and prints a short console summary.
    """
    args = _parse_args()
    started = time.perf_counter()

    base_config = apply_runtime_env_overrides(_load_or_default_config(args.config))
    config = _with_playtest_overrides(base_config, seed=args.seed, npc_count=args.npcs)
    world = create_world(config)
    simulator, simulator_label = _build_simulator(config, args.use_llm)
    overseer, overseer_label = _build_overseer(
        config,
        enabled=args.overseer_on,
        real=args.real_overseer,
    )

    # The first snapshot records the world before any tick has been simulated.
    snapshots: list[Snapshot] = [_snapshot(world)]
    action_traces: list[ActionTrace] = []
    trust_deltas: list[TrustDelta] = []
    relationship_baseline = _relationship_matrix(world)

    while world.tick < args.ticks:
        events_before_tick = len(world.event_log)
        advance_world(world, simulator=simulator, overseer=overseer)

        action_traces.extend(_action_traces(world))
        trust_deltas.extend(_trust_deltas(world, relationship_baseline))
        relationship_baseline = _relationship_matrix(world)

        for chaos_action in CHAOS_SCHEDULE.get(world.tick, ()):
            if apply_chaos_action(world, chaos_action):
                continue
            # Leave a trace in the log when a scheduled action did nothing.
            _append_manual_event(
                world,
                "chaos_event",
                f"Scheduled chaos action {chaos_action} had no effect",
                severity="warning",
            )

        # Re-baseline when the log grew this tick, so relationship changes made
        # by the chaos actions above are not counted as next tick's deltas.
        if len(world.event_log) > events_before_tick:
            relationship_baseline = _relationship_matrix(world)
        snapshots.append(_snapshot(world))

    chains = _analyze_chains(world, snapshots, action_traces, trust_deltas)
    verdict = _balance_verdict(world, snapshots, overseer_on=args.overseer_on)
    elapsed_seconds = time.perf_counter() - started

    result = PlaytestResult(
        world=world,
        report_path=args.report,
        ticks_requested=args.ticks,
        elapsed_seconds=elapsed_seconds,
        simulator_label=simulator_label,
        overseer_label=overseer_label,
        snapshots=snapshots,
        action_traces=action_traces,
        trust_deltas=trust_deltas,
        chains=chains,
        verdict=verdict,
    )
    _write_report(result)
    _print_summary(result)
def _parse_args() -> argparse.Namespace:
    """Parse the playtest options from ``sys.argv``.

    The default ``--config`` path comes from the ``WORLD_SIMULATOR_CONFIG``
    environment variable, falling back to ``config/game.modal.local.json``.
    """
    default_config = Path(os.getenv("WORLD_SIMULATOR_CONFIG", "config/game.modal.local.json"))

    cli = argparse.ArgumentParser(
        description="Run the scheduled-chaos playtest and write playtest_report.md."
    )
    cli.add_argument("--config", type=Path, default=default_config)
    cli.add_argument("--report", type=Path, default=Path("playtest_report.md"))
    cli.add_argument("--ticks", type=int, default=400)
    cli.add_argument("--seed", type=int, default=None)
    cli.add_argument("--npcs", type=int, default=None)
    cli.add_argument("--overseer-on", action="store_true", help="Enable Overseer autopilot.")
    cli.add_argument(
        "--real-overseer",
        action="store_true",
        help="Use config.overseer instead of the deterministic mock when --overseer-on is set.",
    )
    cli.add_argument(
        "--use-llm",
        choices=("auto", "always", "never"),
        default="auto",
        help="auto uses an OpenAI-compatible NPC connector only when it looks reachable.",
    )
    return cli.parse_args()
def _load_or_default_config(path: Path) -> GameConfig:
    """Load the game config at ``path``, or build a default one if it is not a file.

    The fallback is an 80x80 ``plain_green`` survival world with seed 42,
    ten NPCs, a 1 ms tick and a server bound to 127.0.0.1:8000.
    """
    if not path.is_file():
        return GameConfig(
            world=WorldConfig(width=80, depth=80, terrain="plain_green", seed=42, survival=True),
            npcs=NpcConfig(count=10),
            simulation=SimulationConfig(tick_ms=1),
            server=ServerConfig(host="127.0.0.1", port=8000),
        )
    return load_game_config(path)
def _with_playtest_overrides(
    config: GameConfig,
    *,
    seed: int | None,
    npc_count: int | None,
) -> GameConfig:
    """Return a copy of ``config`` adjusted for a playtest run.

    Survival mode is always on and ``tick_ms`` is always 1 in the result.
    ``seed`` and ``npc_count`` replace the configured values when given.
    Sections that need no change are reused as-is.

    NOTE(review): a rebuilt ``WorldConfig`` carries over only width, depth,
    terrain and seed; confirm it has no other fields that should be kept.
    """
    world = config.world
    if seed is not None or not world.survival:
        world = WorldConfig(
            width=world.width,
            depth=world.depth,
            terrain=world.terrain,
            seed=world.seed if seed is None else seed,
            survival=True,
        )

    npcs = config.npcs if npc_count is None else NpcConfig(count=npc_count)

    simulation = config.simulation
    if simulation.tick_ms != 1:
        simulation = SimulationConfig(tick_ms=1)

    return GameConfig(
        world=world,
        npcs=npcs,
        simulation=simulation,
        server=config.server,
        connector=config.connector,
        god_console=config.god_console,
        overseer=config.overseer,
    )
def _build_simulator(
    config: GameConfig,
    use_llm: str,
) -> tuple[DeterministicWorldSimulator | OpenAICompatibleWorldSimulator, str]:
    """Pick the NPC simulator for the run and a label describing the choice.

    ``use_llm`` is "never", "auto" or "always". The deterministic simulator is
    returned unless the connector type is ``openai_compatible`` and the LLM
    simulator can be created. In "auto" mode an unreachable-looking connector
    is skipped, and in "always" mode a creation failure is re-raised.
    """
    fallback = DeterministicWorldSimulator()
    llm_requested = use_llm != "never" and config.connector.type == "openai_compatible"
    if not llm_requested:
        return fallback, "deterministic"

    skip_reason = _connector_unavailable_reason(config.connector)
    if use_llm == "auto" and skip_reason:
        return fallback, f"deterministic (LLM skipped: {skip_reason})"

    try:
        candidate = create_world_simulator(config)
    except Exception as exc:
        if use_llm != "always":
            return fallback, f"deterministic (LLM unavailable: {exc})"
        raise

    if isinstance(candidate, OpenAICompatibleWorldSimulator):
        return candidate, "openai_compatible with deterministic fallback"
    return fallback, "deterministic"
def _connector_unavailable_reason(config: ConnectorConfig) -> str | None:
    """Explain why the LLM connector looks unusable, or return ``None`` if it looks fine.

    Checks, in order: the API-key environment variable (when one is named),
    a base URL (``base_url`` or ``api_url``), a model name, and finally a GET
    on the derived health URL with a two-second timeout. Any HTTP status
    below 500 counts as reachable, because it shows the server is answering
    even if it has no health endpoint.

    Returns:
        A short human-readable reason, or ``None`` when nothing looks wrong.
    """
    # Local import: the module's top-level imports only bring in URLError.
    from urllib.error import HTTPError

    if config.api_key_env and not os.getenv(config.api_key_env):
        return f"missing {config.api_key_env}"
    base_url = config.base_url or config.api_url
    if not base_url:
        return "missing base_url"
    if not config.model:
        return "missing model"

    health_url = _health_url(base_url)
    try:
        request = Request(health_url, method="GET")
        with urlopen(request, timeout=2.0) as response:
            status = response.status
    except HTTPError as exc:
        # urlopen raises for 4xx/5xx instead of returning the response, so the
        # status has to be read from the exception for the check below to see it.
        status = exc.code
        exc.close()
    except (OSError, ValueError) as exc:
        # OSError covers URLError, timeouts and refused connections; ValueError
        # is what Request/urlopen raise for a malformed or scheme-less URL.
        return f"health check failed at {health_url}: {exc}"

    if 200 <= status < 500:
        return None
    return f"health returned HTTP {status}"
def _health_url(base_url: str) -> str:
    """Derive the ``/health`` URL that sits next to an API base URL.

    A trailing ``/v1`` path segment is removed, with or without a trailing
    slash, and the params, query string and fragment are dropped. For example
    ``http://host:8000/v1/`` becomes ``http://host:8000/health``.
    """
    parsed = urlparse(base_url)
    # Strip slashes first so ".../v1/" is recognised the same way as ".../v1".
    path = parsed.path.rstrip("/").removesuffix("/v1").rstrip("/")
    return parsed._replace(path=f"{path}/health", params="", query="", fragment="").geturl()
def _build_overseer(
    config: GameConfig,
    *,
    enabled: bool,
    real: bool,
) -> tuple[OverseerController | None, str]:
    """Create the overseer for the run and a label describing it.

    Returns ``(None, "off")`` when disabled and a scripted autopilot mock
    unless ``real`` is set. With ``real`` set, the overseer is built from the
    config; if the configured overseer mode is "off", an autopilot overseer
    config that reuses the NPC connector is substituted first.
    """
    if not enabled:
        return None, "off"
    if not real:
        return scripted_overseer_controller(mode="autopilot", cycle_ticks=8), "mock autopilot"
    if config.overseer.mode != "off":
        return create_overseer(config), "real/config autopilot"

    autopilot_config = GameConfig(
        world=config.world,
        npcs=config.npcs,
        simulation=config.simulation,
        server=config.server,
        connector=config.connector,
        god_console=config.god_console,
        overseer=OverseerConfig(
            mode="autopilot",
            cycle_ticks=8,
            max_directives=3,
            connector=config.connector,
        ),
    )
    return create_overseer(autopilot_config), "real/config autopilot"
def _snapshot(world: WorldState) -> Snapshot:
    """Summarise the current world state as a ``Snapshot``.

    Hunger and fear figures cover living NPCs only and are rounded to two
    decimals; they are 0.0 when nobody is alive.
    """
    living = [npc for npc in world.npcs if is_alive(npc)]
    food_total = sum(node.amount for node in world.resource_nodes if node.resource_type == "food")
    beasts_alive = sum(1 for beast in world.beasts if beast.state != "dead")
    houses_standing = sum(
        1 for house in world.houses if house.state == "completed" and house.hp > 0
    )
    hunger_levels = [npc.hunger for npc in living]
    fear_levels = [npc.fear for npc in living]
    # max(1, ...) keeps the average defined when the population is zero.
    mean_hunger = sum(hunger_levels) / max(1, len(living))

    return Snapshot(
        tick=world.tick,
        population=len(living),
        living_beasts=beasts_alive,
        completed_houses=houses_standing,
        total_food=food_total,
        avg_hunger=round(mean_hunger, 2),
        max_hunger=round(max(hunger_levels, default=0.0), 2),
        max_fear=round(max(fear_levels, default=0.0), 2),
    )
def _action_traces(world: WorldState) -> list[ActionTrace]:
    """Convert ``world.last_action_debug`` into ``ActionTrace`` records.

    Every trace is stamped with the current world tick. Missing entries
    become empty strings, and an NPC id that matches no NPC gets the role
    "unknown".
    """
    role_by_id = {npc.id: normalize_role(npc.role) for npc in world.npcs}

    def _text(entry: Any, key: str) -> str:
        return str(entry.get(key, ""))

    collected: list[ActionTrace] = []
    for entry in world.last_action_debug:
        actor_id = _text(entry, "npc_id")
        collected.append(
            ActionTrace(
                tick=world.tick,
                npc_id=actor_id,
                role=role_by_id.get(actor_id, "unknown"),
                goal=_text(entry, "goal"),
                requested_action=_text(entry, "requested_action"),
                action=_text(entry, "action"),
                summary=_text(entry, "summary"),
            )
        )
    return collected
def _relationship_matrix(world: WorldState) -> dict[tuple[str, str], float]:
    """Flatten NPC relationships into ``{(npc_id, target_id): value}``.

    Entries whose target id starts with "beast" are left out, and values are
    converted to ``float``.
    """
    return {
        (npc.id, target_id): float(value)
        for npc in world.npcs
        for target_id, value in npc.relationships.items()
        if not target_id.startswith("beast")
    }
def _trust_deltas(
    world: WorldState,
    previous: dict[tuple[str, str], float],
) -> list[TrustDelta]:
    """List relationship values that moved by at least 0.001 since ``previous``.

    Pairs missing from ``previous`` are treated as unchanged, and pairs that
    no longer exist in the current matrix are ignored. Reported values are
    rounded to three decimals and stamped with the current world tick.
    """
    changes: list[TrustDelta] = []
    for (npc_id, target_id), after in _relationship_matrix(world).items():
        before = previous.get((npc_id, target_id), after)
        if abs(after - before) < 0.001:
            continue
        changes.append(
            TrustDelta(
                tick=world.tick,
                npc_id=npc_id,
                target_id=target_id,
                before=round(before, 3),
                after=round(after, 3),
            )
        )
    return changes
def _append_manual_event(
    world: WorldState,
    event_type: str,
    summary: str,
    *,
    severity: str,
) -> None:
    """Append a hand-made ``WorldLogEvent`` for the current tick to the world log."""
    manual_event = WorldLogEvent(
        tick=world.tick,
        type=event_type,
        summary=summary,
        severity=severity,  # type: ignore[arg-type]
    )
    world.event_log.append(manual_event)
def _analyze_chains(
    world: WorldState,
    snapshots: list[Snapshot],
    traces: list[ActionTrace],
    trust_deltas: list[TrustDelta],
) -> list[ChainResult]:
    """Check the run for the three core cause-and-effect chains.

    Each step records the first tick it was seen at, and most steps are
    searched for no earlier than the step before them. The chains are
    returned in the order beast, hunger, build.
    """

    def _evidence(*ticks: int | None) -> list[str]:
        return _evidence_lines(world, traces, trust_deltas, ticks=ticks)

    def _high_fear(snapshot: Snapshot) -> bool:
        return snapshot.max_fear >= 60.0

    def _high_hunger(snapshot: Snapshot) -> bool:
        return snapshot.max_hunger >= 60.0

    def _food_gather(event: Any) -> bool:
        return "food" in event.summary.lower() or (event.object_id or "").startswith("res_food")

    def _theft(trace: ActionTrace) -> bool:
        return trace.action == "steal" or "stealing" in trace.summary

    # Chain 1: beast attack -> fear -> help request -> guard response -> trust.
    attack_at = _first_event_tick(world, "beast_attack")
    fear_at = _first_snapshot_tick(snapshots, min_tick=attack_at, predicate=_high_fear)
    help_at = _first_trace_tick(
        traces,
        min_tick=attack_at,
        predicate=lambda trace: _is_help_request(trace),
    )
    # With no help request, the guard response is looked for from the attack on.
    guard_at = _first_guard_response_tick(world, traces, min_tick=help_at or attack_at)
    guard_ids = {npc.id for npc in world.npcs if normalize_role(npc.role) == "guard"}
    trust_at = _first_trust_tick(trust_deltas, min_tick=guard_at, guard_ids=guard_ids)
    beast_steps: dict[str, int | None] = {
        "beast_attack": attack_at,
        "fear": fear_at,
        "help_request": help_at,
        "guard_response": guard_at,
        "trust_change": trust_at,
    }
    beast_chain = ChainResult(
        name="beast attack -> fear -> help_request -> guard response -> trust change",
        occurred=all(tick is not None for tick in beast_steps.values()),
        ticks=beast_steps,
        evidence=_evidence(attack_at, fear_at, help_at, guard_at, trust_at),
    )

    # Chain 2: hunger followed by a food gather or a theft.
    hunger_at = _first_snapshot_tick(snapshots, min_tick=0, predicate=_high_hunger)
    gather_at = _first_event_tick(world, "gather", min_tick=hunger_at, predicate=_food_gather)
    steal_at = _first_trace_tick(traces, min_tick=hunger_at, predicate=_theft)
    hunger_relieved = gather_at is not None or steal_at is not None
    hunger_chain = ChainResult(
        name="hunger -> gather/steal",
        occurred=hunger_at is not None and hunger_relieved,
        ticks={
            "hunger": hunger_at,
            "gather": gather_at,
            "steal": steal_at,
        },
        evidence=_evidence(hunger_at, gather_at, steal_at),
    )

    # Chain 3: a completed build followed by a birth.
    build_at = _first_event_tick(world, "build_completed")
    birth_at = _first_event_tick(world, "npc_born", min_tick=build_at)
    build_chain = ChainResult(
        name="build -> reproduce",
        occurred=build_at is not None and birth_at is not None,
        ticks={
            "build_completed": build_at,
            "npc_born": birth_at,
        },
        evidence=_evidence(build_at, birth_at),
    )

    return [beast_chain, hunger_chain, build_chain]
def _first_event_tick(
    world: WorldState,
    event_type: str,
    *,
    min_tick: int | None = None,
    predicate: Any | None = None,
) -> int | None:
    """Return the tick of the first matching event in log order, or ``None``.

    An event matches when its type equals ``event_type``, its tick is not
    below ``min_tick`` (when given) and ``predicate(event)`` is truthy (when
    given).
    """
    matching_ticks = (
        event.tick
        for event in world.event_log
        if event.type == event_type
        and (min_tick is None or event.tick >= min_tick)
        and (predicate is None or predicate(event))
    )
    return next(matching_ticks, None)
def _first_snapshot_tick(
    snapshots: list[Snapshot],
    *,
    min_tick: int | None,
    predicate: Any,
) -> int | None:
    """Return the tick of the first snapshot satisfying ``predicate``, or ``None``.

    Snapshots earlier than ``min_tick`` are skipped when ``min_tick`` is given.
    """
    return next(
        (
            snapshot.tick
            for snapshot in snapshots
            if (min_tick is None or snapshot.tick >= min_tick) and predicate(snapshot)
        ),
        None,
    )
def _first_trace_tick(
    traces: list[ActionTrace],
    *,
    min_tick: int | None,
    predicate: Any,
) -> int | None:
    """Return the tick of the first action trace satisfying ``predicate``, or ``None``.

    Traces earlier than ``min_tick`` are skipped when ``min_tick`` is given.
    """
    return next(
        (
            trace.tick
            for trace in traces
            if (min_tick is None or trace.tick >= min_tick) and predicate(trace)
        ),
        None,
    )
def _is_help_request(trace: ActionTrace) -> bool:
    """Tell whether a trace is a "communicate" action that asks for help.

    The match is case-sensitive: the summary must contain "help", or the
    requested action must contain "help_request".
    """
    if trace.action != "communicate":
        return False
    summary = trace.summary
    # The first test is implied by the last; kept so evaluation order is unchanged.
    return (
        "calling for help" in summary
        or "help_request" in trace.requested_action
        or "help" in summary
    )
def _first_guard_response_tick(
    world: WorldState,
    traces: list[ActionTrace],
    *,
    min_tick: int | None,
) -> int | None:
    """Return the first tick at which a guard fought back, or ``None``.

    The event log is searched first for an ``npc_attack`` by a guard on a
    target whose id starts with "beast". Only if that finds nothing are the
    action traces searched for a guard performing "attack". Entries earlier
    than ``min_tick`` are skipped when ``min_tick`` is given.
    """
    role_by_id = {npc.id: normalize_role(npc.role) for npc in world.npcs}

    def _in_window(tick: int) -> bool:
        return min_tick is None or tick >= min_tick

    for event in world.event_log:
        if not _in_window(event.tick):
            continue
        if event.type != "npc_attack" or event.actor_id is None:
            continue
        if role_by_id.get(event.actor_id) != "guard":
            continue
        if (event.target_id or "").startswith("beast"):
            return event.tick

    return next(
        (
            trace.tick
            for trace in traces
            if _in_window(trace.tick) and trace.role == "guard" and trace.action == "attack"
        ),
        None,
    )
def _first_trust_tick(
    deltas: list[TrustDelta],
    *,
    min_tick: int | None,
    guard_ids: set[str],
) -> int | None:
    """Return the first tick at or after ``min_tick`` with a trust change involving a guard.

    A delta involves a guard when either its holder or its target is in
    ``guard_ids``. With ``min_tick`` set to ``None`` the result is always
    ``None``.
    """
    if min_tick is None:
        return None
    return next(
        (
            delta.tick
            for delta in deltas
            if delta.tick >= min_tick
            and (delta.target_id in guard_ids or delta.npc_id in guard_ids)
        ),
        None,
    )
def _evidence_lines(
    world: WorldState,
    traces: list[ActionTrace],
    trust_deltas: list[TrustDelta],
    *,
    ticks: tuple[int | None, ...],
) -> list[str]:
    """Collect up to eight distinct report lines for the given ticks.

    Lines come from major or "gather" events, then from traces that mention
    help or perform attack/steal/gather, then from trust deltas. ``None``
    ticks are ignored, and the result is empty when no tick remains.
    """
    wanted = {tick for tick in ticks if tick is not None}
    if not wanted:
        return []

    reportable_events = MAJOR_EVENT_TYPES | {"gather"}
    reportable_actions = {"attack", "steal", "gather"}

    lines = [
        f"tick {event.tick}: {event.type}: {event.summary}"
        for event in world.event_log
        if event.tick in wanted and event.type in reportable_events
    ]
    lines.extend(
        f"tick {trace.tick}: {trace.npc_id} {trace.role} {trace.action}: {trace.summary}"
        for trace in traces
        if trace.tick in wanted
        and ("help" in trace.summary or trace.action in reportable_actions)
    )
    lines.extend(
        f"tick {delta.tick}: trust {delta.npc_id}->{delta.target_id} "
        f"{delta.before:g}->{delta.after:g}"
        for delta in trust_deltas
        if delta.tick in wanted
    )
    # Several sources can yield the same text; keep the first eight unique lines.
    return _dedupe(lines)[:8]
def _balance_verdict(
    world: WorldState,
    snapshots: list[Snapshot],
    *,
    overseer_on: bool,
) -> str:
    """Summarise how well balanced the run was in one line.

    The checks run in order and the first match wins: extinction before tick
    250, overseer-off runs ending above six or at most three NPCs,
    overseer-on runs ending at most three NPCs, and overseer-on runs still
    "running" at the end. Anything else is reported as "mixed".

    Args:
        world: Final world state; only ``game_status`` is read.
        snapshots: Per-tick snapshots in chronological order; must not be empty.
        overseer_on: Whether the overseer was enabled for the run.
    """
    final_snapshot = snapshots[-1]
    final_population = final_snapshot.population
    min_population = min(snapshot.population for snapshot in snapshots)
    first_zero = next((snapshot.tick for snapshot in snapshots if snapshot.population <= 0), None)
    first_crippled = next(
        (snapshot.tick for snapshot in snapshots if snapshot.population <= 3),
        None,
    )

    if first_zero is not None and first_zero < 250:
        return f"died too fast: population hit 0 at tick {first_zero}"
    if not overseer_on and final_population > 6:
        return f"survived too easily: overseer off ended with population {final_population}"
    if not overseer_on and final_population <= 3:
        return (
            "chaos pressure is strong enough: "
            f"final population {final_population}, first crippled tick {first_crippled}"
        )
    if overseer_on and final_population <= 3:
        return f"overseer impact too weak: final population {final_population}"
    if overseer_on and world.game_status == "running":
        # Report the tick actually reached: the run length is configurable, so
        # a fixed "400" would be wrong for any other --ticks value.
        return (
            f"overseer run survived to {final_snapshot.tick} "
            f"with population {final_population}"
        )
    return (
        f"mixed: status={world.game_status}, final population={final_population}, "
        f"min population={min_population}"
    )
def _write_report(result: PlaytestResult) -> None:
    """Render the run as Markdown and write it to ``result.report_path``.

    Parent directories are created if needed. The report holds the run
    summary, verdict, chain results, event and action counts as JSON, a
    timeline of at most 160 major events, and per-tick snapshot data as CSV.
    """
    result.report_path.parent.mkdir(parents=True, exist_ok=True)
    world = result.world
    timeline_limit = 160

    event_counts = Counter(event.type for event in world.event_log)
    action_counts = Counter(trace.action for trace in result.action_traces)
    major_events = [event for event in world.event_log if event.type in MAJOR_EVENT_TYPES]
    lowest_population = min(snapshot.population for snapshot in result.snapshots)

    def _json_block(counts: Counter[str]) -> list[str]:
        # sort_keys gives a stable key order regardless of insertion order.
        return ["```json", json.dumps(dict(counts), indent=2, sort_keys=True), "```"]

    report: list[str] = [
        "# Playtest Report",
        "",
        "## Run",
        "",
        f"- Ticks requested: {result.ticks_requested}",
        f"- Final tick: {world.tick}",
        f"- Simulator: {result.simulator_label}",
        f"- Overseer: {result.overseer_label}",
        f"- Elapsed seconds: {result.elapsed_seconds:.2f}",
        f"- Game status: {world.game_status}",
        (
            f"- Population: final {world.population}, peak {world.peak_population}, "
            f"min {lowest_population}"
        ),
        f"- Births: {world.total_births}",
        f"- Deaths by cause: {json.dumps(world.deaths_by_cause, sort_keys=True)}",
        f"- Houses built: {world.houses_built}",
        f"- Beasts killed: {world.beasts_killed}",
        f"- Score: Overseer {world.overseer_score}, Chaos {world.chaos_score}",
        "",
        "## Verdict",
        "",
        result.verdict,
        "",
        "## Core Chains",
        "",
    ]

    for chain in result.chains:
        outcome = "yes" if chain.occurred else "no"
        report.append(
            f"- {chain.name}: {outcome}; ticks={json.dumps(chain.ticks, sort_keys=True)}"
        )
        report.extend(f" - {item}" for item in chain.evidence)

    report += ["", "## Event Counts", ""]
    report += _json_block(event_counts)
    report += ["", "## Action Counts", ""]
    report += _json_block(action_counts)
    report += ["", "## Timeline", ""]

    for event in major_events[:timeline_limit]:
        actor = f" actor={event.actor_id}" if event.actor_id else ""
        target = f" target={event.target_id}" if event.target_id else ""
        report.append(f"- tick {event.tick}: {event.type}{actor}{target}: {event.summary}")
    omitted = len(major_events) - timeline_limit
    if omitted > 0:
        report.append(f"- ... {omitted} additional major events omitted")

    report += [
        "",
        "## Population Graph Data",
        "",
        "```csv",
        "tick,population,living_beasts,completed_houses,total_food,avg_hunger,max_hunger,max_fear",
    ]
    for snapshot in result.snapshots:
        row = (
            snapshot.tick,
            snapshot.population,
            snapshot.living_beasts,
            snapshot.completed_houses,
            snapshot.total_food,
            snapshot.avg_hunger,
            snapshot.max_hunger,
            snapshot.max_fear,
        )
        report.append(",".join(str(value) for value in row))
    report += ["```", ""]

    result.report_path.write_text("\n".join(report), encoding="utf-8")
def _print_summary(result: PlaytestResult) -> None:
    """Print a compact, multi-line summary of the run to stdout."""
    world = result.world
    lowest_population = min(snapshot.population for snapshot in result.snapshots)

    # Each chain is labelled by the first step of its name.
    chain_flags = []
    for chain in result.chains:
        first_step = chain.name.partition(" -> ")[0]
        chain_flags.append(f"{first_step}={'yes' if chain.occurred else 'no'}")
    chain_summary = ", ".join(chain_flags)

    print(f"PLAYTEST report={result.report_path}")
    print(f" simulator={result.simulator_label}")
    print(f" overseer={result.overseer_label}")
    print(f" ticks={world.tick} status={world.game_status} verdict={result.verdict}")
    print(
        f" pop final={world.population} peak={world.peak_population} "
        f"min={lowest_population} "
        f"births={world.total_births} deaths={dict(world.deaths_by_cause)}"
    )
    print(
        f" houses_built={world.houses_built} beasts_killed={world.beasts_killed} "
        f"score={world.overseer_score}-{world.chaos_score}"
    )
    print(f" chains={chain_summary}")
def _dedupe(lines: list[str]) -> list[str]:
    """Return ``lines`` without repeats, keeping the first occurrence of each."""
    # dict keys are unique and keep insertion order, which is what is needed here.
    return list(dict.fromkeys(lines))
# Run the playtest only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()