world-simulator / scripts /live_sim.py
kikikita's picture
Refactor survival prompt to provide a readable mission briefing
566718d
Raw
History Blame Contribute Delete
3.84 kB
"""Headless live-LLM self-play.
Drives the *real* runtime pipeline against the configured Modal endpoints (no
fake completer) for N ticks, then prints the ledger path and a behaviour
summary. Used to audit how the real NPC model reacts to the prompt briefing.
Usage:
.venv\\Scripts\\python.exe scripts\\live_sim.py --ticks 60
"""
from __future__ import annotations
import argparse
import json
from collections import Counter
from pathlib import Path
import sys
# Repository root: the directory one level above the folder holding this file.
REPO_ROOT = Path(__file__).resolve().parents[1]
SRC_ROOT = REPO_ROOT.joinpath("src")
# Put <repo>/src at the front of the import search path so the project imports
# in main() resolve when the script is run directly (presumably the
# world_simulator package lives there -- confirm). Skipped when the entry is
# already present, so it is never added twice.
if sys.path.count(str(SRC_ROOT)) == 0:
    sys.path.insert(0, str(SRC_ROOT))
def _load_env(env_path: Path | None = None) -> None:
    """Load ``KEY=VALUE`` pairs from a dotenv-style file into ``os.environ``.

    Blank lines, lines starting with ``#``, lines without ``=`` and lines with
    nothing before the ``=`` are ignored. Every other line is split on its
    first ``=``; key and value are stripped of surrounding whitespace, and one
    pair of matching single or double quotes around the value is removed.
    Variables that already exist in the environment are never overwritten.

    Args:
        env_path: File to read. Defaults to ``.env`` directly under
            ``REPO_ROOT``. A missing file is silently ignored.
    """
    import os

    if env_path is None:
        env_path = REPO_ROOT / ".env"
    if not env_path.is_file():
        return

    # "utf-8-sig" decodes plain UTF-8 too, but also drops a leading byte-order
    # mark; str.strip() does not remove it, so it would otherwise become part
    # of the first key.
    for raw_line in env_path.read_text(encoding="utf-8-sig").splitlines():
        line = raw_line.strip()
        if not line or line.startswith("#"):
            continue
        key, separator, value = line.partition("=")
        key = key.strip()
        if not separator or not key:
            # Not an assignment, or an empty variable name (which os.environ
            # refuses to set) -- skip instead of aborting the whole load.
            continue
        value = value.strip()
        if len(value) >= 2 and value[0] == value[-1] and value[0] in "\"'":
            # KEY="some value" -> some value
            value = value[1:-1]
        # setdefault: the real environment takes precedence over the file.
        os.environ.setdefault(key, value)
def main() -> None:
    """Run a headless simulation for ``--ticks`` ticks and print a summary.

    Command-line options:
        --ticks   number of ticks to attempt (default 60)
        --config  path to the game config JSON
                  (default ``config/game.modal.local.json`` under REPO_ROOT)

    The loop stops at the first tick whose status is not 200. The ledger path
    and the ledger summary are printed afterwards either way.
    """
    # Apply .env before the project imports below run (presumably so settings
    # read at import time already see those variables -- confirm).
    _load_env()

    from world_simulator.api.runtime import create_game_runtime
    from world_simulator.config import load_game_config
    from world_simulator.simulation.spawning import create_world

    default_config = REPO_ROOT / "config" / "game.modal.local.json"
    cli = argparse.ArgumentParser(description="Headless live-LLM self-play.")
    cli.add_argument("--ticks", type=int, default=60)
    cli.add_argument("--config", type=Path, default=default_config)
    args = cli.parse_args()

    config = load_game_config(args.config)
    world = create_world(config)
    runtime = create_game_runtime(world=world, config=config)

    banner = (
        f"LIVE start config={args.config.name} ticks={args.ticks} "
        f"simulator={runtime.simulator_name} npcs={len(world.npcs)}"
    )
    print(banner)

    for i in range(args.ticks):
        status, _ = runtime.tick()
        if int(status) != 200:
            print(f"tick {i} failed: {status}")
            break
        if (i + 1) % 10:
            # Progress is reported on every tenth completed tick only.
            continue
        print(f" ...tick {i + 1} done")

    # The runtime's ledger is read through a private attribute.
    ledger_path = runtime._ledger.ledger_path  # noqa: SLF001
    print(f"LIVE done. ledger={ledger_path}")
    _summarize(ledger_path)
def _summarize(ledger_path: Path) -> None:
    """Print frequency tables for a JSON-lines ledger file.

    Each non-blank line is parsed as one JSON object and tallied by its
    ``phase`` field:

    * ``npc_response``: counts ``validator_verdict["status"]`` and, when
      ``parsed_action`` is a dict with a truthy ``action``, that action.
    * ``npc_fallback``: counts ``reason``.
    * ``engine_events``: counts the ``type`` of every entry in ``events``.

    Records with any other phase are ignored. Lines that are not valid JSON
    or do not decode to an object (for example a truncated final line) are
    skipped and reported in an extra section, so one bad line does not lose
    the whole summary.

    Args:
        ledger_path: Path of the UTF-8 ledger file to read.
    """
    actions: Counter[str] = Counter()
    verdicts: Counter[str] = Counter()
    fallbacks: Counter[str] = Counter()
    event_types: Counter[str] = Counter()
    malformed = 0

    with ledger_path.open("r", encoding="utf-8") as handle:
        for line in handle:
            if not line.strip():
                continue
            try:
                rec = json.loads(line)
            except json.JSONDecodeError:
                malformed += 1
                continue
            if not isinstance(rec, dict):
                malformed += 1
                continue

            phase = rec.get("phase")
            if phase == "npc_response":
                # `or {}` turns a missing or null field into an empty mapping.
                verdict = rec.get("validator_verdict") or {}
                verdicts[str(verdict.get("status"))] += 1
                parsed = rec.get("parsed_action") or {}
                if isinstance(parsed, dict) and parsed.get("action"):
                    actions[str(parsed["action"])] += 1
            elif phase == "npc_fallback":
                fallbacks[str(rec.get("reason"))] += 1
            elif phase == "engine_events":
                for event in rec.get("events") or []:
                    event_types[str(event.get("type"))] += 1

    sections = (
        ("parsed LLM actions", actions.most_common()),
        ("validator verdicts", verdicts.most_common()),
        # Show an explicit placeholder row when no fallback was recorded.
        ("fallback reasons", fallbacks.most_common() or [("none", 0)]),
        ("engine event types", event_types.most_common()),
    )
    for title, rows in sections:
        print(f"\n== {title} ==")
        for label, count in rows:
            print(f" {label}: {count}")

    if malformed:
        print("\n== skipped ledger lines ==")
        print(f" malformed: {malformed}")
# Script entry point: run the simulation only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    main()