"""Run one simulated match and generate a visual trace PNG.

Outputs:
- toxic_royale_env/outputs/samples/sim_episode_trace.json
- toxic_royale_env/outputs/plots/sim_episode_trace.png

Environment variables read by ``main``:
- TOXIC_SIM_SEED: integer seed for the simulator and the policy RNG (default 0).
- TOXIC_SIM_MAX_STEPS: maximum number of simulator steps (default 160).
- TOXIC_SIM_POLICY: ``greedy`` (default) or ``random``.
"""

from __future__ import annotations

import json
import os
import random
import subprocess
import sys
from pathlib import Path

# Zones a troop may be dropped in by the random policy.
TROOP_ZONES = ("back_left", "back_right", "mid_left", "mid_right")
# Spells may additionally target the bridges.
SPELL_ZONES = TROOP_ZONES + ("bridge_left", "bridge_right")
# Policy names with dedicated handling; anything else behaves like "greedy".
KNOWN_POLICIES = ("greedy", "random")
# Tolerance used when comparing a card cost against the current elixir.
ELIXIR_EPSILON = 1e-6


def is_affordable(cost: float, elixir: float) -> bool:
    """Return True when *cost* does not exceed *elixir* (within a small tolerance)."""
    return cost <= elixir + ELIXIR_EPSILON


def greedy_zone(step: int, is_spell: bool) -> str:
    """Return the zone the greedy policy uses at *step*.

    Sides alternate left/right on even/odd steps. Troops go to the back
    zones; spells go to the bridges so that interactions show up in the trace.
    """
    side = "left" if step % 2 == 0 else "right"
    return f"bridge_{side}" if is_spell else f"back_{side}"


def choose_action(policy: str, hand, card_defs, elixir: float, step: int, rng) -> dict:
    """Pick the next action for the given policy.

    Args:
        policy: ``"random"`` selects the random policy; any other value
            selects the greedy policy.
        hand: card names currently in hand, in order.
        card_defs: mapping from card name to an object exposing ``cost`` and
            ``kind`` attributes.
        elixir: elixir currently available.
        step: zero-based step index (drives the greedy left/right alternation).
        rng: ``random.Random``-like object; only used by the random policy and
            only when at least one card is affordable.

    Returns:
        ``{"kind": "wait"}`` or ``{"kind": "play", "card": ..., "zone": ...}``.
    """
    if policy == "random":
        # Random legal play, or wait 20% of the time even when a play exists.
        playable = [c for c in hand if is_affordable(card_defs[c].cost, elixir)]
        if not playable or rng.random() < 0.2:
            return {"kind": "wait"}
        card = rng.choice(playable)
        zones = SPELL_ZONES if card_defs[card].kind == "spell" else TROOP_ZONES
        return {"kind": "play", "card": card, "zone": rng.choice(zones)}

    # Greedy: first affordable card in hand order, deterministic zone.
    chosen = next((c for c in hand if is_affordable(card_defs[c].cost, elixir)), None)
    if chosen is None:
        return {"kind": "wait"}
    zone = greedy_zone(step, card_defs[chosen].kind == "spell")
    return {"kind": "play", "card": chosen, "zone": zone}


def main() -> int:
    """Simulate one match, write its JSON trace, and render it to a PNG.

    Returns:
        Process exit code; always 0. Rendering the PNG is best-effort: if the
        visualizer fails, a warning is printed to stderr and no PNG is copied.
    """
    # Imported lazily so that importing this module does not require the package.
    from toxic_royale_env.simulator import ToxicRoyaleSim

    seed = int(os.environ.get("TOXIC_SIM_SEED", "0"))
    # 160*0.5s = 80s; sim may overtime/finish earlier
    max_steps = int(os.environ.get("TOXIC_SIM_MAX_STEPS", "160"))
    policy = os.environ.get("TOXIC_SIM_POLICY", "greedy")  # greedy|random
    if policy not in KNOWN_POLICIES:
        print(
            f"warning: unknown TOXIC_SIM_POLICY {policy!r}; using the greedy policy",
            file=sys.stderr,
        )

    rng = random.Random(seed)
    sim = ToxicRoyaleSim(seed=seed)
    sim.reset()

    trace: dict = {"seed": seed, "policy": policy, "steps": []}

    for t in range(max_steps):
        st = sim.state()
        action = choose_action(
            policy,
            list(st.my_hand),
            sim._cards,  # noqa: SLF001
            st.my_elixir,
            t,
            rng,
        )
        # A "wait" action carries no card/zone, so those are passed as None.
        out = sim.step(
            kind=action["kind"],
            card=action.get("card"),
            zone=action.get("zone"),
            emote=None,
        )

        st2 = sim.state()
        last_event = sim.last_events(1)
        trace["steps"].append(
            {
                "t": t,
                "action": action,
                "reward": float(out.get("reward_total") or 0.0),
                "reward_breakdown": out.get("reward_breakdown", {}),
                "my_elixir": st2.my_elixir,
                "opp_elixir_est": st2.opp_elixir_estimate,
                "my_tower_hp": st2.my_tower_hp,
                "opp_tower_hp": st2.opp_tower_hp,
                "my_crowns": st2.my_crowns,
                "opp_crowns": st2.opp_crowns,
                "invalid_last": st2.invalid_action_last,
                "event": (last_event[0] if last_event else None),
            }
        )
        if st2.done:
            break

    root = Path(__file__).resolve().parents[1]
    out_trace = root / "outputs" / "samples" / "sim_episode_trace.json"
    out_trace.parent.mkdir(parents=True, exist_ok=True)
    out_trace.write_text(
        json.dumps(trace, indent=2, ensure_ascii=False) + "\n", encoding="utf-8"
    )
    print("wrote", out_trace)

    # Reuse the existing visualizer by writing to its expected location, then
    # saving under a new name.
    # NOTE(review): this overwrites any existing fast_rl_episode_trace.json.
    expected = root / "outputs" / "training" / "fast_rl_episode_trace.json"
    expected.parent.mkdir(parents=True, exist_ok=True)
    expected.write_text(
        json.dumps({"episode": "sim", "steps": trace["steps"]}, indent=2, ensure_ascii=False)
        + "\n",
        encoding="utf-8",
    )

    # Argument list (no shell) so paths with spaces or metacharacters are safe;
    # prefer the interpreter running this script so the same environment is used.
    script = root / "training" / "visualize_episode_trace.py"
    result = subprocess.run([sys.executable or "python3", str(script)], check=False)

    src_png = root / "outputs" / "plots" / "episode_trace.png"
    dst_png = root / "outputs" / "plots" / "sim_episode_trace.png"
    if result.returncode != 0:
        # Do not copy: an existing PNG would be left over from an earlier run.
        print(
            f"warning: visualizer exited with status {result.returncode}; "
            f"not writing {dst_png}",
            file=sys.stderr,
        )
    elif src_png.exists():
        dst_png.write_bytes(src_png.read_bytes())
        print("wrote", dst_png)
    else:
        print(f"warning: visualizer did not produce {src_png}", file=sys.stderr)
    return 0


if __name__ == "__main__":
    raise SystemExit(main())