from __future__ import annotations
"""
Run one simulated match and generate a visual trace PNG.
Outputs:
- toxic_royale_env/outputs/samples/sim_episode_trace.json
- toxic_royale_env/outputs/plots/sim_episode_trace.png
"""
import json
import os
import random
from pathlib import Path
# Zones offered to the random policy; spells may additionally target the bridges.
_TROOP_ZONES = ["back_left", "back_right", "mid_left", "mid_right"]
_SPELL_ZONES = ["back_left", "back_right", "mid_left", "mid_right", "bridge_left", "bridge_right"]


def _is_affordable(sim, card, elixir) -> bool:
    """Return True when *card* costs no more than *elixir* (with a small float tolerance)."""
    return sim._cards[card].cost <= elixir + 1e-6  # noqa: SLF001


def _pick_random_play(sim, hand, elixir, rng):
    """Return a random affordable ``(card, zone)`` pair, or None to wait.

    Waits when nothing is affordable, and otherwise with probability 0.2.
    """
    playable = [c for c in hand if _is_affordable(sim, c, elixir)]
    # rng.random() is only drawn when something is playable; keeping that
    # short-circuit keeps traces for a given seed reproducible.
    if not playable or rng.random() < 0.2:
        return None
    card = rng.choice(playable)
    is_spell = sim._cards[card].kind == "spell"  # noqa: SLF001
    return card, rng.choice(_SPELL_ZONES if is_spell else _TROOP_ZONES)


def _pick_greedy_play(sim, hand, elixir, t):
    """Return ``(card, zone)`` for the first affordable card in *hand*, or None to wait.

    Non-spell cards go to the back zones; spells go to the bridges so the
    trace shows more interactions. The side alternates with the parity of *t*.
    """
    card = next((c for c in hand if _is_affordable(sim, c, elixir)), None)
    if card is None:
        return None
    lane = "bridge" if sim._cards[card].kind == "spell" else "back"  # noqa: SLF001
    side = "left" if t % 2 == 0 else "right"
    return card, f"{lane}_{side}"


def main() -> int:
    """Run one simulated match, dump its trace as JSON and render it to a PNG.

    Configuration is read from environment variables:

    - ``TOXIC_SIM_SEED`` (default ``0``): seed for the simulator and the policy RNG.
    - ``TOXIC_SIM_MAX_STEPS`` (default ``160``): upper bound on simulated steps.
    - ``TOXIC_SIM_POLICY`` (default ``greedy``): ``random`` selects the random
      policy; any other value selects the greedy one.

    Files are written below ``<root>/outputs`` where ``<root>`` is the parent
    of this script's directory. The PNG is produced by running
    ``training/visualize_episode_trace.py`` and copying its ``episode_trace.png``.

    Returns:
        Always ``0``. A failing visualizer is reported on stderr and the PNG
        copy is skipped, but the JSON trace has already been written.
    """
    import subprocess
    import sys

    from toxic_royale_env.simulator import ToxicRoyaleSim

    seed = int(os.environ.get("TOXIC_SIM_SEED", "0"))
    # Original author's note: 160*0.5s = 80s; sim may overtime/finish earlier.
    max_steps = int(os.environ.get("TOXIC_SIM_MAX_STEPS", "160"))
    policy = os.environ.get("TOXIC_SIM_POLICY", "greedy")  # greedy|random
    rng = random.Random(seed)

    sim = ToxicRoyaleSim(seed=seed)
    sim.reset()

    trace: dict = {"seed": seed, "policy": policy, "steps": []}

    for t in range(max_steps):
        st = sim.state()
        hand = list(st.my_hand)

        if policy == "random":
            play = _pick_random_play(sim, hand, st.my_elixir, rng)
        else:
            play = _pick_greedy_play(sim, hand, st.my_elixir, t)

        action: dict
        if play is None:
            action = {"kind": "wait"}
            out = sim.step(kind="wait", card=None, zone=None, emote=None)
        else:
            card, zone = play
            action = {"kind": "play", "card": card, "zone": zone}
            out = sim.step(kind="play", card=card, zone=zone, emote=None)

        st2 = sim.state()
        last_event = sim.last_events(1)
        trace["steps"].append(
            {
                "t": t,
                "action": action,
                # A missing or None reward_total is recorded as 0.0.
                "reward": float(out.get("reward_total") or 0.0),
                "reward_breakdown": out.get("reward_breakdown", {}),
                "my_elixir": st2.my_elixir,
                "opp_elixir_est": st2.opp_elixir_estimate,
                "my_tower_hp": st2.my_tower_hp,
                "opp_tower_hp": st2.opp_tower_hp,
                "my_crowns": st2.my_crowns,
                "opp_crowns": st2.opp_crowns,
                "invalid_last": st2.invalid_action_last,
                "event": (last_event[0] if last_event else None),
            }
        )
        if st2.done:
            break

    root = Path(__file__).resolve().parents[1]
    out_trace = root / "outputs" / "samples" / "sim_episode_trace.json"
    out_trace.parent.mkdir(parents=True, exist_ok=True)
    out_trace.write_text(json.dumps(trace, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    print("wrote", out_trace)

    # Reuse the existing visualizer by writing to its expected location, then saving under a new name.
    expected = root / "outputs" / "training" / "fast_rl_episode_trace.json"
    expected.parent.mkdir(parents=True, exist_ok=True)
    expected.write_text(
        json.dumps({"episode": "sim", "steps": trace["steps"]}, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )

    # Argument list (no shell) so paths with spaces or metacharacters are safe,
    # and the current interpreter so the visualizer runs in the same environment.
    visualizer = root / "training" / "visualize_episode_trace.py"
    result = subprocess.run([sys.executable or "python3", str(visualizer)], check=False)
    if result.returncode != 0:
        # Do not copy a PNG left over from an earlier run as if it were fresh.
        print(
            f"warning: {visualizer} exited with status {result.returncode}; "
            "sim_episode_trace.png was not updated",
            file=sys.stderr,
        )
        return 0

    src_png = root / "outputs" / "plots" / "episode_trace.png"
    dst_png = root / "outputs" / "plots" / "sim_episode_trace.png"
    if src_png.exists():
        dst_png.write_bytes(src_png.read_bytes())
        print("wrote", dst_png)
    return 0
if __name__ == "__main__":
    # Script entry point: propagate main()'s return value as the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)
|