File size: 4,517 Bytes
b0620f3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
from __future__ import annotations

"""
Run one simulated match and generate a visual trace PNG.

Outputs:
- toxic_royale_env/outputs/samples/sim_episode_trace.json
- toxic_royale_env/outputs/plots/sim_episode_trace.png
"""

import json
import os
import random
from pathlib import Path


# Zones the scripted policies may deploy into, by card kind.
_TROOP_ZONES = ["back_left", "back_right", "mid_left", "mid_right"]
_SPELL_ZONES = ["back_left", "back_right", "mid_left", "mid_right", "bridge_left", "bridge_right"]


def _is_affordable(sim, card, elixir: float) -> bool:
    """Return True when *card*'s cost fits within *elixir* (with a small float tolerance)."""
    return sim._cards[card].cost <= elixir + 1e-6  # noqa: SLF001


def _random_action(sim, hand: list, elixir: float, rng: random.Random) -> dict:
    """Pick a random affordable card and zone; wait when nothing is playable or on a 20% roll."""
    playable = [c for c in hand if _is_affordable(sim, c, elixir)]
    # rng.random() is only consumed when something is playable, so the RNG
    # stream (and therefore the trace for a given seed) stays reproducible.
    if not playable or rng.random() < 0.2:
        return {"kind": "wait"}
    card = rng.choice(playable)
    zones = _SPELL_ZONES if sim._cards[card].kind == "spell" else _TROOP_ZONES  # noqa: SLF001
    return {"kind": "play", "card": card, "zone": rng.choice(zones)}


def _greedy_action(sim, hand: list, elixir: float, t: int) -> dict:
    """Play the first affordable card in hand order, alternating left/right by step parity."""
    chosen = next((c for c in hand if _is_affordable(sim, c, elixir)), None)
    if chosen is None:
        return {"kind": "wait"}
    side = "left" if t % 2 == 0 else "right"
    # Spells go to the bridge (more aggressive, so interactions show up in the
    # trace); everything else is dropped in the back.
    lane = "bridge" if sim._cards[chosen].kind == "spell" else "back"  # noqa: SLF001
    return {"kind": "play", "card": chosen, "zone": f"{lane}_{side}"}


def main() -> int:
    """Simulate one match with a scripted policy, dump its trace, and render a PNG.

    Configuration is read from environment variables:

    - ``TOXIC_SIM_SEED`` (default ``0``): seed for both the simulator and the
      policy's own random generator.
    - ``TOXIC_SIM_MAX_STEPS`` (default ``160``): upper bound on simulated steps.
    - ``TOXIC_SIM_POLICY`` (default ``greedy``): ``random`` selects the random
      policy; any other value falls back to the greedy policy.

    The trace is written to ``outputs/samples/sim_episode_trace.json`` and also
    to ``outputs/training/fast_rl_episode_trace.json`` (the path the existing
    visualizer script is fed from). The visualizer is then run and its
    ``outputs/plots/episode_trace.png`` is copied to
    ``outputs/plots/sim_episode_trace.png``.

    Returns:
        ``0`` on success; ``1`` when the visualizer could not be run, exited
        with a non-zero status, or did not leave a PNG to copy.

    Raises:
        ValueError: if the seed or max-steps environment variable is not an
            integer.
    """
    # Local imports keep this function self-contained, matching the existing
    # function-scope import of the simulator.
    import subprocess
    import sys

    from toxic_royale_env.simulator import ToxicRoyaleSim

    seed = int(os.environ.get("TOXIC_SIM_SEED", "0"))
    max_steps = int(os.environ.get("TOXIC_SIM_MAX_STEPS", "160"))  # 160*0.5s = 80s; sim may overtime/finish earlier
    policy = os.environ.get("TOXIC_SIM_POLICY", "greedy")  # greedy|random

    rng = random.Random(seed)
    sim = ToxicRoyaleSim(seed=seed)
    sim.reset()

    trace: dict = {"seed": seed, "policy": policy, "steps": []}

    for t in range(max_steps):
        st = sim.state()
        hand = list(st.my_hand)

        if policy == "random":
            action = _random_action(sim, hand, st.my_elixir, rng)
        else:
            action = _greedy_action(sim, hand, st.my_elixir, t)

        # A "wait" action carries no card/zone, so .get() yields None for both.
        out = sim.step(kind=action["kind"], card=action.get("card"), zone=action.get("zone"), emote=None)

        st2 = sim.state()
        last_event = sim.last_events(1)
        trace["steps"].append(
            {
                "t": t,
                "action": action,
                # `or 0.0` also covers a present-but-None reward.
                "reward": float(out.get("reward_total") or 0.0),
                "reward_breakdown": out.get("reward_breakdown", {}),
                "my_elixir": st2.my_elixir,
                "opp_elixir_est": st2.opp_elixir_estimate,
                "my_tower_hp": st2.my_tower_hp,
                "opp_tower_hp": st2.opp_tower_hp,
                "my_crowns": st2.my_crowns,
                "opp_crowns": st2.opp_crowns,
                "invalid_last": st2.invalid_action_last,
                "event": (last_event[0] if last_event else None),
            }
        )

        if st2.done:
            break

    root = Path(__file__).resolve().parents[1]
    out_trace = root / "outputs" / "samples" / "sim_episode_trace.json"
    out_trace.parent.mkdir(parents=True, exist_ok=True)
    out_trace.write_text(json.dumps(trace, indent=2, ensure_ascii=False) + "\n", encoding="utf-8")
    print("wrote", out_trace)

    # Reuse the existing visualizer by writing to its expected location, then saving under a new name.
    # NOTE(review): this overwrites whatever trace was already at that path.
    expected = root / "outputs" / "training" / "fast_rl_episode_trace.json"
    expected.parent.mkdir(parents=True, exist_ok=True)
    expected.write_text(
        json.dumps({"episode": "sim", "steps": trace["steps"]}, indent=2, ensure_ascii=False) + "\n",
        encoding="utf-8",
    )

    # Run the visualizer with an argument list (no shell), so a checkout path
    # containing spaces or shell metacharacters cannot break or alter the
    # command, and prefer the interpreter that is running this script.
    script = root / "training" / "visualize_episode_trace.py"
    try:
        returncode = subprocess.run([sys.executable or "python3", str(script)], check=False).returncode
    except OSError as err:
        print(f"could not launch visualizer: {err}", file=sys.stderr)
        return 1
    if returncode != 0:
        # Do not copy: any PNG on disk would be left over from an earlier run.
        print(f"visualizer exited with status {returncode}; PNG not updated", file=sys.stderr)
        return 1

    src_png = root / "outputs" / "plots" / "episode_trace.png"
    dst_png = root / "outputs" / "plots" / "sim_episode_trace.png"
    if not src_png.exists():
        print(f"visualizer did not produce {src_png}", file=sys.stderr)
        return 1
    dst_png.write_bytes(src_png.read_bytes())
    print("wrote", dst_png)
    return 0


if __name__ == "__main__":
    # Script entry point: hand main()'s integer result to the interpreter as
    # the process exit status.
    exit_status = main()
    raise SystemExit(exit_status)