from __future__ import annotations import math import random from dataclasses import dataclass, field from typing import Literal, TypedDict from .arena_geometry import Arena from .card_catalog import load_cards from .reward_config import load_reward_config Zone = Literal[ "bridge_left", "bridge_right", "back_left", "back_right", "mid_left", "mid_right", ] Emote = Literal["laugh", "yawn", "cry", "thanks", "chicken", "wp"] class TowerHP(TypedDict): left: int right: int king: int @dataclass class Unit: owner: Literal["me", "opp"] card: str hp: float dps: float targets: Literal["ground", "air", "both", "towers"] zone: Zone is_air: bool stage: int # 0=back, 1=mid, 2=bridge splash: bool = False radius: float = 0.0 @dataclass class CardDef: key: str name: str cost: int kind: Literal["troop", "spell", "building"] # simplified combat stats (we keep them stable + fast; can be expanded later) hp: int = 0 dps: int = 0 targets: Literal["ground", "air", "both", "towers"] = "ground" is_air: bool = False spell_damage: int = 0 splash: bool = False radius: float = 0.0 description: str | None = None def _infer_carddef(card_key: str, catalog: dict) -> CardDef: """ Convert a real card record into our simplified simulator parameters. We keep the simulator intentionally abstract, so we infer missing values and clamp stats to keep learning stable. """ c = catalog.get(card_key) if c is None: raise KeyError(card_key) kind: Literal["troop", "spell", "building"] if c.type == "Troop": kind = "troop" elif c.type == "Building": kind = "building" elif c.type == "Spell": kind = "spell" else: kind = "troop" # Map targets to a coarse set. targets = "both" if (c.targets and "Air" in c.targets) else "ground" if c.targets and "Buildings" in c.targets: targets = "towers" hp = int(c.hitpoints or 0) dps = int(c.damage_per_second or 0) dmg = int(c.damage or 0) # Fall back if DPS missing. if dps <= 0 and dmg > 0 and c.hit_speed: dps = int(dmg / max(0.1, float(c.hit_speed))) # Clamp to keep sim stable. hp = max(1, min(hp, 6000)) if kind != "spell" else 0 dps = max(0, min(dps, 800)) if kind != "spell" else 0 # Spell damage: use "damage" if available else a stable fallback by elixir. spell_damage = 0 if kind == "spell": spell_damage = int(c.damage or (c.elixir * 120)) spell_damage = max(40, min(spell_damage, 1600)) is_air = bool(c.move_speed and "Air" in c.move_speed) # imperfect # Splash/radius heuristics for our small pool (good enough for RL + rewards) splash = card_key in {"fireball", "arrows"} radius = 0.0 if card_key == "fireball": radius = 0.35 if card_key == "arrows": radius = 0.45 # Targeting priors (closer to real CR interactions for our pool) # - Giant targets buildings (incl towers) only if card_key == "giant": targets = "towers" return CardDef( key=card_key, name=c.name, cost=int(c.elixir), kind=kind, hp=hp, dps=dps, targets=targets, # type: ignore[arg-type] is_air=is_air, spell_damage=spell_damage, splash=splash, radius=radius, description=c.description, ) PLACEMENT_ZONES: tuple[Zone, ...] = ( "bridge_left", "bridge_right", "back_left", "back_right", "mid_left", "mid_right", ) def _zone_lane(zone: Zone) -> Literal["left", "right"]: return "left" if zone.endswith("_left") else "right" def _is_air_zone(zone: Zone) -> bool: return False def _zone_stage(zone: Zone) -> int: if zone.startswith("back_"): return 0 if zone.startswith("mid_"): return 1 return 2 @dataclass class GameState: turn: int time_remaining_s: float double_elixir: bool my_elixir: float opp_elixir_estimate: float my_tower_hp: TowerHP opp_tower_hp: TowerHP my_hand: list[str] my_next_card: str opp_hand_estimate: list[str] my_units: list[Unit] = field(default_factory=list) opp_units: list[Unit] = field(default_factory=list) opp_tilt_meter: float = 0.0 my_crowns: int = 0 opp_crowns: int = 0 invalid_action_last: bool = False invalid_action_count: int = 0 done: bool = False class ToxicRoyaleSim: """ Lightweight, fast text simulator. Design goals for hackathon: - deterministic-ish (seedable) and fast enough for many rollouts - objective, non-zero rewards early - supports "tilt meter" dynamics for novelty """ def __init__(self, seed: int | None = None): self._rng = random.Random(seed) self._seed = seed self._last_events: list[str] = [] self._invalid_last = False self._arena = Arena() self.reset() def _zone_point(self, owner: Literal["me", "opp"], zone: Zone) -> tuple[float, float]: """ Map an abstract placement zone to a normalized arena (x,y). Convention: - y grows from opponent side (0) to my side (1) - our zone definitions are expressed from *my* perspective so we mirror y for opponent placements. """ x = 0.33 if zone.endswith("_left") else 0.67 if zone.startswith("back_"): y = 0.80 elif zone.startswith("mid_"): y = 0.66 else: y = 0.52 if owner == "opp": y = 1.0 - y return x, y def _placement_is_valid(self, owner: Literal["me", "opp"], card: CardDef, zone: Zone) -> bool: x, y = self._zone_point(owner, zone) if card.kind == "spell": return True return self._arena.can_place_troop(owner, x, y) def reset(self) -> GameState: self._last_events = [] self._turn = 0 # Real-ish Clash timing: 3:00 regulation + overtime if tied. # Keep it simple but closer to the real feel of the screen you shared. self._time_remaining_s = 180.0 self._in_overtime = False self._my_elixir = 5.0 self._opp_elixir = 5.0 self._my_tower_hp: TowerHP = {"left": 1400, "right": 1400, "king": 2400} self._opp_tower_hp: TowerHP = {"left": 1400, "right": 1400, "king": 2400} # Load real card catalog (RoyaleAPI static data) once per sim instance. if not hasattr(self, "_catalog"): self._catalog = load_cards() # Training pool: keep it small first (stable learning), but every card has a real description. # You can expand this list later to "all cards" without changing the env API. self._training_pool = [ "giant", "knight", "minions", "archers", "fireball", "arrows", "musketeer", "mini-pekka", ] # Build simulator defs for the pool. self._cards: dict[str, CardDef] = {} for key in self._training_pool: # keys in RoyaleAPI dataset are kebab-case (e.g. "mini-pekka") self._cards[key] = _infer_carddef(key, self._catalog) self._cards["wait"] = CardDef(key="wait", name="Wait", cost=0, kind="spell", spell_damage=0) self._my_deck = list(self._training_pool) self._opp_deck = list(self._training_pool) self._rng.shuffle(self._my_deck) self._rng.shuffle(self._opp_deck) self._my_hand = [self._my_deck.pop() for _ in range(4)] self._my_next = self._my_deck.pop() self._opp_hand = [self._opp_deck.pop() for _ in range(4)] self._opp_next = self._opp_deck.pop() self._my_units: list[Unit] = [] self._opp_units: list[Unit] = [] self._opp_tilt = 0.0 self._my_crowns = 0 self._opp_crowns = 0 self._done = False self._invalid_last = False self._invalid_count = 0 self._my_spent_last = 0.0 self._opp_spent_last = 0.0 self._my_wait_last = False self._opp_wait_last = False self._last_spell_hits = 0 self._last_spell_tower_dmg = 0 self._punish_window = 0 self._log_event("Game start.") return self.state() def state(self) -> GameState: return GameState( turn=self._turn, time_remaining_s=self._time_remaining_s, double_elixir=(self._time_remaining_s <= 60.0) or bool(getattr(self, "_in_overtime", False)), my_elixir=self._my_elixir, opp_elixir_estimate=self._opp_elixir, my_tower_hp=dict(self._my_tower_hp), opp_tower_hp=dict(self._opp_tower_hp), my_hand=list(self._my_hand), my_next_card=self._my_next, opp_hand_estimate=list(self._opp_hand), my_units=list(self._my_units), opp_units=list(self._opp_units), opp_tilt_meter=self._opp_tilt, my_crowns=self._my_crowns, opp_crowns=self._opp_crowns, invalid_action_last=self._invalid_last, invalid_action_count=self._invalid_count, done=self._done, ) def last_events(self, k: int = 6) -> list[str]: return self._last_events[-k:] def step(self, *, kind: Literal["play", "wait"], card: str | None, zone: Zone | None, emote: str | None) -> dict: """ Advance one tick (0.5s) with (agent action + scripted opponent response). Returns: a dict with reward_total and reward_breakdown. """ if self._done: return {"reward_total": 0.0, "reward_breakdown": {"already_done": 0.0}} before = self._snapshot_score() self._invalid_last = False self._my_spent_last = 0.0 self._opp_spent_last = 0.0 self._my_wait_last = False self._opp_wait_last = False self._last_spell_hits = 0 self._last_spell_tower_dmg = 0 self._punish_window = max(0, int(getattr(self, "_punish_window", 0)) - 1) # --- apply agent action --- self._apply_player_action(owner="me", kind=kind, card=card, zone=zone) self._update_tilt(emote=emote) if self._invalid_last: self._invalid_count += 1 # --- apply opponent action (scripted, tilt-affected) --- self._scripted_opponent_action() # If opponent just made a big investment, you have a short window to punish. if float(getattr(self, "_opp_spent_last", 0.0)) >= 7.0: self._punish_window = 4 # ~2 seconds (4 ticks) # --- tick combat + elixir regen --- self._tick() after = self._snapshot_score() breakdown = self._compute_rewards(before, after, emote=emote, invalid_action=self._invalid_last) return breakdown # ------------------------- # Internal mechanics # ------------------------- def _apply_player_action(self, *, owner: Literal["me", "opp"], kind: Literal["play", "wait"], card: str | None, zone: Zone | None): if kind == "wait": self._log_event(f"{owner} waits.") if owner == "me": self._my_wait_last = True else: self._opp_wait_last = True return if card is None or zone is None: self._log_event(f"{owner} attempted invalid play (missing card/zone).") if owner == "me": self._invalid_last = True return if owner == "me" and card not in self._my_hand: self._log_event(f"{owner} attempted to play {card} not in hand.") self._invalid_last = True return if owner == "opp" and card not in self._opp_hand: self._log_event(f"{owner} attempted to play {card} not in hand.") return cdef = self._cards.get(card) if cdef is None or card == "wait": self._log_event(f"{owner} attempted unknown card '{card}'.") if owner == "me": self._invalid_last = True return elixir = self._my_elixir if owner == "me" else self._opp_elixir if elixir + 1e-6 < cdef.cost: self._log_event(f"{owner} tried to overspend elixir on {card} (cost {cdef.cost}).") if owner == "me": self._invalid_last = True return if not self._placement_is_valid(owner, cdef, zone): self._log_event(f"{owner} attempted illegal placement: {card} at {zone}.") if owner == "me": self._invalid_last = True return # spend if owner == "me": self._my_elixir -= cdef.cost self._my_spent_last = float(cdef.cost) else: self._opp_elixir -= cdef.cost self._opp_spent_last = float(cdef.cost) # cycle hand if owner == "me": self._my_hand.remove(card) self._my_hand.append(self._my_next) self._my_next = self._my_deck.pop() if self._my_deck else self._rng.choice(self._my_hand) else: self._opp_hand.remove(card) self._opp_hand.append(self._opp_next) self._opp_next = self._opp_deck.pop() if self._opp_deck else self._rng.choice(self._opp_hand) if cdef.kind == "spell": self._apply_spell(owner=owner, card=card, zone=zone, dmg=cdef.spell_damage) return # troops/buildings become units is_air = cdef.is_air or _is_air_zone(zone) u = Unit( owner=owner, card=card, hp=float(cdef.hp), dps=float(cdef.dps), targets=cdef.targets, zone=zone, is_air=is_air, stage=_zone_stage(zone), ) if owner == "me": self._my_units.append(u) else: self._opp_units.append(u) self._log_event(f"{owner} played {card} at {zone} (cost {cdef.cost}).") def _apply_spell(self, *, owner: Literal["me", "opp"], card: str, zone: Zone, dmg: int): lane = _zone_lane(zone) target_towers = self._opp_tower_hp if owner == "me" else self._my_tower_hp target_units = self._opp_units if owner == "me" else self._my_units cdef = self._cards.get(card) radius = float(getattr(cdef, "radius", 0.0) or 0.0) # --- AoE: hit up to N units in the lane (front-most first) --- if radius > 0: lane_units = [u for u in target_units if _zone_lane(u.zone) == lane] # Larger radius -> more units affected, capped. max_hits = 3 if radius < 0.40 else 5 lane_units.sort(key=lambda u: (u.stage, -u.hp), reverse=True) hits = lane_units[:max_hits] if hits: for u in hits: u.hp -= float(dmg) self._log_event(f"{owner} cast {card} hitting {len(hits)} units in {lane} for {dmg}.") self._last_spell_hits = max(self._last_spell_hits, len(hits)) # --- Tower chip (spells can hit towers when targeted near them) --- # Bridge/mid zones chip lane tower lightly; back zones are assumed defensive. tower_mult = 1.0 if zone.startswith("bridge_") else (0.6 if zone.startswith("mid_") else 0.0) tower_dmg = int(dmg * tower_mult) if tower_dmg > 0: if target_towers[lane] > 0: target_towers[lane] = max(0, target_towers[lane] - tower_dmg) self._log_event(f"{owner} cast {card} chipping {lane} tower for {tower_dmg}.") else: target_towers["king"] = max(0, target_towers["king"] - int(tower_dmg * 0.5)) self._log_event(f"{owner} cast {card} chipping king tower for {int(tower_dmg*0.5)}.") self._last_spell_tower_dmg = max(self._last_spell_tower_dmg, tower_dmg) def _tick(self): # one turn = 0.5 seconds self._turn += 1 self._time_remaining_s = max(0.0, self._time_remaining_s - 0.5) # Overtime: if regulation ends tied, give 60s overtime (double elixir). if self._time_remaining_s <= 0.0 and not getattr(self, "_in_overtime", False): if self._my_crowns == self._opp_crowns: self._in_overtime = True self._time_remaining_s = 60.0 self._log_event("Overtime!") double = (self._time_remaining_s <= 60.0) or bool(getattr(self, "_in_overtime", False)) regen = 0.35 if not double else 0.7 self._my_elixir = min(10.0, self._my_elixir + regen) self._opp_elixir = min(10.0, self._opp_elixir + regen) # combat: lane skirmishes (units fight units first, then towers) self._lane_skirmish(lane="left") self._lane_skirmish(lane="right") self._cleanup_dead() self._update_crowns_and_done() def _lane_skirmish(self, *, lane: Literal["left", "right"]): """ Very simplified Clash-like combat: - units have a lane and a stage (back/mid/bridge) - troops advance toward bridge unless blocked by enemies - each tick, each side deals damage to one enemy unit (if any), else to the lane tower (else king) """ my_units = [u for u in self._my_units if _zone_lane(u.zone) == lane] opp_units = [u for u in self._opp_units if _zone_lane(u.zone) == lane] # Advance troops if no enemies near bridge; buildings don't advance. if not opp_units: for u in my_units: if self._cards[u.card].kind == "troop": u.stage = min(2, u.stage + 1) if not my_units: for u in opp_units: if self._cards[u.card].kind == "troop": u.stage = min(2, u.stage + 1) # If both have units, they fight: front-most units exchange damage. if my_units and opp_units: my_front = max(my_units, key=lambda u: (u.stage, -u.hp)) opp_front = max(opp_units, key=lambda u: (u.stage, -u.hp)) # Targeting: some units prefer towers/buildings (giant) so they ignore troops if possible. # In this simplified sim, that means: if targets==towers, they do NOT damage enemy units. if my_front.targets != "towers": opp_front.hp -= max(0.0, my_front.dps) * 0.5 if opp_front.targets != "towers": my_front.hp -= max(0.0, opp_front.dps) * 0.5 return # Otherwise, any units at bridge pressure the tower. def tower_hit(units: list[Unit], target: TowerHP): # Only units that can hit towers contribute. (All troops can, but if a troop targets towers-only, # it still contributes here; if it doesn't target towers, it can still hit towers once no troops exist.) dps = sum(u.dps for u in units if u.stage >= 2) if dps <= 0: return if target[lane] > 0: target[lane] = max(0, target[lane] - int(dps * 0.45)) else: target["king"] = max(0, target["king"] - int(dps * 0.20)) if my_units: tower_hit(my_units, self._opp_tower_hp) if opp_units: tower_hit(opp_units, self._my_tower_hp) def _cleanup_dead(self): self._my_units = [u for u in self._my_units if u.hp > 0] self._opp_units = [u for u in self._opp_units if u.hp > 0] def _update_crowns_and_done(self): self._my_crowns = int(self._opp_tower_hp["left"] == 0) + int(self._opp_tower_hp["right"] == 0) + int(self._opp_tower_hp["king"] == 0) self._opp_crowns = int(self._my_tower_hp["left"] == 0) + int(self._my_tower_hp["right"] == 0) + int(self._my_tower_hp["king"] == 0) if self._my_crowns >= 3 or self._opp_crowns >= 3 or self._time_remaining_s <= 0.0: self._done = True def _scripted_opponent_action(self): # Tilt makes opponent overspend / pick worse card; we implement that as randomness in choice + zone. playable = [c for c in self._opp_hand if self._cards[c].cost <= self._opp_elixir] if not playable: self._apply_player_action(owner="opp", kind="wait", card=None, zone=None) return noise = self._opp_card_noise() if self._rng.random() < noise: # "tilt": pick a random playable (might be expensive) card = self._rng.choice(playable) else: # baseline: pick cheapest playable card = sorted(playable, key=lambda c: self._cards[c].cost)[0] # also sometimes choose a poor zone when tilted zone = self._rng.choice(PLACEMENT_ZONES) self._apply_player_action(owner="opp", kind="play", card=card, zone=zone) def _update_tilt(self, *, emote: str | None): # Decay self._opp_tilt = max(0.0, min(1.0, self._opp_tilt - 0.03)) if emote is None: return # Detect "BM moment" proxies from last tick state deltas isn't available here; so use heuristics: # - if opponent is behind in total tower HP, emotes have bigger impact my_adv = self._tower_hp_advantage() timing_mult = 1.0 + min(1.0, max(0.0, my_adv / 1500.0)) # up to 2x base = 0.02 if my_adv > 400: base = 0.08 # "dominance" if emote == "laugh" and my_adv > 800: base = 0.12 self._opp_tilt = max(0.0, min(1.0, self._opp_tilt + base * timing_mult)) def _opp_card_noise(self) -> float: if self._opp_tilt < 0.3: return 0.05 if self._opp_tilt < 0.6: return 0.20 if self._opp_tilt < 0.9: return 0.40 return 0.60 def _tower_hp_advantage(self) -> float: my = self._my_tower_hp["left"] + self._my_tower_hp["right"] + self._my_tower_hp["king"] opp = self._opp_tower_hp["left"] + self._opp_tower_hp["right"] + self._opp_tower_hp["king"] return float(my - opp) * -1.0 # positive when opp has less HP (i'm ahead) def _snapshot_score(self) -> dict: return { "my_total_tower_hp": self._my_tower_hp["left"] + self._my_tower_hp["right"] + self._my_tower_hp["king"], "opp_total_tower_hp": self._opp_tower_hp["left"] + self._opp_tower_hp["right"] + self._opp_tower_hp["king"], "my_crowns": self._my_crowns, "opp_crowns": self._opp_crowns, "opp_tilt": self._opp_tilt, "my_elixir": self._my_elixir, "invalid_last": self._invalid_last, "my_spent_last": float(getattr(self, "_my_spent_last", 0.0)), "opp_spent_last": float(getattr(self, "_opp_spent_last", 0.0)), "my_wait_last": bool(getattr(self, "_my_wait_last", False)), "spell_hits": int(getattr(self, "_last_spell_hits", 0)), "spell_tower_dmg": int(getattr(self, "_last_spell_tower_dmg", 0)), "punish_window": int(getattr(self, "_punish_window", 0)), } def _compute_rewards(self, before: dict, after: dict, *, emote: str | None, invalid_action: bool) -> dict: weights, params = load_reward_config() # 1) Crown differential (Jaso-style log scaling, normalized) crowns_won = after["my_crowns"] crowns_lost = after["opp_crowns"] r_crowns = (4.9 * math.log(4.8 * crowns_won + 0.75) + 1.4) - (4.9 * math.log(4.8 * crowns_lost + 0.75) + 1.4) r_crowns_norm = float(max(-15.0, min(15.0, r_crowns)) / 15.0) # 2) Tower damage (dense) dmg_dealt = before["opp_total_tower_hp"] - after["opp_total_tower_hp"] dmg_taken = before["my_total_tower_hp"] - after["my_total_tower_hp"] r_tower = (dmg_dealt - 0.8 * dmg_taken) / 1200.0 # scale to ~[-1,1] typical # 3) Elixir discipline (penalize floating at full elixir) r_elixir = 0.0 if before["my_elixir"] >= params.full_elixir_threshold: r_elixir = params.full_elixir_penalty # 4) Emotes / tilt are NOT part of the reward. # They exist only as a novelty mechanic affecting opponent behavior. r_tilt = 0.0 # 5) Anti-stall reward (replaces constant alive reward). # Penalize repeated waiting, especially when floating high elixir. r_stall = 0.0 if bool(before.get("my_wait_last")): if float(before.get("my_elixir", 0.0)) >= params.tempo_elixir_threshold: r_stall = params.stall_wait_penalty_high_elixir else: r_stall = params.stall_wait_penalty_low_elixir # 6) Invalid action penalty (anti-hacking / stabilizes early RL) r_invalid = params.invalid_penalty if invalid_action else 0.0 # 7) Tempo: discourage waiting when you have plenty elixir. r_tempo = 0.0 if bool(before.get("my_wait_last")) and float(before.get("my_elixir", 0.0)) >= params.tempo_elixir_threshold: r_tempo = params.tempo_wait_penalty # 8) Spell value: reward multi-hit spells (a proxy for good "spell value"). r_spell = 0.0 hits = int(before.get("spell_hits", 0)) tower_chip = int(before.get("spell_tower_dmg", 0)) if hits >= 2: r_spell += 0.02 * min(5, hits) if tower_chip > 0: r_spell += min(0.05, tower_chip / 6000.0) # 9) Elixir efficiency: reward dealing damage with low spend (positive-trade proxy). spent = float(before.get("my_spent_last", 0.0)) if spent > 0: r_eff = max(0.0, (dmg_dealt - 0.5 * dmg_taken) / (800.0 * spent)) else: r_eff = 0.0 # 10) Punish window: if opponent overcommitted recently, reward spending to pressure quickly. r_punish = 0.0 if int(before.get("punish_window", 0)) > 0 and spent > 0.0: r_punish = params.punish_spend_reward # 11) Overcommit penalty: discourage going to (near) zero elixir unless it immediately creates advantage. r_overcommit = 0.0 if spent > 0.0 and float(before.get("my_elixir", 0.0)) <= params.overcommit_elixir_threshold and dmg_dealt <= 0: r_overcommit = params.overcommit_penalty # 12) Invalid rate penalty (shapes away from repeated illegal actions). # Uses episode-to-date invalid count from the state (tracked elsewhere). inv_count = int(before.get("invalid_action_count", 0)) steps = max(1, int(before.get("turn", 1))) inv_rate = inv_count / steps r_invalid_rate = params.invalid_rate_penalty * inv_rate # 13) Win bonus at terminal (verifiable). r_win = 0.0 if bool(after.get("done")): my_c = int(after.get("my_crowns", 0)) opp_c = int(after.get("opp_crowns", 0)) if my_c > opp_c: r_win = 1.0 elif my_c < opp_c: r_win = -1.0 else: r_win = 0.0 breakdown = { "crown_differential": r_crowns_norm, "tower_damage": float(r_tower), "elixir_discipline": float(r_elixir), "tilt_efficiency": float(r_tilt), "stall": float(r_stall), "invalid_action": float(r_invalid), "invalid_rate": float(r_invalid_rate), "tempo": float(r_tempo), "spell_value": float(r_spell), "elixir_efficiency": float(r_eff), "punish_window": float(r_punish), "overcommit": float(r_overcommit), "win_bonus": float(r_win), } total = ( weights.crown_differential * breakdown["crown_differential"] + weights.tower_damage * breakdown["tower_damage"] + weights.elixir_discipline * breakdown["elixir_discipline"] + weights.invalid_action * breakdown["invalid_action"] + weights.invalid_rate * breakdown["invalid_rate"] + weights.tempo * breakdown["tempo"] + weights.spell_value * breakdown["spell_value"] + weights.elixir_efficiency * breakdown["elixir_efficiency"] + weights.punish_window * breakdown["punish_window"] + weights.overcommit * breakdown["overcommit"] + weights.stall * breakdown["stall"] + weights.win_bonus * breakdown["win_bonus"] ) return {"reward_total": float(total), "reward_breakdown": breakdown} def _log_event(self, text: str): self._last_events.append(f"Turn {self._turn}: {text}")