""" Tokenizer for ByteFight game states. Converts raw game log data into token sequences for a value network. Sequence layout (971 tokens): [CLS] [my_stam] [my_max_stam] [opp_stam] [opp_max_stam] [my_row] [my_col] [opp_row] [opp_col] [turn] [cell_0_0] [cell_0_1] ... [cell_30_30] All token IDs live in a single unified vocabulary: 0: CLS 1-381: stamina (0-380) 382-412: position (0-30) 413-2413: turn (0-2000) 2414-2458: cell states (45 tokens) Total vocab size: 2459 """ import json import numpy as np from pathlib import Path from dataclasses import dataclass MAX_BOARD_SIZE = 31 MAX_STAMINA = 380 MAX_TURN = 2000 GRID_CELLS = MAX_BOARD_SIZE * MAX_BOARD_SIZE # 961 SEQ_LEN = 1 + 9 + GRID_CELLS # 971 # Global token offsets CLS_TOKEN = 0 STAMINA_OFFSET = 1 # 1-381 POSITION_OFFSET = STAMINA_OFFSET + MAX_STAMINA + 1 # 382-412 TURN_OFFSET = POSITION_OFFSET + MAX_BOARD_SIZE # 413-2413 CELL_OFFSET = TURN_OFFSET + MAX_TURN + 1 # 2414-2458 # Cell state tokens (local, before adding CELL_OFFSET) CELL_WALL = 0 CELL_EMPTY = 1 CELL_P1_PAINT_1 = 2 CELL_P1_PAINT_4 = 5 CELL_P2_PAINT_1 = 6 CELL_P2_PAINT_4 = 9 CELL_P1_BEACON = 10 CELL_P2_BEACON = 11 NUM_BASE_CELL_STATES = 12 # hill/powerup offsets (local) HILL_OFFSET_LOCAL = NUM_BASE_CELL_STATES # +12 POWERUP_OFFSET_LOCAL = 2 * NUM_BASE_CELL_STATES # +24 HILL_POWERUP_OFFSET_LOCAL = 3 * NUM_BASE_CELL_STATES # +36 # Max local cell token: CELL_P2_BEACON (11) + HILL_POWERUP_OFFSET_LOCAL (36) = 47 NUM_CELL_TOKENS = CELL_P2_BEACON + HILL_POWERUP_OFFSET_LOCAL + 1 # 48 VOCAB_SIZE = CELL_OFFSET + NUM_CELL_TOKENS # 2462 def _encode_cell(paint_value: int, beacon_parity: int, is_wall: bool, hill: bool, powerup: bool) -> int: """Encode a single cell's state into a global token ID.""" if is_wall: return CELL_OFFSET + CELL_WALL if beacon_parity == 1: base = CELL_P1_BEACON elif beacon_parity == -1: base = CELL_P2_BEACON elif paint_value > 0: base = CELL_P1_PAINT_1 + (paint_value - 1) elif paint_value < 0: base = CELL_P2_PAINT_1 + (-paint_value - 1) else: base = CELL_EMPTY if hill and powerup: local = base + HILL_POWERUP_OFFSET_LOCAL elif hill: local = base + HILL_OFFSET_LOCAL elif powerup: local = base + POWERUP_OFFSET_LOCAL else: local = base return CELL_OFFSET + local @dataclass class TokenizedState: """A single tokenized game state with its label.""" tokens: np.ndarray # (971,) int32 label: float # 1.0 = p1 wins, 0.0 = p2 wins def _parse_map(map_string: str): """Parse static map info (size, walls, hills) from a map string.""" parts = map_string.split("#") size_r, size_c = int(parts[0].split(",")[0]), int(parts[0].split(",")[1]) walls = set() for i, ch in enumerate(parts[3]): if ch == "1": walls.add((i // size_c, i % size_c)) hill_cells = set() hill_ids_str = parts[4] hill_sets_str = parts[5] if hill_ids_str.strip(): hill_id_list = [x for x in hill_ids_str.split(",") if x.strip()] hill_set_list = hill_sets_str.split("_") for i, _ in enumerate(hill_id_list): if i < len(hill_set_list) and hill_set_list[i]: coords = hill_set_list[i].split(",") for j in range(len(coords) // 2): r, c = int(coords[2 * j]), int(coords[2 * j + 1]) hill_cells.add((r, c)) return size_r, size_c, walls, hill_cells def _replay_deltas(gl: dict, size_r: int, size_c: int, up_to: int): """Replay paint/beacon/powerup deltas from turn 0 up to (inclusive).""" paint = np.zeros((size_r, size_c), dtype=np.int8) beacon = np.zeros((size_r, size_c), dtype=np.int8) powerup = np.zeros((size_r, size_c), dtype=np.bool_) for t in range(up_to + 1): for cell_key, value in gl["paint_updates"][t].items(): idx = int(cell_key) paint[idx // size_c, idx % size_c] = value for cell_key, value in gl["beacon_updates"][t].items(): idx = int(cell_key) beacon[idx // size_c, idx % size_c] = value for cell_key, value in gl["powerup_updates"][t].items(): idx = int(cell_key) powerup[idx // size_c, idx % size_c] = value return paint, beacon, powerup def _build_tokens(gl: dict, turn_idx: int, size_r: int, size_c: int, walls: set, hill_cells: set, paint: np.ndarray, beacon: np.ndarray, powerup: np.ndarray) -> np.ndarray: """Build the 971-token sequence for a single turn.""" tokens = np.zeros(SEQ_LEN, dtype=np.int32) tokens[0] = CLS_TOKEN tokens[1] = STAMINA_OFFSET + min(max(gl["p1_stamina"][turn_idx], 0), MAX_STAMINA) tokens[2] = STAMINA_OFFSET + min(max(gl["p1_max_stamina"][turn_idx], 0), MAX_STAMINA) tokens[3] = STAMINA_OFFSET + min(max(gl["p2_stamina"][turn_idx], 0), MAX_STAMINA) tokens[4] = STAMINA_OFFSET + min(max(gl["p2_max_stamina"][turn_idx], 0), MAX_STAMINA) tokens[5] = POSITION_OFFSET + gl["p1_loc"][turn_idx][0] tokens[6] = POSITION_OFFSET + gl["p1_loc"][turn_idx][1] tokens[7] = POSITION_OFFSET + gl["p2_loc"][turn_idx][0] tokens[8] = POSITION_OFFSET + gl["p2_loc"][turn_idx][1] tokens[9] = TURN_OFFSET + min(turn_idx, MAX_TURN) for r in range(MAX_BOARD_SIZE): for c in range(MAX_BOARD_SIZE): grid_idx = 10 + r * MAX_BOARD_SIZE + c if r >= size_r or c >= size_c: tokens[grid_idx] = CELL_OFFSET + CELL_WALL else: tokens[grid_idx] = _encode_cell( paint_value=int(paint[r, c]), beacon_parity=int(beacon[r, c]), is_wall=(r, c) in walls, hill=(r, c) in hill_cells, powerup=bool(powerup[r, c]), ) return tokens def _parse_label(result: str) -> float: if result == "PLAYER_1": return 1.0 elif result == "PLAYER_2": return 0.0 return 0.5 def tokenize_turn(gl: dict, map_string: str, turn_idx: int) -> np.ndarray: """Tokenize a single turn from a game log. Returns (971,) int32 array.""" size_r, size_c, walls, hill_cells = _parse_map(map_string) paint, beacon, powerup = _replay_deltas(gl, size_r, size_c, turn_idx) return _build_tokens(gl, turn_idx, size_r, size_c, walls, hill_cells, paint, beacon, powerup) def tokenize_match(match_path: str | Path) -> list[TokenizedState]: """ Tokenize all turns of a match into training examples. Returns examples from P1's perspective. To get P2's perspective, the caller can use flip_perspective(). """ with open(match_path) as f: data = json.load(f) gl = data["game_log"] size_r, size_c, walls, hill_cells = _parse_map(gl["map_string"]) label = _parse_label(gl["result"]) num_turns = len(gl["p1_stamina"]) # Incrementally replay deltas (more efficient than replaying from 0 each time) paint = np.zeros((size_r, size_c), dtype=np.int8) beacon = np.zeros((size_r, size_c), dtype=np.int8) powerup = np.zeros((size_r, size_c), dtype=np.bool_) examples = [] for t in range(num_turns): for cell_key, value in gl["paint_updates"][t].items(): idx = int(cell_key) paint[idx // size_c, idx % size_c] = value for cell_key, value in gl["beacon_updates"][t].items(): idx = int(cell_key) beacon[idx // size_c, idx % size_c] = value for cell_key, value in gl["powerup_updates"][t].items(): idx = int(cell_key) powerup[idx // size_c, idx % size_c] = value tokens = _build_tokens(gl, t, size_r, size_c, walls, hill_cells, paint, beacon, powerup) examples.append(TokenizedState(tokens=tokens, label=label)) return examples def flip_perspective(state: TokenizedState) -> TokenizedState: """ Flip a tokenized state from P1's perspective to P2's perspective. Swaps player stamina/position scalars and flips cell ownership (P1 paint <-> P2 paint, P1 beacon <-> P2 beacon). """ tokens = state.tokens.copy() # Swap stamina: (1,2) <-> (3,4) tokens[1], tokens[3] = tokens[3], tokens[1] tokens[2], tokens[4] = tokens[4], tokens[2] # Swap positions: (5,6) <-> (7,8) tokens[5], tokens[7] = tokens[7], tokens[5] tokens[6], tokens[8] = tokens[8], tokens[6] # Flip cell ownership in grid for i in range(10, SEQ_LEN): cell = tokens[i] - CELL_OFFSET if cell == CELL_WALL: continue if cell >= HILL_POWERUP_OFFSET_LOCAL: offset = HILL_POWERUP_OFFSET_LOCAL base = cell - HILL_POWERUP_OFFSET_LOCAL elif cell >= POWERUP_OFFSET_LOCAL: offset = POWERUP_OFFSET_LOCAL base = cell - POWERUP_OFFSET_LOCAL elif cell >= HILL_OFFSET_LOCAL: offset = HILL_OFFSET_LOCAL base = cell - HILL_OFFSET_LOCAL else: offset = 0 base = cell if CELL_P1_PAINT_1 <= base <= CELL_P1_PAINT_4: base = CELL_P2_PAINT_1 + (base - CELL_P1_PAINT_1) elif CELL_P2_PAINT_1 <= base <= CELL_P2_PAINT_4: base = CELL_P1_PAINT_1 + (base - CELL_P2_PAINT_1) elif base == CELL_P1_BEACON: base = CELL_P2_BEACON elif base == CELL_P2_BEACON: base = CELL_P1_BEACON tokens[i] = CELL_OFFSET + base + offset label = 1.0 - state.label return TokenizedState(tokens=tokens, label=label)