| """ |
| Tokenizer for ByteFight game states. |
| |
| Converts raw game log data into token sequences for a value network. |
| |
| Sequence layout (971 tokens): |
| [CLS] [my_stam] [my_max_stam] [opp_stam] [opp_max_stam] |
| [my_row] [my_col] [opp_row] [opp_col] [turn] |
| [cell_0_0] [cell_0_1] ... [cell_30_30] |
| |
| All token IDs live in a single unified vocabulary: |
| 0: CLS |
| 1-381: stamina (0-380) |
| 382-412: position (0-30) |
| 413-2413: turn (0-2000) |
| 2414-2461: cell states (48 IDs reserved; 45 used, since walls have no hill/powerup variants) |
| |
| Total vocab size: 2462 |
| """ |
|
|
| import json |
| import numpy as np |
| from pathlib import Path |
| from dataclasses import dataclass |
|
|
# --- Sequence geometry -------------------------------------------------
MAX_BOARD_SIZE = 31
MAX_STAMINA = 380
MAX_TURN = 2000
GRID_CELLS = MAX_BOARD_SIZE ** 2
# One CLS token, nine scalar slots, then one token per (padded) grid cell.
SEQ_LEN = 1 + 9 + GRID_CELLS

# --- Start of each token family inside the unified vocabulary ----------
CLS_TOKEN = 0
STAMINA_OFFSET = 1
# Stamina and turn values are inclusive of 0, hence the "+ 1" on their widths.
POSITION_OFFSET = STAMINA_OFFSET + (MAX_STAMINA + 1)
TURN_OFFSET = POSITION_OFFSET + MAX_BOARD_SIZE
CELL_OFFSET = TURN_OFFSET + (MAX_TURN + 1)

# --- Local (pre-offset) IDs of the base cell states --------------------
CELL_WALL = 0
CELL_EMPTY = 1
CELL_P1_PAINT_1 = 2
CELL_P1_PAINT_4 = CELL_P1_PAINT_1 + 3
CELL_P2_PAINT_1 = CELL_P1_PAINT_4 + 1
CELL_P2_PAINT_4 = CELL_P2_PAINT_1 + 3
CELL_P1_BEACON = CELL_P2_PAINT_4 + 1
CELL_P2_BEACON = CELL_P1_BEACON + 1
NUM_BASE_CELL_STATES = CELL_P2_BEACON + 1

# --- Local offsets added to a base state for hill / powerup variants ---
HILL_OFFSET_LOCAL = 1 * NUM_BASE_CELL_STATES
POWERUP_OFFSET_LOCAL = 2 * NUM_BASE_CELL_STATES
HILL_POWERUP_OFFSET_LOCAL = 3 * NUM_BASE_CELL_STATES

# Highest local cell ID is CELL_P2_BEACON + HILL_POWERUP_OFFSET_LOCAL; the
# "+ 1" turns that maximum ID into a count.
NUM_CELL_TOKENS = HILL_POWERUP_OFFSET_LOCAL + CELL_P2_BEACON + 1

VOCAB_SIZE = CELL_OFFSET + NUM_CELL_TOKENS
|
|
|
|
def _encode_cell(paint_value: int, beacon_parity: int, is_wall: bool,
                 hill: bool, powerup: bool) -> int:
    """Map one cell's state to its token ID in the unified vocabulary.

    Args:
        paint_value: Signed paint level; positive selects a P1 paint state,
            negative a P2 paint state, zero means unpainted.
            NOTE(review): the magnitude is not range-checked here; values
            beyond the P1_PAINT_1..P1_PAINT_4 span would spill into
            neighbouring IDs -- confirm callers keep it within 4 levels.
        beacon_parity: 1 for a P1 beacon, -1 for a P2 beacon; any other
            value means no beacon. A beacon takes precedence over paint.
        is_wall: When true, every other argument is ignored.
        hill: Whether the cell belongs to a hill.
        powerup: Whether the cell currently holds a powerup.

    Returns:
        ``CELL_OFFSET`` plus the local cell-state ID.
    """
    # Walls have a single token: no hill/powerup variants exist for them.
    if is_wall:
        return CELL_OFFSET + CELL_WALL

    # Base state: beacon first, then signed paint, otherwise empty.
    if beacon_parity == 1:
        base_state = CELL_P1_BEACON
    elif beacon_parity == -1:
        base_state = CELL_P2_BEACON
    elif paint_value > 0:
        base_state = CELL_P1_PAINT_1 + (paint_value - 1)
    elif paint_value < 0:
        base_state = CELL_P2_PAINT_1 + (-paint_value - 1)
    else:
        base_state = CELL_EMPTY

    # Variant offset indexed by two flag bits: bit 0 = hill, bit 1 = powerup.
    variant_offsets = (
        0,
        HILL_OFFSET_LOCAL,
        POWERUP_OFFSET_LOCAL,
        HILL_POWERUP_OFFSET_LOCAL,
    )
    variant = (1 if hill else 0) + (2 if powerup else 0)

    return CELL_OFFSET + base_state + variant_offsets[variant]
|
|
|
|
@dataclass
class TokenizedState:
    """One training example: a token sequence paired with its target value."""

    # Token IDs for a single turn, as produced by _build_tokens
    # (int32 array of length SEQ_LEN).
    tokens: np.ndarray
    # Match outcome as produced by _parse_label:
    # 1.0 = PLAYER_1 won, 0.0 = PLAYER_2 won, 0.5 = anything else.
    label: float
|
|
|
|
| def _parse_map(map_string: str): |
| """Parse static map info (size, walls, hills) from a map string.""" |
| parts = map_string.split("#") |
| size_r, size_c = int(parts[0].split(",")[0]), int(parts[0].split(",")[1]) |
|
|
| walls = set() |
| for i, ch in enumerate(parts[3]): |
| if ch == "1": |
| walls.add((i // size_c, i % size_c)) |
|
|
| hill_cells = set() |
| hill_ids_str = parts[4] |
| hill_sets_str = parts[5] |
| if hill_ids_str.strip(): |
| hill_id_list = [x for x in hill_ids_str.split(",") if x.strip()] |
| hill_set_list = hill_sets_str.split("_") |
| for i, _ in enumerate(hill_id_list): |
| if i < len(hill_set_list) and hill_set_list[i]: |
| coords = hill_set_list[i].split(",") |
| for j in range(len(coords) // 2): |
| r, c = int(coords[2 * j]), int(coords[2 * j + 1]) |
| hill_cells.add((r, c)) |
|
|
| return size_r, size_c, walls, hill_cells |
|
|
|
|
| def _replay_deltas(gl: dict, size_r: int, size_c: int, up_to: int): |
| """Replay paint/beacon/powerup deltas from turn 0 up to (inclusive).""" |
| paint = np.zeros((size_r, size_c), dtype=np.int8) |
| beacon = np.zeros((size_r, size_c), dtype=np.int8) |
| powerup = np.zeros((size_r, size_c), dtype=np.bool_) |
|
|
| for t in range(up_to + 1): |
| for cell_key, value in gl["paint_updates"][t].items(): |
| idx = int(cell_key) |
| paint[idx // size_c, idx % size_c] = value |
| for cell_key, value in gl["beacon_updates"][t].items(): |
| idx = int(cell_key) |
| beacon[idx // size_c, idx % size_c] = value |
| for cell_key, value in gl["powerup_updates"][t].items(): |
| idx = int(cell_key) |
| powerup[idx // size_c, idx % size_c] = value |
|
|
| return paint, beacon, powerup |
|
|
|
|
def _build_tokens(gl: dict, turn_idx: int, size_r: int, size_c: int,
                  walls: set, hill_cells: set,
                  paint: np.ndarray, beacon: np.ndarray,
                  powerup: np.ndarray) -> np.ndarray:
    """Assemble the full token sequence (length ``SEQ_LEN``) for one turn.

    Layout: slot 0 is CLS, slots 1-4 are P1/P2 stamina and max stamina
    (clamped to ``[0, MAX_STAMINA]``), slots 5-8 are P1/P2 row and column,
    slot 9 is the turn index (capped at ``MAX_TURN``), and the remaining
    slots hold the ``MAX_BOARD_SIZE`` x ``MAX_BOARD_SIZE`` grid in row-major
    order. Cells outside the real ``size_r`` x ``size_c`` board are encoded
    as walls.

    NOTE(review): player positions are added to ``POSITION_OFFSET`` without
    clamping -- presumably always within ``0..MAX_BOARD_SIZE - 1``; confirm.

    Returns:
        int32 array of shape ``(SEQ_LEN,)``.
    """
    def stamina_token(log_key: str) -> int:
        # Clamp into the range the stamina token family can represent.
        return STAMINA_OFFSET + min(max(gl[log_key][turn_idx], 0), MAX_STAMINA)

    tokens = np.zeros(SEQ_LEN, dtype=np.int32)
    tokens[0] = CLS_TOKEN

    stamina_keys = ("p1_stamina", "p1_max_stamina",
                    "p2_stamina", "p2_max_stamina")
    for slot, log_key in enumerate(stamina_keys, start=1):
        tokens[slot] = stamina_token(log_key)

    # Slots 5-8: (row, col) of P1 followed by (row, col) of P2.
    slot = 5
    for loc_key in ("p1_loc", "p2_loc"):
        for axis in (0, 1):
            tokens[slot] = POSITION_OFFSET + gl[loc_key][turn_idx][axis]
            slot += 1

    tokens[9] = TURN_OFFSET + min(turn_idx, MAX_TURN)

    # View the grid portion as 2-D; writes go straight into ``tokens``.
    grid = tokens[10:].reshape(MAX_BOARD_SIZE, MAX_BOARD_SIZE)
    # Start with everything as wall padding, then overwrite real board cells.
    grid.fill(CELL_OFFSET + CELL_WALL)

    for r in range(min(size_r, MAX_BOARD_SIZE)):
        for c in range(min(size_c, MAX_BOARD_SIZE)):
            here = (r, c)
            grid[r, c] = _encode_cell(
                paint_value=int(paint[r, c]),
                beacon_parity=int(beacon[r, c]),
                is_wall=here in walls,
                hill=here in hill_cells,
                powerup=bool(powerup[r, c]),
            )

    return tokens
|
|
|
|
| def _parse_label(result: str) -> float: |
| if result == "PLAYER_1": |
| return 1.0 |
| elif result == "PLAYER_2": |
| return 0.0 |
| return 0.5 |
|
|
|
|
def tokenize_turn(gl: dict, map_string: str, turn_idx: int) -> np.ndarray:
    """Tokenize one turn of a game log.

    Parses the static map, replays all deltas from turn 0 through
    ``turn_idx`` (inclusive) to rebuild the board, then encodes that turn.

    Returns:
        int32 array of shape ``(SEQ_LEN,)`` as produced by ``_build_tokens``.
    """
    rows, cols, wall_set, hill_set = _parse_map(map_string)
    paint_layer, beacon_layer, powerup_layer = _replay_deltas(
        gl, rows, cols, turn_idx
    )
    return _build_tokens(
        gl, turn_idx, rows, cols, wall_set, hill_set,
        paint_layer, beacon_layer, powerup_layer,
    )
|
|
|
|
def tokenize_match(match_path: str | Path) -> list[TokenizedState]:
    """
    Tokenize all turns of a match into training examples.

    Reads the JSON file at ``match_path``, takes its ``"game_log"`` entry,
    and emits one ``TokenizedState`` per turn (the number of turns is the
    length of ``game_log["p1_stamina"]``). Every example carries the same
    label, derived from ``game_log["result"]``.

    Returns examples from P1's perspective. To get P2's perspective,
    the caller can use flip_perspective().

    Args:
        match_path: Path to a match JSON file.

    Returns:
        List of tokenized states in turn order.
    """
    # Decode explicitly as UTF-8: JSON is UTF-8 by specification, while a
    # bare open() would fall back to the platform's locale encoding.
    with open(match_path, encoding="utf-8") as f:
        data = json.load(f)

    gl = data["game_log"]
    size_r, size_c, walls, hill_cells = _parse_map(gl["map_string"])
    label = _parse_label(gl["result"])
    num_turns = len(gl["p1_stamina"])

    # Board layers are updated incrementally so each turn costs only its own
    # deltas, instead of replaying from turn 0 for every example.
    paint = np.zeros((size_r, size_c), dtype=np.int8)
    beacon = np.zeros((size_r, size_c), dtype=np.int8)
    powerup = np.zeros((size_r, size_c), dtype=np.bool_)
    layers = (
        ("paint_updates", paint),
        ("beacon_updates", beacon),
        ("powerup_updates", powerup),
    )

    examples = []

    for t in range(num_turns):
        # Apply this turn's deltas; keys are flat row-major cell indices.
        for log_key, layer in layers:
            for cell_key, value in gl[log_key][t].items():
                row, col = divmod(int(cell_key), size_c)
                layer[row, col] = value

        # _build_tokens allocates a fresh array, so later in-place layer
        # updates do not affect examples that were already emitted.
        tokens = _build_tokens(gl, t, size_r, size_c, walls, hill_cells,
                               paint, beacon, powerup)
        examples.append(TokenizedState(tokens=tokens, label=label))

    return examples
|
|
|
|
def flip_perspective(state: TokenizedState) -> TokenizedState:
    """
    Return a copy of ``state`` as seen from the other player's side.

    The two players' stamina, max-stamina, row and column tokens trade
    places, every grid cell has its owner mirrored (P1 paint <-> P2 paint,
    P1 beacon <-> P2 beacon) while keeping its hill/powerup variant, and the
    label becomes ``1.0 - label``. The CLS and turn tokens are left as they
    are. The input state is not modified.
    """
    tokens = state.tokens.copy()

    # Slot pairs to exchange: stamina, max stamina, row, column.
    for mine, theirs in ((1, 3), (2, 4), (5, 7), (6, 8)):
        tokens[mine], tokens[theirs] = tokens[theirs], tokens[mine]

    # Base-state mirror table; states absent from it (e.g. empty) map to
    # themselves.
    mirror = {CELL_P1_BEACON: CELL_P2_BEACON, CELL_P2_BEACON: CELL_P1_BEACON}
    for level in range(CELL_P1_PAINT_4 - CELL_P1_PAINT_1 + 1):
        mirror[CELL_P1_PAINT_1 + level] = CELL_P2_PAINT_1 + level
    for level in range(CELL_P2_PAINT_4 - CELL_P2_PAINT_1 + 1):
        mirror[CELL_P2_PAINT_1 + level] = CELL_P1_PAINT_1 + level

    # Variant offsets, largest first, so the first threshold met wins.
    variant_thresholds = (
        HILL_POWERUP_OFFSET_LOCAL,
        POWERUP_OFFSET_LOCAL,
        HILL_OFFSET_LOCAL,
    )

    for pos in range(10, SEQ_LEN):
        local_id = int(tokens[pos]) - CELL_OFFSET
        # Walls carry no ownership, so there is nothing to mirror.
        if local_id == CELL_WALL:
            continue

        # Split the local ID into its variant offset and base state.
        variant = next((v for v in variant_thresholds if local_id >= v), 0)
        base_state = local_id - variant

        tokens[pos] = CELL_OFFSET + mirror.get(base_state, base_state) + variant

    return TokenizedState(tokens=tokens, label=1.0 - state.label)
|
|