import json
from typing import List, Optional

import numpy as np

from engine.game.ai_compat import njit
from engine.game.fast_logic import batch_apply_action, resolve_bytecode


@njit
def step_vectorized(
    actions: np.ndarray,
    batch_stage: np.ndarray,
    batch_energy_vec: np.ndarray,
    batch_energy_count: np.ndarray,
    batch_continuous_vec: np.ndarray,
    batch_continuous_ptr: np.ndarray,
    batch_tapped: np.ndarray,
    batch_live: np.ndarray,
    batch_opp_tapped: np.ndarray,
    batch_scores: np.ndarray,
    batch_flat_ctx: np.ndarray,
    batch_global_ctx: np.ndarray,
    batch_hand: np.ndarray,
    batch_deck: np.ndarray,
    # Bytecode maps
    bytecode_map: np.ndarray,  # (GlobalOpMapSize, MaxBytecodeLen, 4)
    bytecode_index: np.ndarray,  # (NumCards, NumAbilities) -> index in map
):
    """Step N game environments in one call by delegating to batch_apply_action.

    All buffers are mutated in place; nothing is returned. The call is always
    made with player_id 0, so callers that want to step the other side pass
    that side's buffers in the "batch_*" positions.
    """
    # Score sync is handled inside batch_apply_action.
    # NOTE: the callee's argument order (tapped, scores, live, opp_tapped)
    # differs from this function's parameter order; keep it as is.
    batch_apply_action(
        actions,
        0,  # player_id
        batch_stage,
        batch_energy_vec,
        batch_energy_count,
        batch_continuous_vec,
        batch_continuous_ptr,
        batch_tapped,
        batch_scores,
        batch_live,
        batch_opp_tapped,
        batch_flat_ctx,
        batch_global_ctx,
        batch_hand,
        batch_deck,
        bytecode_map,
        bytecode_index,
    )


class VectorGameState:
    """Manages a batch of independent game states for high-throughput training.

    Every piece of state is a pre-allocated NumPy array whose first axis is the
    environment index. "batch_*" buffers belong to player 0 (the agent) and
    "opp_*" buffers to player 1 (the opponent).
    """

    def __init__(self, num_envs: int):
        self.num_envs = num_envs
        self.turn = 1

        # Batched state buffers - Player 0 (Agent)
        self.batch_stage = np.full((num_envs, 3), -1, dtype=np.int32)
        self.batch_energy_vec = np.zeros((num_envs, 3, 32), dtype=np.int32)
        self.batch_energy_count = np.zeros((num_envs, 3), dtype=np.int32)
        self.batch_continuous_vec = np.zeros((num_envs, 32, 10), dtype=np.int32)
        self.batch_continuous_ptr = np.zeros(num_envs, dtype=np.int32)
        self.batch_tapped = np.zeros((num_envs, 3), dtype=np.int32)
        self.batch_live = np.zeros((num_envs, 50), dtype=np.int32)
        self.batch_opp_tapped = np.zeros((num_envs, 3), dtype=np.int32)
        self.batch_scores = np.zeros(num_envs, dtype=np.int32)

        # Batched state buffers - Player 1 (Opponent)
        self.opp_stage = np.full((num_envs, 3), -1, dtype=np.int32)
        self.opp_energy_vec = np.zeros((num_envs, 3, 32), dtype=np.int32)  # Match agent shape
        self.opp_energy_count = np.zeros((num_envs, 3), dtype=np.int32)
        # NOTE(review): int8 here vs int32 for batch_tapped; the jitted kernels
        # receive both, which presumably triggers a second specialization.
        self.opp_tapped = np.zeros((num_envs, 3), dtype=np.int8)
        self.opp_scores = np.zeros(num_envs, dtype=np.int32)

        # Opponent finite deck buffers
        self.opp_hand = np.zeros((num_envs, 60), dtype=np.int32)
        self.opp_deck = np.zeros((num_envs, 60), dtype=np.int32)

        # Pre-allocated context buffers
        self.batch_flat_ctx = np.zeros((num_envs, 64), dtype=np.int32)
        self.opp_flat_ctx = np.zeros((num_envs, 64), dtype=np.int32)
        self.batch_global_ctx = np.zeros((num_envs, 128), dtype=np.int32)
        self.opp_global_ctx = np.zeros((num_envs, 128), dtype=np.int32)

        # Agent hand / deck (60 slots each)
        self.batch_hand = np.zeros((num_envs, 60), dtype=np.int32)
        self.batch_deck = np.zeros((num_envs, 60), dtype=np.int32)

        # Continuous-effect buffers for the opponent
        self.opp_continuous_vec = np.zeros((num_envs, 32, 10), dtype=np.int32)
        self.opp_continuous_ptr = np.zeros(num_envs, dtype=np.int32)

        # Observation buffers (one per perspective so they do not clobber each other)
        self.obs_dim = 8192
        self.obs_buffer = np.zeros((self.num_envs, self.obs_dim), dtype=np.float32)
        self.obs_buffer_p1 = np.zeros((self.num_envs, self.obs_dim), dtype=np.float32)

        # History buffers (visibility)
        self.batch_agent_history = np.zeros((num_envs, 50), dtype=np.int32)
        self.batch_opp_history = np.zeros((num_envs, 50), dtype=np.int32)

        # Load card data
        self._load_bytecode()
        self._load_verified_deck_pool()

    def _load_bytecode(self):
        """Load the compiled bytecode map and the per-card stats table.

        Sets ``bytecode_map``, ``bytecode_index`` and ``card_stats``. All three
        are always defined when this returns, even if a data file is missing,
        so later calls to ``step`` / ``get_observations`` never hit a missing
        attribute.
        """
        self.max_cards = 2000
        self.max_abilities = 8
        self.max_len = 128  # Max instructions per ability

        # --- IMAX PRO VISION (Stride 80) ---
        # Fixed geography: dedicated space per ability.
        #   0-19:  Stats (Cost, Hearts, Traits, Live Reqs)
        #   20-35: Ability 1 (Trig, Cond, Opts, Effs)
        #   36-47: Ability 2 (Trig, Cond, 3 Effs)
        #   48-59: Ability 3 (Trig, Cond, 3 Effs)
        #   60-71: Ability 4 (Trig, Cond, 3 Effs)
        #   79:    Location signal (runtime only)
        # Allocated before any file access so it exists on every path.
        self.card_stats = np.zeros((self.max_cards, 80), dtype=np.int32)

        try:
            with open("data/cards_numba.json", "r") as f:
                raw_map = json.load(f)
        except FileNotFoundError:
            print(" [VectorEnv] Warning: data/cards_numba.json not found. Using empty map.")
            self.bytecode_map = np.zeros((1, 64, 4), dtype=np.int32)
            # Full-size index of zeros: every (card, ability) maps to entry 0,
            # so jitted lookups by card id stay inside the array.
            self.bytecode_index = np.zeros((self.max_cards, self.max_abilities), dtype=np.int32)
            self._load_card_stats()
            return

        # Format: key "cardid_abidx" -> flat list of ints.
        # Storage:
        #   1. one array of bytecodes (N, MaxLen, 4); index 0 is empty/nop
        #   2. lookup (CardID, AbIdx) -> index into that array
        unique_entries = len(raw_map)
        self.bytecode_map = np.zeros((unique_entries + 1, self.max_len, 4), dtype=np.int32)
        self.bytecode_index = np.full((self.max_cards, self.max_abilities), 0, dtype=np.int32)

        idx_counter = 1
        for key, bc_list in raw_map.items():
            cid, aid = map(int, key.split("_"))
            if cid < self.max_cards and aid < self.max_abilities:
                bc_arr = np.array(bc_list, dtype=np.int32).reshape(-1, 4)
                length = min(bc_arr.shape[0], self.max_len)
                self.bytecode_map[idx_counter, :length] = bc_arr[:length]
                self.bytecode_index[cid, aid] = idx_counter
                idx_counter += 1

        print(f" [VectorEnv] Loaded {unique_entries} compiled abilities.")
        self._load_card_stats()

    def _load_card_stats(self):
        """Fill ``self.card_stats`` from data/cards_compiled.json (best effort).

        Any failure is reported with a warning and leaves whatever was written
        so far in place.
        """
        try:
            with open("data/cards_compiled.json", "r", encoding="utf-8") as f:
                db = json.load(f)

            # cards_compiled.json is keyed by string integers "0", "1", ...
            count = 0
            members = db["member_db"] if "member_db" in db else {}
            for cid_str, card in members.items():
                cid = int(cid_str)
                # Reject negative ids too: they would wrap to the table's end.
                if not 0 <= cid < self.max_cards:
                    continue
                stats = self.card_stats[cid]  # row view; writes go to the table

                stats[0] = card.get("cost", 0)
                stats[1] = card.get("blades", 0)

                # Hearts: sum of the per-color points.
                h_arr = card.get("hearts", [])
                stats[2] = sum(h_arr)

                # Color: 1-based index of the first non-zero heart, 0 if none.
                stats[3] = next((k + 1 for k, v in enumerate(h_arr) if v > 0), 0)

                stats[4] = card.get("volume_icons", 0)
                stats[5] = card.get("draw_icons", 0)

                # Live requirements packed into 12-18 (Pink..Purple, All).
                if "required_hearts" in card:
                    reqs = card.get("required_hearts", [])
                    for r_idx in range(min(len(reqs), 7)):
                        stats[12 + r_idx] = reqs[r_idx]

                # Fixed-geography ability packing: (base index, size, options?)
                ab_list = card.get("abilities", [])
                layout = ((20, 16, True), (36, 12, False), (48, 12, False), (60, 12, False))
                for ab, (base_idx, size, has_opts) in zip(ab_list, layout):
                    self._pack_ability_block(stats, ab, base_idx, size, has_opts)

                # Type
                stats[10] = 1

                # Traits bitmask (groups and units) stored at index 11.
                mask = 0
                for g in card.get("groups", []):
                    try:
                        mask |= 1 << (int(g) % 20)
                    except (TypeError, ValueError):
                        pass  # non-numeric trait: skip it
                for u in card.get("units", []):
                    try:
                        mask |= 1 << ((int(u) % 20) + 5)
                    except (TypeError, ValueError):
                        pass
                stats[11] = mask

                count += 1

            print(f" [VectorEnv] Loaded detailed stats/abilities for {count} cards.")
        except Exception as e:
            print(f" [VectorEnv] Warning: Failed to load compiled stats: {e}")

    @staticmethod
    def _pack_ability_block(stats, ab, base_idx, block_size, has_opts=False):
        """Pack one ability dict into ``stats[base_idx : base_idx + block_size]``.

        Layout: trigger, condition type, condition value, then (when
        ``has_opts``) two 3-slot modal options, then up to three 3-slot
        effects. Writes that would fall past the end of the block are dropped
        so one ability can never overwrite the next ability's slots.
        """
        if not ab:
            return

        stats[base_idx] = ab.get("trigger", 0)

        conds = ab.get("conditions", [])
        if conds:
            stats[base_idx + 1] = conds[0].get("type", 0)
            stats[base_idx + 2] = conds[0].get("params", {}).get("value", 0)

        effs = ab.get("effects", [])
        eff_start = base_idx + 3
        fields = ("effect_type", "value", "target")

        if has_opts:
            eff_start = base_idx + 9  # skip 6 slots reserved for options
            if effs:
                # Options come from the first effect; first entry of each option.
                m_opts = effs[0].get("modal_options", [])
                for opt_i in range(min(len(m_opts), 2)):
                    if len(m_opts[opt_i]) > 0:
                        o = m_opts[opt_i][0]
                        off = base_idx + 3 + opt_i * 3
                        for f_i, name in enumerate(fields):
                            stats[off + f_i] = o.get(name, 0)

        block_end = base_idx + block_size
        for e_i in range(min(len(effs), 3)):
            e = effs[e_i]
            off = eff_start + e_i * 3
            for f_i, name in enumerate(fields):
                if off + f_i < block_end:
                    stats[off + f_i] = e.get(name, 0)

    def _load_verified_deck_pool(self):
        """Build the four card-id pools used for deck generation.

        Reads data/verified_card_pool.json (card numbers) and
        data/cards_compiled.json (card number -> id). On any error the pools
        fall back to a single dummy member (1) and a single dummy live (999).
        """
        try:
            with open("data/verified_card_pool.json", "r", encoding="utf-8") as f:
                verified_data = json.load(f)
            with open("data/cards_compiled.json", "r", encoding="utf-8") as f:
                db_data = json.load(f)

            self.ability_member_ids = []
            self.ability_live_ids = []
            self.vanilla_member_ids = []
            self.vanilla_live_ids = []

            # Card number -> card id, per card type
            member_no_map = {
                cdata["card_no"]: int(cid) for cid, cdata in db_data.get("member_db", {}).items()
            }
            live_no_map = {
                cdata["card_no"]: int(cid) for cid, cdata in db_data.get("live_db", {}).items()
            }

            # A bare list is treated as the ability pool.
            if isinstance(verified_data, list):
                verified_data = {"verified_abilities": verified_data, "vanilla_members": [], "vanilla_lives": []}

            # 1. Primary pool: abilities (categorized by card type)
            for v_no in verified_data.get("verified_abilities", []):
                if v_no in member_no_map:
                    self.ability_member_ids.append(member_no_map[v_no])
                elif v_no in live_no_map:
                    self.ability_live_ids.append(live_no_map[v_no])

            # 2. Secondary pool: vanilla
            for v_no in verified_data.get("vanilla_members", []):
                if v_no in member_no_map:
                    self.vanilla_member_ids.append(member_no_map[v_no])
            for v_no in verified_data.get("vanilla_lives", []):
                if v_no in live_no_map:
                    self.vanilla_live_ids.append(live_no_map[v_no])

            # Fallbacks / warnings
            if not self.ability_member_ids and not self.vanilla_member_ids:
                print(" [VectorEnv] Warning: No members found. Using ID 1.")
                self.ability_member_ids = [1]
            if not self.ability_live_ids and not self.vanilla_live_ids:
                print(" [VectorEnv] Warning: No lives found. Using ID 999 (Dummy).")
                self.vanilla_live_ids = [999]

            print(
                f" [VectorEnv] Pools: {len(self.ability_member_ids)} Ability Members, {len(self.ability_live_ids)} Ability Lives."
            )
            print(
                f" [VectorEnv] Fallbacks: {len(self.vanilla_member_ids)} Vanilla Members, {len(self.vanilla_live_ids)} Vanilla Lives."
            )

            self.ability_member_ids = np.array(self.ability_member_ids, dtype=np.int32)
            self.ability_live_ids = np.array(self.ability_live_ids, dtype=np.int32)
            self.vanilla_member_ids = np.array(self.vanilla_member_ids, dtype=np.int32)
            self.vanilla_live_ids = np.array(self.vanilla_live_ids, dtype=np.int32)
        except Exception as e:
            print(f" [VectorEnv] Deck Load Error: {e}")
            self.ability_member_ids = np.array([1], dtype=np.int32)
            self.ability_live_ids = np.array([999], dtype=np.int32)
            self.vanilla_member_ids = np.array([], dtype=np.int32)
            self.vanilla_live_ids = np.array([], dtype=np.int32)

    def reset(self, indices: Optional[List[int]] = None):
        """Reset the specified environments (or all of them if indices is None).

        Clears every per-environment buffer for both players, deals each side a
        fresh 60-card deck and a 5-card opening hand, and sets the shared turn
        counter back to 1.
        """
        if indices is None:
            indices = list(range(self.num_envs))

        # A plain loop is fine: reset is rare compared to step.
        for i in indices:
            # Agent state
            self.batch_stage[i].fill(-1)
            self.batch_energy_vec[i].fill(0)
            self.batch_energy_count[i].fill(0)
            self.batch_continuous_vec[i].fill(0)
            self.batch_continuous_ptr[i] = 0
            self.batch_tapped[i].fill(0)
            self.batch_live[i].fill(0)
            self.batch_opp_tapped[i].fill(0)
            self.batch_scores[i] = 0

            # Contexts
            self.batch_flat_ctx[i].fill(0)
            self.opp_flat_ctx[i].fill(0)
            self.batch_global_ctx[i].fill(0)
            self.opp_global_ctx[i].fill(0)

            # Opponent state. Energy and tapped buffers must be cleared too,
            # otherwise they carry over from the previous episode.
            self.opp_scores[i] = 0
            self.opp_stage[i].fill(-1)
            self.opp_energy_vec[i].fill(0)
            self.opp_energy_count[i].fill(0)
            self.opp_tapped[i].fill(0)
            self.opp_continuous_vec[i].fill(0)
            self.opp_continuous_ptr[i] = 0

            self.batch_agent_history[i].fill(0)
            self.batch_opp_history[i].fill(0)

            # Match protocol: 48 members + 12 lives per deck.
            # NOTE(review): the opening hand is copied from deck[:5] but those
            # slots are not cleared in the deck, while the deck counter is set
            # to 55 — confirm this matches the draw convention in fast_logic.
            self.batch_deck[i] = self._generate_proto_deck()
            self.batch_hand[i, :60].fill(0)
            self.batch_hand[i, :5] = self.batch_deck[i, :5]
            self.batch_global_ctx[i, 3] = 5  # HD (hand count)
            self.batch_global_ctx[i, 6] = 55  # DK (deck count)

            self.opp_deck[i] = self._generate_proto_deck()
            self.opp_hand[i, :60].fill(0)
            self.opp_hand[i, :5] = self.opp_deck[i, :5]
            self.opp_global_ctx[i, 3] = 5  # HD
            self.opp_global_ctx[i, 6] = 55  # DK

        self.turn = 1

    def _generate_proto_deck(self) -> np.ndarray:
        """Generate a shuffled 60-card deck (48 members, 12 lives).

        Ability cards are preferred; when a pool holds fewer cards than needed
        the matching vanilla pool is mixed in. Sampling is with replacement.
        """
        deck = np.zeros(60, dtype=np.int32)

        # 1. Members (48)
        m_pool = self.ability_member_ids
        if len(m_pool) < 48:
            m_pool = np.concatenate((m_pool, self.vanilla_member_ids))
            if len(m_pool) == 0:
                m_pool = np.array([1], dtype=np.int32)
        deck[:48] = np.random.choice(m_pool, 48, replace=True)

        # 2. Lives (12)
        l_pool = self.ability_live_ids
        if len(l_pool) < 12:
            l_pool = np.concatenate((l_pool, self.vanilla_live_ids))
            if len(l_pool) == 0:
                l_pool = np.array([999], dtype=np.int32)
        deck[48:] = np.random.choice(l_pool, 12, replace=True)

        # The whole deck is shuffled, lives included, for simplicity.
        np.random.shuffle(deck)
        return deck

    def step(self, actions: np.ndarray, opp_actions: Optional[np.ndarray] = None):
        """Apply a batch of actions for both players.

        If ``opp_actions`` is None, player 1 plays a random card from hand.
        """
        # 1. Player 0 (agent)
        step_vectorized(
            actions,
            self.batch_stage,
            self.batch_energy_vec,
            self.batch_energy_count,
            self.batch_continuous_vec,
            self.batch_continuous_ptr,
            self.batch_tapped,
            self.batch_live,
            self.batch_opp_tapped,
            self.batch_scores,
            self.batch_flat_ctx,
            self.batch_global_ctx,
            self.batch_hand,
            self.batch_deck,
            self.bytecode_map,
            self.bytecode_index,
        )

        # 2. Player 1 (opponent)
        if opp_actions is None:
            step_opponent_vectorized(
                self.opp_hand,
                self.opp_deck,
                self.opp_stage,
                self.opp_energy_vec,
                self.opp_energy_count,
                self.opp_tapped,
                self.opp_scores,
                self.batch_tapped,
                self.opp_global_ctx,
                self.bytecode_map,
                self.bytecode_index,
            )
        else:
            # Controlled opponent (e.g. self-play): same kernel, swapped buffers.
            step_vectorized(
                opp_actions,
                self.opp_stage,
                self.opp_energy_vec,
                self.opp_energy_count,
                self.opp_continuous_vec,
                self.opp_continuous_ptr,
                self.opp_tapped,
                self.batch_live,  # live zone is shared between both sides here
                self.batch_tapped,  # agent's tapped state, seen as "opponent" by P1
                self.opp_scores,
                self.opp_flat_ctx,
                self.opp_global_ctx,
                self.opp_hand,
                self.opp_deck,
                self.bytecode_map,
                self.bytecode_index,
            )

        # 2b. Performance phase: resolve played live cards.
        resolve_live_performance(
            self.num_envs,
            actions,
            self.batch_stage,
            self.batch_live,
            self.batch_scores,
            self.batch_global_ctx,
            self.card_stats,
        )
        if opp_actions is not None:
            resolve_live_performance(
                self.num_envs,
                opp_actions,
                self.opp_stage,
                self.batch_live,
                self.opp_scores,
                self.opp_global_ctx,
                self.card_stats,
            )

        # 3. Turn progression on phase wrap. Only environment 0 is inspected;
        # the turn counter is shared by the whole batch.
        current_phases = self.batch_global_ctx[:, 8]
        if self.num_envs > 0 and current_phases[0] == 0 and self.turn > 0:
            self.turn += 1

    def get_observations(self, player_id=0):
        """Return a batched observation; player_id=1 gives the opponent's view.

        Each perspective writes into (and returns) its own pre-allocated buffer.
        """
        if player_id == 0:
            return encode_observations_vectorized(
                self.num_envs,
                self.batch_hand,
                self.batch_stage,
                self.batch_energy_count,
                self.batch_tapped,
                self.batch_scores,
                self.opp_scores,
                self.opp_stage,
                self.opp_tapped,
                self.card_stats,
                self.batch_global_ctx,
                self.batch_live,
                self.batch_opp_history,
                self.turn,
                self.obs_buffer,
            )
        # Swapped buffers for the opponent's perspective.
        return encode_observations_vectorized(
            self.num_envs,
            self.opp_hand,
            self.opp_stage,
            self.opp_energy_count,
            self.opp_tapped,
            self.opp_scores,
            self.batch_scores,
            self.batch_stage,
            self.batch_tapped,
            self.card_stats,
            self.opp_global_ctx,
            self.batch_live,
            self.batch_agent_history,
            self.turn,
            self.obs_buffer_p1,
        )

    def get_action_masks(self, player_id=0):
        """Return the (num_envs, 2000) boolean legality mask for one player."""
        if player_id == 0:
            return compute_action_masks(
                self.num_envs, self.batch_hand, self.batch_stage, self.batch_tapped, self.batch_energy_count
            )
        return compute_action_masks(
            self.num_envs, self.opp_hand, self.opp_stage, self.opp_tapped, self.opp_energy_count
        )


@njit
def step_opponent_vectorized(
    opp_hand: np.ndarray,  # (N, 60)
    opp_deck: np.ndarray,  # (N, 60)
    opp_stage: np.ndarray,
    opp_energy_vec: np.ndarray,
    opp_energy_count: np.ndarray,
    opp_tapped: np.ndarray,
    opp_scores: np.ndarray,
    agent_tapped: np.ndarray,
    opp_global_ctx: np.ndarray,  # (N, 128)
    bytecode_map: np.ndarray,
    bytecode_index: np.ndarray,
):
    """Very simplified random opponent step.

    For each environment: pick a random non-empty hand slot, run that card's
    first ability (if it has compiled bytecode) against the opponent buffers,
    then draw one card from the deck if fewer than 5 cards are in hand.
    All buffers are mutated in place.
    """
    num_envs = len(opp_hand)
    hand_size = opp_hand.shape[1]
    deck_size = opp_deck.shape[1]

    # Scratch buffers shared by all environments (allocated once).
    # NOTE(review): f_ctx, live and dummy_cont_vec are not cleared between
    # environments — confirm resolve_bytecode does not leave state in them.
    f_ctx = np.zeros(64, dtype=np.int32)
    live = np.zeros(50, dtype=np.int32)  # dummy live zone for the opponent
    dummy_cont_vec = np.zeros((32, 10), dtype=np.int32)
    dummy_ptr = np.zeros(1, dtype=np.int32)  # 1-element array used as a reference
    dummy_bonus = np.zeros(1, dtype=np.int32)
    # Only the first c_ptr entries are ever read, so no per-env reset is needed.
    candidates = np.zeros(hand_size, dtype=np.int32)

    for i in range(num_envs):
        # 1. Collect the hand slots that hold a card.
        c_ptr = 0
        for j in range(hand_size):
            if opp_hand[i, j] > 0:
                candidates[c_ptr] = j
                c_ptr += 1
        if c_ptr == 0:
            continue

        hand_idx = candidates[np.random.randint(0, c_ptr)]
        act_id = opp_hand[i, hand_idx]

        # 2. Execute the card's first ability, if it has one.
        if act_id > 0 and act_id < bytecode_index.shape[0]:
            map_idx = bytecode_index[act_id, 0]
            if map_idx > 0:
                code_seq = bytecode_map[map_idx]
                opp_global_ctx[i, 0] = opp_scores[i]
                opp_global_ctx[i, 3] -= 1  # hand count (HD) after playing

                dummy_ptr[0] = 0
                dummy_bonus[0] = 0

                # Row slices are views, so in-place edits reach the batch arrays.
                resolve_bytecode(
                    code_seq,
                    f_ctx,
                    opp_global_ctx[i],
                    1,
                    opp_hand[i],
                    opp_deck[i],
                    opp_stage[i],
                    opp_energy_vec[i],
                    opp_energy_count[i],
                    dummy_cont_vec,
                    dummy_ptr,
                    opp_tapped[i],
                    live,
                    agent_tapped[i],
                    bytecode_map,
                    bytecode_index,
                    dummy_bonus,
                )
                # Context layout: SC=0 OS=1 TR=2 HD=3 DI=4 EN=5 DK=6 OT=7.
                # The score is read back from SC after the call.
                opp_scores[i] = opp_global_ctx[i, 0]

        # 3. Post-play cleanup: draw one card when the hand is below 5.
        cnt = 0
        for j in range(hand_size):
            if opp_hand[i, j] > 0:
                cnt += 1

        if cnt < 5:
            # Top of deck = first non-empty deck slot.
            top_card = 0
            deck_idx = -1
            for j in range(deck_size):
                if opp_deck[i, j] > 0:
                    top_card = opp_deck[i, j]
                    deck_idx = j
                    break

            if top_card > 0:
                # Move it to the first empty hand slot.
                for j in range(hand_size):
                    if opp_hand[i, j] == 0:
                        opp_hand[i, j] = top_card
                        opp_deck[i, deck_idx] = 0
                        opp_global_ctx[i, 3] += 1  # HD
                        opp_global_ctx[i, 6] -= 1  # DK
                        break


@njit
def resolve_live_performance(
    num_envs: int,
    action_ids: np.ndarray,  # Played live card ids, one per env
    batch_stage: np.ndarray,  # (N, 3)
    batch_live: np.ndarray,  # (N, 50)
    batch_scores: np.ndarray,  # (N,)
    batch_global_ctx: np.ndarray,  # (N, 128)
    card_stats: np.ndarray,  # (MaxCards, 80)
):
    """Resolve the performance phase for every environment.

    An action id above 900 is treated as a live card. The live succeeds when
    the stage's per-color hearts meet the card's requirements (card_stats
    columns 12-17) and the stage has at least one blade. On success the score
    and global_ctx[0] are incremented, the stage is cleared, and the card, if
    found in the live zone, is negated there. Failure has no effect.
    Column 8 of the global context is always reset to 0 for all environments.
    """
    n_cards = card_stats.shape[0]

    for i in range(num_envs):
        live_id = action_ids[i]

        # Only live cards are processed; ids outside the stats table are
        # skipped because the table cannot be indexed safely with them.
        if live_id <= 900 or live_id >= n_cards:
            continue

        # 1. Locate the card in the live zone. A miss is tolerated: the play
        # is still treated as valid, there is just nothing to mark afterwards.
        live_idx = -1
        for j in range(50):
            if batch_live[i, j] == live_id:
                live_idx = j
                break

        # 2. Sum the stage: hearts per color (1..6) and total blades.
        stage_hearts = np.zeros(7, dtype=np.int32)
        total_blades = 0
        for slot in range(3):
            cid = batch_stage[i, slot]
            if cid > 0 and cid < n_cards:
                total_blades += card_stats[cid, 1]
                col = card_stats[cid, 3]
                if 1 <= col <= 6:
                    stage_hearts[col] += card_stats[cid, 2]

        # Requirements for color c live at column 11 + c (12..17).
        met = True
        for c in range(1, 7):
            if stage_hearts[c] < card_stats[live_id, 11 + c]:
                met = False

        # 3. Apply the result.
        if met and total_blades > 0:
            batch_scores[i] += 1
            batch_global_ctx[i, 0] += 1  # SC
            batch_stage[i, 0] = -1
            batch_stage[i, 1] = -1
            batch_stage[i, 2] = -1
            if live_idx >= 0:
                batch_live[i, live_idx] = -live_id  # mark as completed

    # Always end the performance phase (phase 0); step() uses this as the
    # end-of-turn signal.
    batch_global_ctx[:, 8] = 0


@njit
def compute_action_masks(
    num_envs: int,
    batch_hand: np.ndarray,
    batch_stage: np.ndarray,
    batch_tapped: np.ndarray,
    batch_energy_count: np.ndarray,
):
    """Return a (num_envs, 2000) boolean mask of legal actions.

    Action 0 (pass) is always legal; every card id in hand (0 < id < 2000) is
    legal as the action with the same number. The stage, tapped and energy
    arguments are accepted but not consulted.
    """
    masks = np.zeros((num_envs, 2000), dtype=np.bool_)
    masks[:, 0] = True

    for i in range(num_envs):
        for j in range(60):
            cid = batch_hand[i, j]
            # 1:1 mapping: card id is the action id.
            if cid > 0 and cid < 2000:
                masks[i, cid] = True

    return masks


@njit
def _copy_card_features(obs_row, base, stats_row, trait_scale, fix_blades):
    """Write one card's 79 stat slots into obs_row starting at base.

    Every slot is scaled by 20 (or 50 when the raw value exceeds 50); a few
    slots are then overwritten with dedicated scalings. fix_blades selects
    whether slot base+4 gets the blades value.
    """
    for k in range(79):
        v = stats_row[k]
        obs_row[base + k] = v / (50.0 if v > 50 else 20.0)
    obs_row[base + 3] = stats_row[0] / 10.0
    if fix_blades:
        obs_row[base + 4] = stats_row[1] / 5.0
    obs_row[base + 5] = stats_row[2] / 5.0
    obs_row[base + 11] = stats_row[11] / trait_scale


@njit
def encode_observations_vectorized(
    num_envs: int,
    batch_hand: np.ndarray,  # (N, 60)
    batch_stage: np.ndarray,  # (N, 3)
    batch_energy_count: np.ndarray,  # (N, 3)
    batch_tapped: np.ndarray,  # (N, 3)
    batch_scores: np.ndarray,  # (N,)
    opp_scores: np.ndarray,  # (N,)
    opp_stage: np.ndarray,  # (N, 3)
    opp_tapped: np.ndarray,  # (N, 3)
    card_stats: np.ndarray,  # (MaxCards, 80)
    batch_global_ctx: np.ndarray,  # (N, 128)
    batch_live: np.ndarray,  # (N, 50) - live zone card ids
    batch_opp_history: np.ndarray,  # (N, 50) - opponent trash/history
    turn_number: int,
    observations: np.ndarray,  # (N, 8192)
):
    """Encode the batch state into ``observations`` (zeroed, filled, returned).

    Layout (stride 80 per card slot):
      0-99       global game state (turn, player flag, one-hot phase at 20-26)
      100-6499   own "universe": up to 80 cards from hand, stage and live zone;
                 slot 79 of each card is a location signal
                 (1.0 hand, 2.x stage, 5.0 active live)
      6500-6739  opponent stage (3 slots, location 3.x)
      6740-7699  opponent history, first 12 entries (location 4.0)
      7800+      volume counters read from the global context
      8000+      scores
    Trash and won lives are not encoded: no array for them is passed in.
    """
    observations.fill(0.0)

    max_id_val = 2000.0
    STRIDE = 80
    TRAIT_SCALE = 2097152.0
    PHASE_IDX = 8  # phase slot of the global context (index 0 is the score)

    MY_UNIVERSE_START = 100
    OPP_START = 6500
    OPP_HISTORY_START = 6740
    VOLUMES_START = 7800
    SCORE_START = 8000
    MAX_UNIVERSE = 80

    n_cards = card_stats.shape[0]

    for i in range(num_envs):
        obs = observations[i]  # row view

        # --- 1. METADATA ---
        obs[5] = 1.0  # Phase (Main)
        obs[6] = min(turn_number / 20.0, 1.0)  # Turn
        obs[16] = 1.0  # Player 0

        # --- 2. MY UNIVERSE (hand + stage + active lives) ---
        u_idx = 0

        # A. HAND (Loc 1.0)
        for j in range(60):
            cid = batch_hand[i, j]
            if cid > 0 and u_idx < MAX_UNIVERSE:
                base = MY_UNIVERSE_START + u_idx * STRIDE
                if cid < n_cards:
                    _copy_card_features(obs, base, card_stats[cid], TRAIT_SCALE, True)
                obs[base] = 1.0  # Presence
                obs[base + 1] = cid / max_id_val
                obs[base + 79] = 1.0
                u_idx += 1

        # B. STAGE (Loc 2.x); -1 marks an empty slot
        for slot in range(3):
            cid = batch_stage[i, slot]
            if cid >= 0 and u_idx < MAX_UNIVERSE:
                base = MY_UNIVERSE_START + u_idx * STRIDE
                if cid < n_cards:
                    _copy_card_features(obs, base, card_stats[cid], TRAIT_SCALE, True)
                obs[base] = 1.0
                obs[base + 1] = cid / max_id_val
                obs[base + 2] = 1.0 if batch_tapped[i, slot] else 0.0
                obs[base + 14] = min(batch_energy_count[i, slot] / 5.0, 1.0)
                obs[base + 79] = 2.0 + (slot * 0.1)
                u_idx += 1

        # D. LIVE ZONE, active (Loc 5.0); only the first 5 entries are shown.
        # Non-positive entries (empty or completed) are skipped.
        for k in range(5):
            cid = batch_live[i, k]
            if cid > 0 and u_idx < MAX_UNIVERSE:
                base = MY_UNIVERSE_START + u_idx * STRIDE
                if cid < n_cards:
                    _copy_card_features(obs, base, card_stats[cid], TRAIT_SCALE, False)
                obs[base] = 1.0
                obs[base + 1] = cid / max_id_val
                obs[base + 79] = 5.0
                u_idx += 1

        # --- 3. OPPONENT STAGE ---
        for slot in range(3):
            cid = opp_stage[i, slot]
            base = OPP_START + slot * STRIDE
            if cid >= 0:
                obs[base] = 1.0
                obs[base + 1] = cid / max_id_val
                obs[base + 2] = 1.0 if opp_tapped[i, slot] else 0.0
                if cid < n_cards:
                    # Meta + Ability 1 block only
                    obs[base + 3] = card_stats[cid, 0] / 10.0
                    obs[base + 11] = card_stats[cid, 11] / TRAIT_SCALE
                    for k in range(20, 36):
                        val = card_stats[cid, k]
                        scale = 50.0 if val > 50 else 10.0
                        obs[base + k] = val / scale
                obs[base + 79] = 3.0 + (slot * 0.1)

        # --- 4. OPPONENT HISTORY (first 12 entries) ---
        for k in range(12):
            cid = batch_opp_history[i, k]
            if cid > 0:
                base = OPP_HISTORY_START + k * STRIDE
                if cid < n_cards:
                    _copy_card_features(obs, base, card_stats[cid], TRAIT_SCALE, False)
                # Presence and id go in after the stat copy so that the copy
                # does not overwrite them.
                obs[base] = 1.0
                obs[base + 1] = cid / max_id_val
                obs[base + 79] = 4.0  # Loc: trash/history

        # --- 5. VOLUMES ---
        # NOTE(review): the labels below are the original author's; they do not
        # all agree with the SC/OS/TR/HD/DI/EN/DK/OT context layout — confirm.
        obs[VOLUMES_START] = batch_global_ctx[i, 6] / 50.0  # My Deck
        obs[VOLUMES_START + 1] = batch_global_ctx[i, 7] / 50.0  # Opp Deck
        obs[VOLUMES_START + 2] = batch_global_ctx[i, 3] / 20.0  # My Hand
        obs[VOLUMES_START + 3] = batch_global_ctx[i, 2] / 50.0  # My Trash
        obs[VOLUMES_START + 4] = batch_global_ctx[i, 4] / 20.0  # Opp Hand
        obs[VOLUMES_START + 5] = batch_global_ctx[i, 5] / 50.0  # Opp Trash
        obs[VOLUMES_START + 6] = batch_global_ctx[i, 8] / 50.0  # My Blade Dens
        obs[VOLUMES_START + 7] = batch_global_ctx[i, 9] / 50.0  # My Heart Dens
        obs[VOLUMES_START + 8] = 0.0  # Placeholder: Opp Deck Blades
        obs[VOLUMES_START + 9] = 0.0  # Placeholder: Opp Deck Hearts

        # --- 6. ONE-HOT PHASE (indices 20-26) ---
        # Map: 1=Start, 2=Draw, 3=Main, 4=Perf, 5=Clear, 6=End
        ph = int(batch_global_ctx[i, PHASE_IDX])
        if 0 <= ph <= 6:
            obs[20 + ph] = 1.0

        # --- 7. SCORES ---
        obs[SCORE_START] = min(batch_scores[i] / 9.0, 1.0)
        obs[SCORE_START + 1] = min(opp_scores[i] / 9.0, 1.0)

    return observations