# Spaces: trioskosmos / LovecaSim (Sleeping)
# File size: 11,107 Bytes
# 2badd2f

from typing import List, Optional

import numpy as np

from engine.game.ai_compat import njit
from engine.game.fast_logic import batch_apply_action


@njit(cache=True)
def step_vectorized(
    actions: np.ndarray,
    batch_stage: np.ndarray,
    batch_energy_vec: np.ndarray,
    batch_energy_count: np.ndarray,
    batch_continuous_vec: np.ndarray,
    batch_continuous_ptr: np.ndarray,
    batch_tapped: np.ndarray,
    batch_live: np.ndarray,
    batch_opp_tapped: np.ndarray,
    batch_scores: np.ndarray,
    batch_flat_ctx: np.ndarray,
    batch_global_ctx: np.ndarray,
    batch_hand: np.ndarray,
    batch_deck: np.ndarray,
    # Bytecode maps
    bytecode_map: np.ndarray,  # (GlobalOpMapSize, MaxBytecodeLen, 4)
    bytecode_index: np.ndarray,  # (NumCards, NumAbilities) -> Index in map
):
    """
    Advance every environment in the batch by one action.

    Two things happen, in this order:

    1. For each environment ``e`` in ``range(len(actions))`` the current
       score ``batch_scores[e]`` is copied into ``batch_global_ctx[e, 0]``.
    2. All buffers are handed to ``batch_apply_action`` with player id 0.

    Nothing is returned; any state change is made in place on the arrays
    passed in. What ``batch_apply_action`` does with them is defined in
    ``engine.game.fast_logic`` and is not visible from this module.
    """
    # Mirror the per-environment scores into slot 0 of the global context
    # so the kernel sees up-to-date values.
    env_count = actions.shape[0]
    for env in range(env_count):
        batch_global_ctx[env, 0] = batch_scores[env]

    # NOTE: the positional order below (tapped, scores, live, opp_tapped)
    # is deliberately NOT the same as this function's parameter order; it is
    # kept exactly as the kernel is currently called.
    batch_apply_action(
        actions,
        0,  # player_id
        batch_stage,
        batch_energy_vec,
        batch_energy_count,
        batch_continuous_vec,
        batch_continuous_ptr,
        batch_tapped,
        batch_scores,
        batch_live,
        batch_opp_tapped,
        batch_flat_ctx,
        batch_global_ctx,
        batch_hand,
        batch_deck,
        bytecode_map,
        bytecode_index,
    )


class VectorGameState:
    """
    Manages a batch of independent GameStates for high-throughput training.

    Every piece of per-environment state is stored in a pre-allocated int32
    NumPy buffer whose first axis is the environment index. ``step`` passes
    those buffers to the JIT kernel ``step_vectorized``; ``get_observations``
    encodes them into the float32 ``obs_buffer``.

    Construction reads three JSON files relative to the current working
    directory (``data/cards_numba.json``, ``verified_card_pool.json`` and
    ``data/cards_compiled.json``). Missing or unreadable files are not fatal:
    a warning is printed and an empty bytecode table / a single-card pool
    (card id 1) is used instead.
    """

    def __init__(self, num_envs: int):
        """Allocate all state buffers for ``num_envs`` environments and load card data."""
        self.num_envs = num_envs
        self.turn = 1  # One turn counter shared by the whole batch.

        # Batched state buffers
        self.batch_stage = np.full((num_envs, 3), -1, dtype=np.int32)  # -1 marks an empty slot
        self.batch_energy_vec = np.zeros((num_envs, 3, 32), dtype=np.int32)
        self.batch_energy_count = np.zeros((num_envs, 3), dtype=np.int32)
        self.batch_continuous_vec = np.zeros((num_envs, 32, 10), dtype=np.int32)
        self.batch_continuous_ptr = np.zeros(num_envs, dtype=np.int32)
        self.batch_tapped = np.zeros((num_envs, 3), dtype=np.int32)
        self.batch_live = np.zeros((num_envs, 50), dtype=np.int32)
        self.batch_opp_tapped = np.zeros((num_envs, 3), dtype=np.int32)
        self.batch_scores = np.zeros(num_envs, dtype=np.int32)

        # Pre-allocated context buffers (avoids per-step allocation)
        self.batch_flat_ctx = np.zeros((num_envs, 64), dtype=np.int32)
        self.batch_global_ctx = np.zeros((num_envs, 128), dtype=np.int32)
        self.batch_hand = np.zeros((num_envs, 50), dtype=np.int32)
        self.batch_deck = np.zeros((num_envs, 50), dtype=np.int32)

        # Pre-allocated observation buffer, reused by every get_observations call
        self.obs_buffer = np.zeros((num_envs, 320), dtype=np.float32)

        # Card data
        self._load_bytecode()
        self._load_verified_deck_pool()

    def _load_bytecode(self):
        """
        Build ``bytecode_map`` and ``bytecode_index`` from ``data/cards_numba.json``.

        The file maps keys of the form ``"<card_id>_<ability_idx>"`` to a flat
        list of ints that is reshaped to rows of 4.

        * ``bytecode_map`` has shape ``(entries + 1, max_len, 4)``; row 0 is
          left all-zero (empty/nop).
        * ``bytecode_index`` has shape ``(max_cards, max_abilities)`` and
          holds, for each (card, ability), the row in ``bytecode_map``
          (0 when the ability has no compiled bytecode).

        Programs longer than ``max_len`` rows are truncated. Keys whose card
        or ability id falls outside the index table are skipped. If the file
        does not exist a warning is printed and an empty table is installed.
        """
        import json

        # Table geometry. Assigned before any I/O so these attributes exist
        # on the fallback path as well.
        self.max_cards = 2000
        self.max_abilities = 4
        self.max_len = 64  # Max 64 instructions per ability

        try:
            with open("data/cards_numba.json", "r", encoding="utf-8") as f:
                raw_map = json.load(f)
        except FileNotFoundError:
            print(" [VectorEnv] Warning: data/cards_numba.json not found. Using empty map.")
            # Keep the index table at its full shape (all entries -> row 0)
            # so a lookup by real card id stays in bounds. Presumably the
            # kernel indexes it by [card_id, ability]; confirm in fast_logic.
            self.bytecode_map = np.zeros((1, self.max_len, 4), dtype=np.int32)
            self.bytecode_index = np.zeros((self.max_cards, self.max_abilities), dtype=np.int32)
            return

        # One row per JSON entry, plus row 0 which stays empty/nop.
        self.bytecode_map = np.zeros((len(raw_map) + 1, self.max_len, 4), dtype=np.int32)
        self.bytecode_index = np.zeros((self.max_cards, self.max_abilities), dtype=np.int32)

        next_row = 1
        for key, bc_list in raw_map.items():
            cid, aid = map(int, key.split("_"))
            # Reject negatives too: they would otherwise wrap around via
            # NumPy negative indexing and overwrite an unrelated card.
            if not (0 <= cid < self.max_cards and 0 <= aid < self.max_abilities):
                continue

            bc_arr = np.array(bc_list, dtype=np.int32).reshape(-1, 4)
            length = min(bc_arr.shape[0], self.max_len)
            self.bytecode_map[next_row, :length] = bc_arr[:length]
            self.bytecode_index[cid, aid] = next_row
            next_row += 1

        # Report what was actually stored, not the raw entry count.
        print(f" [VectorEnv] Loaded {next_row - 1} compiled abilities.")

    def _load_verified_deck_pool(self):
        """
        Populate ``verified_card_ids`` (int32 array) with the ids decks are drawn from.

        ``verified_card_pool.json`` supplies card numbers under the key
        ``"verified_abilities"``; ``data/cards_compiled.json`` supplies the
        ``member_db`` mapping used to translate each ``card_no`` to its
        integer card id. Numbers absent from the database are ignored.

        Falls back to the single id ``1`` when no card resolves, or when
        anything goes wrong while reading either file (the error is printed).
        """
        import json

        try:
            # Load Verified List
            with open("verified_card_pool.json", "r", encoding="utf-8") as f:
                verified_data = json.load(f)

            # Load DB to map CardNo -> CardID
            with open("data/cards_compiled.json", "r", encoding="utf-8") as f:
                db_data = json.load(f)

            card_no_map = {cdata["card_no"]: int(cid) for cid, cdata in db_data["member_db"].items()}

            resolved = [
                card_no_map[v_no]
                for v_no in verified_data.get("verified_abilities", [])
                if v_no in card_no_map
            ]

            if not resolved:
                print(" [VectorEnv] Warning: No verified cards found. Using ID 1.")
                resolved = [1]
            else:
                print(f" [VectorEnv] Loaded {len(resolved)} verified cards for training.")

            self.verified_card_ids = np.array(resolved, dtype=np.int32)

        except Exception as e:
            # Deliberately broad: deck data is best-effort and a broken file
            # must not prevent the environment from being constructed.
            print(f" [VectorEnv] Deck Load Error: {e}")
            self.verified_card_ids = np.array([1], dtype=np.int32)

    def reset(self, indices: Optional[List[int]] = None):
        """
        Reset specified environments (or all if indices is None).

        Each selected environment gets every state buffer restored to its
        initial value, a fresh deck sampled with replacement from
        ``verified_card_ids``, and an opening hand equal to the first five
        deck entries (the deck itself is not shifted). The shared ``turn``
        counter is set back to 1 regardless of which environments were reset.
        """
        if indices is None:
            indices = range(self.num_envs)

        deck_size = self.batch_deck.shape[1]
        pool = self.verified_card_ids

        # A plain loop is fine here: reset is rare compared to step.
        for i in indices:
            self.batch_stage[i].fill(-1)
            self.batch_energy_vec[i].fill(0)
            self.batch_energy_count[i].fill(0)
            self.batch_continuous_vec[i].fill(0)
            self.batch_continuous_ptr[i] = 0
            self.batch_tapped[i].fill(0)
            self.batch_live[i].fill(0)
            self.batch_opp_tapped[i].fill(0)
            self.batch_scores[i] = 0

            # Reset contexts
            self.batch_flat_ctx[i].fill(0)
            self.batch_global_ctx[i].fill(0)

            # Deck: random cards from the verified pool, one per deck slot.
            if len(pool) > 0:
                self.batch_deck[i] = np.random.choice(pool, deck_size)
            else:
                self.batch_deck[i].fill(0)

            # Hand: wipe the whole row first so cards left over from the
            # previous episode cannot leak into the new one, then copy the
            # top five deck entries.
            self.batch_hand[i].fill(0)
            self.batch_hand[i, :5] = self.batch_deck[i, :5]

        self.turn = 1

    def step(self, actions: np.ndarray):
        """
        Apply a batch of actions across all environments.

        Delegates to ``step_vectorized``, which updates the state buffers in
        place. ``self.turn`` is not advanced here.
        """
        step_vectorized(
            actions,
            self.batch_stage,
            self.batch_energy_vec,
            self.batch_energy_count,
            self.batch_continuous_vec,
            self.batch_continuous_ptr,
            self.batch_tapped,
            self.batch_live,
            self.batch_opp_tapped,
            self.batch_scores,
            self.batch_flat_ctx,
            self.batch_global_ctx,
            self.batch_hand,
            self.batch_deck,
            self.bytecode_map,
            self.bytecode_index,
        )

    def get_observations(self):
        """
        Return a batched observation for RL models.

        The result is ``self.obs_buffer`` itself, overwritten on every call;
        copy it if the values must survive the next call.
        """
        return encode_observations_vectorized(
            self.num_envs,
            self.batch_stage,
            self.batch_energy_count,
            self.batch_tapped,
            self.batch_scores,
            self.turn,
            self.obs_buffer,
        )


@njit(cache=True)
def encode_observations_vectorized(
    num_envs: int,
    batch_stage: np.ndarray,  # (N, 3)
    batch_energy_count: np.ndarray,  # (N, 3)
    batch_tapped: np.ndarray,  # (N, 3)
    batch_scores: np.ndarray,  # (N,)
    turn_number: int,
    observations: np.ndarray,  # (N, 320)
):
    """
    Write one observation row per environment into ``observations``.

    The buffer is zeroed first and then filled in place; the same array is
    returned. Columns written for environment ``e``:

    * ``5``   - set to 1.0 (phase flag; the vector env always reports the
      same phase, intended to be Main).
    * ``16``  - set to 1.0 (current player is always player 0).
    * ``168 + 12 * slot + k`` for each of the 3 stage slots holding a card
      (``batch_stage[e, slot] >= 0``):
        - ``k = 0``  occupied flag (1.0)
        - ``k = 1``  card id divided by 2000
        - ``k = 2``  1.0 if the slot is tapped, else 0.0
        - ``k = 3``  constant 0.5 (placeholder attribute)
        - ``k = 11`` energy count / 5, capped at 1.0
    * ``270`` - score / 5, capped at 1.0.

    Every other column (hand, opponent stage, live zone, ...) stays 0.0.
    ``turn_number`` is accepted but not used.
    """
    observations.fill(0.0)
    id_scale = 2000.0  # Normalization constant for card ids

    for env in range(num_envs):
        # Metadata: fixed phase flag and "player 0 to act" flag.
        observations[env, 5] = 1.0
        observations[env, 16] = 1.0

        # Own stage: 3 slots, 12 features each, starting at column 168.
        for slot in range(3):
            card_id = batch_stage[env, slot]
            if card_id < 0:
                # Empty slot: its 12 features stay zero.
                continue

            offset = 168 + 12 * slot
            observations[env, offset] = 1.0
            observations[env, offset + 1] = card_id / id_scale

            if batch_tapped[env, slot]:
                observations[env, offset + 2] = 1.0
            else:
                observations[env, offset + 2] = 0.0

            # Placeholder attribute: real card stats are not available here.
            observations[env, offset + 3] = 0.5

            energy_ratio = batch_energy_count[env, slot] / 5.0
            observations[env, offset + 11] = min(energy_ratio, 1.0)

        # Score, normalised and capped.
        score_ratio = batch_scores[env] / 5.0
        observations[env, 270] = min(score_ratio, 1.0)

    return observations