Spaces:

trioskosmos
/

LovecaSim

Sleeping

App Files Files Community

trioskosmos committed on Feb 3

Commit

8d81185

verified ·

1 Parent(s): 1c22442

Upload ai/agents/search_prob_agent.py with huggingface_hub

Browse files

Files changed (1) hide show

ai/agents/search_prob_agent.py +407 -0

ai/agents/search_prob_agent.py ADDED Viewed

	@@ -0,0 +1,407 @@

import logging
from typing import List

import numpy as np

from ai.agents.agent_base import Agent
from engine.game.enums import Phase as PhaseEnum
from engine.game.game_state import GameState
try:
    from numba import njit

    HAS_NUMBA = True
except ImportError:
    HAS_NUMBA = False

    def njit(*args, **kwargs):
        """No-op stand-in for ``numba.njit`` used when Numba is not installed.

        Accepts both decorator spellings so kernels stay importable either way:

        * bare ``@njit`` - the decorated function is returned unchanged;
        * configured ``@njit(...)`` (options and/or a signature) - a
          pass-through decorator is returned and the options are ignored.
        """
        # Bare usage: the only positional argument is the function itself.
        if len(args) == 1 and callable(args[0]) and not kwargs:
            return args[0]

        def _passthrough(func):
            return func

        return _passthrough
@njit
def _check_meet_jit(hearts, req):
    """Return True when ``hearts`` can pay for the requirement vector ``req``.

    Both vectors are indexed 0-6: slots 0-5 hold the six specific colours and
    slot 6 is the "any" slot. Payment is greedy, in three stages:

    1. each colour requirement is paid from hearts of the same colour;
    2. the "any" requirement (``req[6]``) is paid from whatever specific
       hearts are left over after stage 1;
    3. what is still owed is paid from the wildcard hearts (``hearts[6]``).

    Wildcard hearts are never spent on a specific colour requirement.
    """
    colour_short = False
    leftover = 0
    for slot in range(6):
        want = req[slot]
        have = hearts[slot]
        # Hearts of this colour actually spent on this colour: min(want, have).
        paid = want if want < have else have
        if want - paid > 0:
            colour_short = True
        leftover += have - paid

    # Stage 2: spend left-over specific hearts on the "any" requirement.
    any_due = req[6]
    from_leftover = leftover if leftover < any_due else any_due
    any_due -= from_leftover

    # Stage 3: spend wildcard hearts on the remainder.
    wild = hearts[6]
    from_wild = wild if wild < any_due else any_due

    if colour_short:
        return False
    if any_due - from_wild > 0:
        return False
    return True
@njit
def _run_sampling_jit(stage_hearts, deck_ids, global_matrix, num_yells, total_req, samples):
    """Monte Carlo estimate of the chance that yelled cards complete ``total_req``.

    Every trial draws ``min(num_yells, len(deck_ids))`` distinct deck positions
    uniformly at random (partial Fisher-Yates shuffle), adds the matching rows
    of ``global_matrix`` to a copy of ``stage_hearts`` and tests the total with
    ``_check_meet_jit``.

    Args:
        stage_hearts: length-7 heart counts available before any card is drawn.
        deck_ids: 1-D integer array of card base IDs; each ID is used as a row
            index into ``global_matrix``.
        global_matrix: 2-D array holding one 7-wide heart row per card ID.
        num_yells: number of cards drawn per trial.
        total_req: length-7 requirement vector.
        samples: number of trials to run.

    Returns:
        The fraction of successful trials, always a float in ``[0.0, 1.0]``.
    """
    deck_size = len(deck_ids)
    sample_size = num_yells
    if sample_size > deck_size:
        sample_size = deck_size

    # Nothing can be drawn (empty deck or no yells) or no trials were requested:
    # the result depends only on the hearts already on stage. Report it as a
    # probability (1.0 / 0.0), never as a raw count, and never divide by zero.
    if sample_size <= 0 or samples <= 0:
        if _check_meet_jit(stage_hearts, total_req):
            return 1.0
        return 0.0

    known_ids = global_matrix.shape[0]
    # Position array reused across trials; a partially shuffled permutation is
    # still a valid starting point for the next partial shuffle.
    indices = np.arange(deck_size)
    success_count = 0

    for _ in range(samples):
        # Partial Fisher-Yates: only the first ``sample_size`` slots are needed.
        for i in range(sample_size):
            j = np.random.randint(i, deck_size)
            indices[i], indices[j] = indices[j], indices[i]

        simulated_hearts = stage_hearts.copy()
        for k in range(sample_size):
            card_id = deck_ids[indices[k]]
            # IDs without a row in the matrix contribute no hearts. Skipping
            # them also avoids an unchecked out-of-bounds read under Numba.
            if card_id < 0 or card_id >= known_ids:
                continue
            for h in range(7):
                simulated_hearts[h] += global_matrix[card_id, h]

        if _check_meet_jit(simulated_hearts, total_req):
            success_count += 1

    return success_count / samples
class YellOddsCalculator:
    """
    Calculates the probability of completing a set of lives given a known (but unordered) deck.

    The heart contribution of every member card is stored once in a dense
    ``(max_member_id + 1, 7)`` int32 matrix so the sampling kernel can look
    cards up by base ID without touching Python objects.
    """

    # Card and live IDs are reduced to their base ID by keeping the low 20 bits.
    BASE_ID_MASK = 0xFFFFF
    # Width of every heart vector: six specific colours plus one "any" slot.
    HEART_SLOTS = 7

    def __init__(self, member_db, live_db):
        """Build the heart lookup matrix.

        Args:
            member_db: mapping of member base ID -> member object exposing
                ``blade_hearts`` (a length-7 vector). May be empty or ``None``.
            live_db: mapping of live base ID -> live object exposing
                ``required_hearts``.
        """
        self.member_db = member_db
        self.live_db = live_db

        members = member_db or {}
        max_id = max(members) if members else 0

        # Shape: (MaxID + 1, 7); rows for IDs that are not members stay zero.
        matrix = np.zeros((max_id + 1, self.HEART_SLOTS), dtype=np.int32)
        for mid, member in members.items():
            matrix[mid] = np.asarray(member.blade_hearts, dtype=np.int32)

        # The kernel indexes this array directly, so keep it C-contiguous int32.
        self.global_heart_matrix = np.ascontiguousarray(matrix)

    def calculate_odds(
        self, deck_cards: List[int], stage_hearts: np.ndarray, live_ids: List[int], num_yells: int, samples: int = 150
    ) -> float:
        """Estimate the probability that yelling completes all ``live_ids``.

        Args:
            deck_cards: card IDs remaining in the deck (order is irrelevant).
            stage_hearts: length-7 heart counts already provided by the stage.
            live_ids: IDs of the lives to complete; their ``required_hearts``
                are summed. IDs missing from ``live_db`` are ignored.
            num_yells: number of cards that will be drawn from the deck.
            samples: number of Monte Carlo trials.

        Returns:
            A probability in ``[0.0, 1.0]``; ``1.0`` when ``live_ids`` is empty.
        """
        if not live_ids:
            return 1.0

        # Combined requirement of every targeted live.
        total_req = np.zeros(self.HEART_SLOTS, dtype=np.int32)
        for live_id in live_ids:
            base_id = live_id & self.BASE_ID_MASK
            if base_id in self.live_db:
                total_req += self.live_db[base_id].required_hearts

        deck_ids = np.array([c & self.BASE_ID_MASK for c in deck_cards], dtype=np.int32)
        # Same normalisation whether or not Numba is present, so both code
        # paths see identical inputs.
        stage = np.ascontiguousarray(stage_hearts, dtype=np.int32)

        # With nothing to draw (or no trials) the answer is deterministic;
        # answer here so the result is always a probability.
        if deck_ids.size == 0 or num_yells <= 0 or samples <= 0:
            return 1.0 if _check_meet_jit(stage, total_req) else 0.0

        return float(
            _run_sampling_jit(stage, deck_ids, self.global_heart_matrix, num_yells, total_req, samples)
        )

    def check_meet(self, hearts: np.ndarray, req: np.ndarray) -> bool:
        """Legacy wrapper for tests."""
        return bool(_check_meet_jit(hearts, req))
class SearchProbAgent(Agent):
    """
    AI that uses Alpha-Beta search for decisions and sampling for probability.
    Optimizes for Expected Value (EV) = P(Success) * Score.
    """

    # Maximum number of root actions that are searched in ``choose_action``.
    ROOT_BRANCH_LIMIT = 15
    # Maximum number of actions sampled per node inside ``_minimax``.
    NODE_BRANCH_LIMIT = 8
    # Low 20 bits of a card/live ID give the base ID used as database key.
    BASE_ID_MASK = 0xFFFFF

    _log = logging.getLogger(__name__)

    def __init__(self, depth=2, beam_width=5):
        """
        Args:
            depth: search depth in plies; the root move counts as one ply.
            beam_width: stored on the instance; not consulted by the search
                implemented in this class.
        """
        self.depth = depth
        self.beam_width = beam_width
        self.calculator = None
        self._last_state_id = None
        self._action_cache = {}

    def get_calculator(self, state: GameState):
        """Return the shared ``YellOddsCalculator``, creating it on first use
        from the databases attached to ``state``."""
        if self.calculator is None:
            self.calculator = YellOddsCalculator(state.member_db, state.live_db)
        return self.calculator

    def evaluate_state(self, state: GameState, player_id: int) -> float:
        """Heuristic value of ``state`` from the point of view of ``player_id``.

        Finished games score +10000 (win), -10000 (opponent win) or 0.
        Otherwise the score adds up completed lives, stage presence, blades and
        hearts on stage, the expected value of pending lives, and hand/energy
        resources.
        """
        if state.game_over:
            if state.winner == player_id:
                return 10000.0
            if state.winner >= 0:
                return -10000.0
            return 0.0

        p = state.players[player_id]
        opp = state.players[1 - player_id]
        score = 0.0

        # 1. Guaranteed Score (Successful Lives)
        score += len(p.success_lives) * 1000.0
        score -= len(opp.success_lives) * 800.0

        # 2 + 3. Board presence and board value, gathered in a single pass.
        stage_member_count = 0
        total_blades = 0
        total_hearts = np.zeros(7, dtype=np.int32)
        for cid in p.stage:
            if cid < 0:
                continue  # negative ID marks an empty stage slot
            stage_member_count += 1
            base_id = cid & self.BASE_ID_MASK
            if base_id in state.member_db:
                member = state.member_db[base_id]
                total_blades += member.blades
                total_hearts += member.hearts
        score += stage_member_count * 150.0  # Big bonus for having members on stage
        score += total_blades * 80.0
        score += np.sum(total_hearts) * 40.0

        # 4. Expected Score from Pending Lives
        target_lives = list(p.live_zone)
        if target_lives and total_blades > 0:
            calc = self.get_calculator(state)
            prob = calc.calculate_odds(p.main_deck, total_hearts, target_lives, total_blades)
            potential_score = sum(
                state.live_db[lid & self.BASE_ID_MASK].score
                for lid in target_lives
                if (lid & self.BASE_ID_MASK) in state.live_db
            )
            score += prob * potential_score * 500.0
            if prob > 0.9:
                score += 500.0

        # 5. Resources
        # Diminishing returns for hand size to prevent hoarding
        hand_val = len(p.hand)
        if hand_val > 8:
            score += 80.0 + (hand_val - 8) * 1.0  # Very small bonus for cards beyond 8
        else:
            score += hand_val * 10.0
        score += p.count_untapped_energy() * 10.0
        score -= len(opp.hand) * 5.0
        return float(score)

    @staticmethod
    def _action_priority(idx) -> int:
        """Sort key used to keep the most promising root actions (lower first)."""
        if 900 <= idx <= 902:
            return -1  # Performance (High Priority)
        if 1 <= idx <= 180:
            return 0  # Play Card
        if 400 <= idx <= 459:
            return 1  # Live Set
        if 200 <= idx <= 202:
            return 2  # Activate Ability
        if idx == 0:
            return 5  # Pass (End Phase)
        return 10  # Everything else (choices, target selection etc)

    def _advance(self, state: GameState, action: int, actor: int) -> GameState:
        """Apply ``action`` to a copy of ``state`` and greedily resolve any
        follow-up choices that still belong to ``actor``."""
        ns = state.copy().step(action)
        while ns.pending_choices and ns.current_player == actor:
            ns = ns.step(self._greedy_choice(ns))
        return ns

    def choose_action(self, state: GameState, player_id: int) -> int:
        """Pick an action index for ``player_id`` in ``state``.

        A single legal action is returned directly. Outside the MAIN and
        LIVE_SET phases a legal action is chosen uniformly at random. Otherwise
        up to ``ROOT_BRANCH_LIMIT`` prioritised actions (plus Pass when legal)
        are searched and the best-valued one is returned. With no legal action
        at all, ``0`` is returned.
        """
        legal_indices = np.where(state.get_legal_actions())[0]
        if legal_indices.size == 0:
            return 0  # same fallback as _greedy_choice
        if legal_indices.size == 1:
            return int(legal_indices[0])

        # Skip search for simple phases
        if state.phase not in (PhaseEnum.MAIN, PhaseEnum.LIVE_SET):
            return int(np.random.choice(legal_indices))

        # Limit branching factor for performance
        candidates = [int(a) for a in legal_indices]
        if len(candidates) > self.ROOT_BRANCH_LIMIT:
            pass_is_legal = 0 in candidates
            candidates.sort(key=self._action_priority)
            candidates = candidates[: self.ROOT_BRANCH_LIMIT]
            if pass_is_legal and 0 not in candidates:
                candidates.append(0)

        best_action = int(legal_indices[0])
        best_val = -float("inf")
        alpha = -float("inf")
        beta = float("inf")
        for action in candidates:
            try:
                ns = self._advance(state, action, player_id)
                val = self._minimax(ns, self.depth - 1, alpha, beta, False, player_id)
            except Exception:
                # Best effort: an action the engine cannot simulate is skipped,
                # but the failure stays visible when debug logging is enabled.
                self._log.debug("Skipping root action %s: simulation failed", action, exc_info=True)
                continue
            if val > best_val:
                best_val = val
                best_action = action
            alpha = max(alpha, val)
        return int(best_action)

    def _minimax(
        self, state: GameState, depth: int, alpha: float, beta: float, is_max: bool, original_player: int
    ) -> float:
        """Alpha-beta minimax value of ``state`` for ``original_player``.

        Whether a node maximises or minimises is decided from
        ``state.current_player``; ``is_max`` is accepted for interface
        compatibility and is not consulted. At most ``NODE_BRANCH_LIMIT``
        randomly sampled actions (plus Pass when legal) are expanded per node.
        """
        # ``<= 0`` so that an agent configured with depth 0 still terminates.
        if depth <= 0 or state.game_over:
            return self.evaluate_state(state, original_player)

        legal_indices = np.where(state.get_legal_actions())[0]
        # Test emptiness by size: ``.any()`` would be False for the valid
        # one-element set ``[0]`` (only Pass legal).
        if legal_indices.size == 0:
            return self.evaluate_state(state, original_player)

        maximizing = state.current_player == original_player

        candidates = list(legal_indices)
        if len(candidates) > self.NODE_BRANCH_LIMIT:
            candidates = list(np.random.choice(legal_indices, self.NODE_BRANCH_LIMIT, replace=False))
            if 0 in legal_indices and 0 not in candidates:
                candidates.append(0)

        best = -float("inf") if maximizing else float("inf")
        searched_any = False
        for action in candidates:
            try:
                ns = self._advance(state, action, state.current_player)
                value = self._minimax(ns, depth - 1, alpha, beta, not maximizing, original_player)
            except Exception:
                self._log.debug("Skipping action %s at depth %s: simulation failed", action, depth, exc_info=True)
                continue
            searched_any = True
            if maximizing:
                best = max(best, value)
                alpha = max(alpha, value)
            else:
                best = min(best, value)
                beta = min(beta, value)
            if beta <= alpha:
                break

        # Every simulation failed: fall back to the static evaluation instead
        # of leaking +/-inf into the parent node.
        if not searched_any:
            return self.evaluate_state(state, original_player)
        return best

    def _greedy_choice(self, state: GameState) -> int:
        """Fast greedy resolution for pending choices during search."""
        indices = np.where(state.get_legal_actions())[0]
        if indices.size == 0:
            return 0
        # Simple priority: 1. Keep high cost (if mulligan), 2. Target slot 1, etc.
        # For now, just pick the first valid action
        return int(indices[0])