Spaces:

trioskosmos
/

LovecaSim

Running

File size: 35,332 Bytes

69c4849

import argparse
import logging
import os
import random
import sys
import time

import numpy as np

# Add parent dir to path
# Add parent dir to path (for ai directory)
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
# Add engine directory
# Add project root directory
sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))

from ai.agents.agent_base import Agent
from ai.agents.search_prob_agent import SearchProbAgent
from engine.game.data_loader import CardDataLoader
from engine.game.game_state import GameState, Phase


class TrueRandomAgent(Agent):
    """Completely random agent with no heuristics"""

    def choose_action(self, state: GameState, player_id: int) -> int:
        legal_mask = state.get_legal_actions()
        legal_indices = np.where(legal_mask)[0]
        if len(legal_indices) == 0:
            return 0
        return int(np.random.choice(legal_indices))


class RandomAgent(Agent):
    def choose_action(self, state: GameState, player_id: int) -> int:
        legal_mask = state.get_legal_actions()
        legal_indices = np.where(legal_mask)[0]
        if len(legal_indices) == 0:
            return 0

        # SMART HEURISTICS
        non_pass = [i for i in legal_indices if i != 0]

        # MULLIGAN: Sometimes confirm (action 0)
        if state.phase in (Phase.MULLIGAN_P1, Phase.MULLIGAN_P2):
            # 30% chance to confirm, 70% to toggle cards
            if random.random() < 0.3:
                return 0
            mulligan_actions = [i for i in legal_indices if 300 <= i <= 359]
            if mulligan_actions:
                return int(np.random.choice(mulligan_actions))
            return 0

        # Priority 1: In LIVE_SET, prioritize setting LIVE cards over passing
        if state.phase == Phase.LIVE_SET:
            live_set_actions = [i for i in legal_indices if 400 <= i <= 459]
            if live_set_actions:
                return int(np.random.choice(live_set_actions))

        # Priority 2: In MAIN phase, try to play members to stage
        if state.phase == Phase.MAIN:
            play_actions = [i for i in legal_indices if 1 <= i <= 180]
            if play_actions:
                # 80% chance to play instead of pass
                if random.random() < 0.8:
                    return int(np.random.choice(play_actions))

        # Priority 3: Never pass if ANY other action available
        if non_pass:
            return int(np.random.choice(non_pass))

        return 0


class SmartHeuristicAgent(Agent):
    """Advanced AI with better winning strategies"""

    def __init__(self):
        self.last_turn_num = -1
        self.turn_action_counts = {}

    def choose_action(self, state: GameState, player_id: int) -> int:
        # --- Loop Protection ---
        if state.turn_number != self.last_turn_num:
            self.last_turn_num = state.turn_number
            self.turn_action_counts = {}

        legal_mask = state.get_legal_actions()
        legal_indices = np.where(legal_mask)[0]
        if len(legal_indices) == 0:
            return 0

        p = state.players[player_id]

        # --- MULLIGAN PHASE ---
        if state.phase in (Phase.MULLIGAN_P1, Phase.MULLIGAN_P2):
            # Keep members with cost <= 3, discard others and all Live cards
            # 300-359: index i is toggled

            # Initialize mulligan_selection if not present
            if not hasattr(p, "mulligan_selection"):
                p.mulligan_selection = set()

            to_toggle = []
            for i, card_id in enumerate(p.hand):
                should_keep = False
                if card_id in state.member_db:
                    member = state.member_db[card_id]
                    if member.cost <= 3:
                        should_keep = True

                # Check if already marked for return (mulligan_selection is a set of indices)
                is_marked = i in p.mulligan_selection
                if should_keep and is_marked:
                    # Unmark keepable card
                    to_toggle.append(300 + i)
                elif not should_keep and not is_marked:
                    # Mark bad card
                    to_toggle.append(300 + i)

            if to_toggle:
                # Filter to only legal toggles
                legal_set = set(legal_indices.tolist())
                valid_toggles = [a for a in to_toggle if a in legal_set]
                if valid_toggles:
                    choice = np.random.choice(valid_toggles)
                    return int(choice) if np.isscalar(choice) else int(choice[0])
            return 0  # Confirm

        # --- LIVE SET PHASE ---
        if state.phase == Phase.LIVE_SET:
            live_actions = [i for i in legal_indices if 400 <= i <= 459]
            if not live_actions:
                return 0  # Pass

            current_hearts = p.get_total_hearts(state.member_db)

            # Calculate what we already need for pending live cards
            pending_req = np.zeros(7, dtype=np.int32)
            for live_id in p.live_zone:
                if live_id in state.live_db:
                    pending_req += state.live_db[live_id].required_hearts

            # --- Improved LIVE_SET Logic ---
            best_action = -1
            max_value = -1

            for action in live_actions:
                hand_idx = action - 400
                card_id = p.hand[hand_idx]
                if card_id not in state.live_db:
                    continue

                live = state.live_db[card_id]
                total_req = pending_req + live.required_hearts

                # Check feasibility
                needed = total_req.copy()
                have = current_hearts.copy()

                # 1. Colors
                possible = True
                for c in range(6):
                    if have[c] >= needed[c]:
                        have[c] -= needed[c]
                        needed[c] = 0
                    else:
                        possible = False
                        break

                if not possible:
                    continue

                # 2. Any hearts
                if np.sum(have) < needed[6]:
                    continue

                # If possible, calculate value
                value = live.score * 10
                # Prefer cards we have hearts for
                value += np.sum(have) - needed[6]

                if value > max_value:
                    max_value = value
                    best_action = action

            if best_action != -1:
                return int(best_action)
            return 0  # Pass if no safe plays

        # --- MAIN PHASE ---
        if state.phase == Phase.MAIN:
            # 1. Activate Abilities (Rule of thumb: Draw/Energy > Buff > Damage)
            activate_actions = [i for i in legal_indices if 200 <= i <= 202]
            best_ability_action = -1
            best_ability_score = -1

            for action in activate_actions:
                area = action - 200
                card_id = p.stage[area]
                if card_id in state.member_db:
                    # HEURISTIC: Use 1-step lookahead to detect no-ops or loops
                    try:
                        next_state = state.step(action)
                        next_p = next_state.players[player_id]

                        # Comparison metrics
                        hand_delta = len(next_p.hand) - len(p.hand)
                        energy_delta = len(next_p.energy_zone) - len(p.energy_zone)
                        tap_delta = np.sum(next_p.tapped_energy) - np.sum(p.tapped_energy)
                        stage_changed = not np.array_equal(next_p.stage, p.stage)
                        choice_pending = len(next_state.pending_choices) > 0

                        # Repeating action penalty
                        reps = self.turn_action_counts.get(action, 0)

                        if (
                            not any([hand_delta > 0, energy_delta > 0, stage_changed, choice_pending])
                            and tap_delta <= 0
                        ):
                            # State didn't meaningfully improve for the better (maybe it tapped something but didn't gain)
                            score = -10
                        else:
                            score = 15 if (hand_delta > 0 or energy_delta > 0) else 10

                        # Apply repetition penalty
                        score -= reps * 20

                    except Exception:
                        score = -100  # Crashes are bad

                    if score > best_ability_score:
                        best_ability_score = score
                        best_ability_action = action

            # 2. Play Members
            play_actions = [i for i in legal_indices if 1 <= i <= 180]
            best_play_action = -1
            best_play_score = -1

            if play_actions:
                # Find current requirements from all live cards in zone
                # Precise "Scanning" of what hearts are missing
                pending_req = np.zeros(7, dtype=np.int32)
                for live_id in p.live_zone:
                    if live_id in state.live_db:
                        pending_req += state.live_db[live_id].required_hearts

                # What we have (excluding hand)
                current_hearts = p.get_total_hearts(state.member_db)

                # Calculate simple missing vector (ignoring Any for a moment to prioritize colors)
                # We really want to find a card that reduces the "Distance" to completion

                for action in play_actions:
                    hand_idx = (action - 1) // 3
                    card_id = p.hand[hand_idx]
                    member = state.member_db[card_id]

                    score = 0

                    # A. Heart Contribution
                    # Does this member provide a heart provided in 'pending_req' that we don't have enough of?
                    prov = member.hearts  # Shape (6,)

                    for c in range(6):
                        if pending_req[c] > current_hearts[c]:
                            # We need this color
                            if prov[c] > 0:
                                score += 20  # HUGE bonus for matching a need

                    # A2. Total Heart Volume (Crucial for 'Any' requirements)
                    total_hearts = prov.sum()
                    score += total_hearts * 5

                    # B. Base Stats
                    score += member.blades  # Power is good
                    score += member.draw_icons * 5  # Drawing is good

                    # C. Cost Efficiency
                    # If we are low on energy, cheap cards are better
                    # But don't punish so hard we don't play at all!
                    untapped_energy = p.count_untapped_energy()
                    if untapped_energy < 1 and member.cost > 1:
                        score -= 2  # Small penalty

                    # D. Slot Efficiency
                    area = (action - 1) % 3
                    if p.stage[area] >= 0:
                        # Replacing a member.
                        prev = state.member_db[p.stage[area]]
                        if prev.hearts.sum() > member.hearts.sum():
                            score -= 5
                    else:
                        score += 5  # Filling empty slot is good

                    if score > best_play_score:
                        best_play_score = score
                        best_play_action = action

            # Decision
            if best_ability_score > 0:
                self.turn_action_counts[best_ability_action] = self.turn_action_counts.get(best_ability_action, 0) + 1
                return int(best_ability_action)

            if best_play_action != -1:
                return int(best_play_action)

            # Pass - but verify it's legal
            if 0 in legal_indices:
                return 0
            return int(legal_indices[0])  # Fallback to first legal

        # Default: pick random non-pass if available
        non_pass = [i for i in legal_indices if i != 0]
        if non_pass:
            return int(np.random.choice(non_pass))
        # Fallback
        return int(legal_indices[0]) if len(legal_indices) > 0 else 0


def generate_random_decks(member_ids, live_ids):
    """Generate two random decks: 40 members + 10 lives in ONE main_deck each"""
    m_pool = list(member_ids)
    l_pool = list(live_ids)

    # Ensure pool is not empty
    if not m_pool:
        m_pool = [0]
    if not l_pool:
        l_pool = [0]

    # Mix members and lives in one deck
    deck1 = [random.choice(m_pool) for _ in range(40)] + [random.choice(l_pool) for _ in range(10)]
    deck2 = [random.choice(m_pool) for _ in range(40)] + [random.choice(l_pool) for _ in range(10)]

    random.shuffle(deck1)
    random.shuffle(deck2)

    return deck1, deck2


def initialize_game(use_real_data: bool = True, cards_path: str = "data/cards.json") -> GameState:
    """Initializes GameState with card data."""
    if use_real_data:
        try:
            loader = CardDataLoader(cards_path)
            m_db, l_db, e_db = loader.load()
            GameState.member_db = m_db
            GameState.live_db = l_db
        except Exception as e:
            print(f"Failed to load real data: {e}")
            GameState.member_db = {}
            GameState.live_db = {}
    else:
        # For testing, ensure dbs are empty or mocked if not loading real data
        GameState.member_db = {}
        GameState.live_db = {}
    return GameState()


def create_easy_cards():
    """Create custom easy cards for testing scoring"""
    import numpy as np
    from game.game_state import LiveCard, MemberCard

    # Easy Member: Cost 1, provides 1 of each heart + 1 blade
    m = MemberCard(
        card_id=888,
        card_no="PL!-sd1-001-SD",  # Correct field name
        name="Easy Member",
        cost=1,
        hearts=np.array([1, 1, 1, 1, 1, 1], dtype=np.int32),
        blade_hearts=np.array([0, 0, 0, 0, 0, 0], dtype=np.int32),
        blades=1,
        volume_icons=0,
        draw_icons=0,
        img_path="cards/PLSD01/PL!-sd1-001-SD.png",
        group="Easy",
    )

    # Easy Live: Score 1, Requires 1 Any Heart
    l = LiveCard(
        card_id=39999,
        card_no="PL!-pb1-019-SD",  # Correct field name
        name="Easy Live",
        score=1,
        required_hearts=np.array([0, 0, 0, 0, 0, 0, 1], dtype=np.int32),
        volume_icons=0,
        draw_icons=0,
        img_path="cards/PLSD01/PL!-pb1-019-SD.png",
        group="Easy",
    )

    return m, l


def setup_game(args):
    # Initialize game state
    use_easy = args.deck_type == "easy"

    state = initialize_game(use_real_data=(not use_easy), cards_path=args.cards_path)

    # Set seed
    np.random.seed(args.seed)
    random.seed(args.seed)

    if use_easy:
        # INJECT EASY CARDS
        m, l = create_easy_cards()
        state.member_db[888] = m
        state.live_db[39999] = l

        # Single main_deck with BOTH Members (40) and Lives (10), shuffled
        for p in state.players:
            m_list = [888] * 48
            l_list = [39999] * 12
            p.main_deck = m_list + l_list
            random.shuffle(p.main_deck)
            p.energy_deck = [40000] * 12
            p.hand = []
            p.energy_zone = []
            p.live_zone = []
            p.discard = []
            p.stage = np.array([-1, -1, -1], dtype=np.int32)
    else:
        # Normal Random Decks (Members + Lives mixed)
        member_keys = list(state.member_db.keys())

        if args.deck_type == "ability_only":
            # Filter for members with abilities
            member_keys = [mid for mid in member_keys if state.member_db[mid].abilities]
            if not member_keys:
                print("WARNING: No members with abilities found! Reverting to all members.")
                member_keys = list(state.member_db.keys())

        deck1, deck2 = generate_random_decks(member_keys, state.live_db.keys())
        state.players[0].main_deck = deck1
        state.players[0].energy_deck = [39999] * 10

        state.players[1].main_deck = deck2
        state.players[1].energy_deck = [39999] * 10

        # Clear hands/zones just in case
        for p in state.players:
            p.hand = []
            p.energy_zone = []

    # Initial Draw (5 cards from main_deck)
    for _ in range(5):
        if state.players[0].main_deck:
            state.players[0].hand.append(state.players[0].main_deck.pop())
        if state.players[1].main_deck:
            state.players[1].hand.append(state.players[1].main_deck.pop())

    # Setup Energy Decks (Rule 6.1.1.3: 12 cards)
    for p in state.players:
        p.energy_deck = [40000] * 12
        p.energy_zone = []
        # Initial Energy (Rule 6.2.1.7: Move 3 cards to energy zone)
        for _ in range(3):
            if p.energy_deck:
                p.energy_zone.append(p.energy_deck.pop(0))

    return state


class AbilityFocusAgent(SmartHeuristicAgent):
    """

    Agent that prioritizes activating abilities and playing cards with abilities.

    Used for stress-testing ability implementations.

    """

    def choose_action(self, state: GameState, player_id: int) -> int:
        legal_mask = state.get_legal_actions()
        legal_indices = np.where(legal_mask)[0]
        if len(legal_indices) == 0:
            return 0

        # If we have pending choices, we MUST choose one of them (usually 500+)
        if state.pending_choices:
            non_zero = [i for i in legal_indices if i != 0]
            if non_zero:
                return int(np.random.choice(non_zero))
            return int(np.random.choice(legal_indices))

        p = state.players[player_id]

        # 1. (LIVE_SET is handled by superclass logic for smarter selection)

        # 2. MAIN Phase Priorities
        if state.phase == Phase.MAIN:
            priority_actions = []

            # Check Play Actions (1-180)
            play_actions = [i for i in legal_indices if 1 <= i <= 180]
            for action_id in play_actions:
                hand_idx = (action_id - 1) // 3
                if hand_idx < len(p.hand):
                    card_id = p.hand[hand_idx]
                    if card_id in state.member_db:
                        card = state.member_db[card_id]
                        if card.abilities:
                            # Massive priority for cards with ON_PLAY or ACTIVATED
                            has_prio = any(a.trigger in (1, 7) for a in card.abilities)  # 1=ON_PLAY, 7=ACTIVATED
                            if has_prio:
                                priority_actions.append(action_id)

            # Check Activated Ability Actions (200-202)
            ability_actions = [i for i in legal_indices if 200 <= i <= 202]
            priority_actions.extend(ability_actions)

            if priority_actions:
                return int(np.random.choice(priority_actions))

        # Fallback to SmartHeuristic if no high-priority ability action found
        return super().choose_action(state, player_id)


class ConservativeAgent(SmartHeuristicAgent):
    """

    Very safe AI. Only sets Live cards if it has strictly sufficient hearts

    available on stage right now (untapped members). Never gambles on future draws.

    """

    def choose_action(self, state: GameState, player_id: int) -> int:
        # Override LIVE_SET phase with ultra-conservative logic
        if state.phase == Phase.LIVE_SET:
            p = state.players[player_id]
            legal_indices = np.where(state.get_legal_actions())[0]
            live_actions = [i for i in legal_indices if 400 <= i <= 459]
            if not live_actions:
                return 0  # Pass

            # ONLY count hearts on stage (no assumptions about future)
            stage_hearts = p.get_total_hearts(state.member_db)

            # Calculate what we already need for pending live cards
            pending_req = np.zeros(7, dtype=np.int32)
            for live_id in p.live_zone:
                if live_id in state.live_db:
                    pending_req += state.live_db[live_id].required_hearts

            best_action = -1
            max_value = -1

            for action in live_actions:
                hand_idx = action - 400
                card_id = p.hand[hand_idx]
                if card_id not in state.live_db:
                    continue

                live = state.live_db[card_id]
                total_req = pending_req + live.required_hearts

                # Ultra-strict feasibility check: need EXACT hearts available
                needed = total_req.copy()
                have = stage_hearts.copy()

                # 1. Check colored hearts (must have exact matches)
                possible = True
                for c in range(6):
                    if have[c] < needed[c]:
                        possible = False
                        break
                    have[c] -= needed[c]
                    needed[c] = 0

                if not possible:
                    continue

                # 2. Check "Any" hearts (must have enough remaining)
                if np.sum(have) < needed[6]:
                    continue

                # If strictly possible, calculate conservative value
                value = live.score * 10
                # Small bonus for having extra hearts (prefer safer plays)
                value += np.sum(have) - needed[6]

                if value > max_value:
                    max_value = value
                    best_action = action

            if best_action != -1:
                return int(best_action)
            return 0  # Pass if no 100% safe plays

        # For all other phases, use SmartHeuristicAgent logic
        return super().choose_action(state, player_id)


class GambleAgent(SmartHeuristicAgent):
    """

    Risk-taking AI. Sets Live cards if it has enough hearts OR if it has

    enough blades on stage to likely get the hearts from yell cards.

    """

    def choose_action(self, state: GameState, player_id: int) -> int:
        if state.phase == Phase.LIVE_SET:
            p = state.players[player_id]
            legal_indices = np.where(state.get_legal_actions())[0]
            live_actions = [i for i in legal_indices if 400 <= i <= 459]
            if not live_actions:
                return 0

            # Current hearts on stage
            stage_hearts = p.get_total_hearts(state.member_db)
            # Total blades on stage (potential yells)
            total_blades = p.get_total_blades(state.member_db)

            # Estimated hearts from yells: Roughly 0.5 hearts per blade?
            # Or simplified: consider blades as "Any" hearts for feasibility check
            est_extra_hearts = total_blades // 2

            best_action = -1
            max_value = -1

            # Pending req
            pending_req = np.zeros(7, dtype=np.int32)
            for live_id in p.live_zone:
                if live_id in state.live_db:
                    pending_req += state.live_db[live_id].required_hearts

            for action in live_actions:
                hand_idx = action - 400
                card_id = p.hand[hand_idx]
                if card_id not in state.live_db:
                    continue

                live = state.live_db[card_id]
                total_req = pending_req + live.required_hearts

                # Feasibility check with "Gamble" factor
                needed = total_req.copy()
                have = stage_hearts.copy()

                # satisfy colors
                possible = True
                for c in range(6):
                    if have[c] < needed[c]:
                        # Can we gamble on this color?
                        # Maybe if we have a lot of blades.
                        # For simplicity, let's say we can only gamble on 'Any'
                        possible = False
                        break
                    have[c] -= needed[c]

                if not possible:
                    continue

                # Any hearts check with gamble
                total_have = np.sum(have) + est_extra_hearts
                if total_have >= needed[6]:
                    value = live.score * 10 + (total_have - needed[6])
                    if value > max_value:
                        max_value = value
                        best_action = action

            if best_action != -1:
                return int(best_action)
            return 0

        return super().choose_action(state, player_id)


class NNAgent(Agent):
    """

    Agent backed by a Neural Network (PyTorch), running on GPU if available.

    """

    def __init__(self, device=None, model_path=None):
        try:
            # Lazy import to avoid hard dependency if not used
            # import torch
            from game.network import NetworkConfig
            from game.network_torch import TorchNetworkWrapper

            self.config = NetworkConfig()
            self.net = TorchNetworkWrapper(self.config, device=device)
            self.device = self.net.device

            if model_path:
                print(f"Loading model from {model_path}...")
                self.net.load(model_path)
            # print(f"NNAgent initialized on device: {self.device}")

        except ImportError as e:
            print(f"WARNING: PyTorch or network modules not found. NNAgent falling back to Random. Error: {e}")
            self.net = None
        except Exception as e:
            print(f"WARNING: Failed to initialize NNAgent: {e}")
            self.net = None

    def choose_action(self, state: GameState, player_id: int) -> int:
        if self.net is None:
            # Fallback to random if failed to load
            legal_mask = state.get_legal_actions()
            legal_indices = np.where(legal_mask)[0]
            return int(np.random.choice(legal_indices)) if len(legal_indices) > 0 else 0

        # Predict policy (this runs on GPU if available)
        policy, value = self.net.predict(state)

        # Choose action based on policy probabilities
        # Direct policy sampling (fastest way to use the network without MCTS)

        # Ensure probabilities sum to 1 (handling float errors)
        policy_sum = policy.sum()
        if policy_sum > 0:
            policy = policy / policy_sum
            return int(np.random.choice(len(policy), p=policy))
        else:
            # Fallback if policy is all zeros (shouldn't happen with proper masking)
            legal_mask = state.get_legal_actions()
            legal_indices = np.where(legal_mask)[0]
            return int(np.random.choice(legal_indices)) if len(legal_indices) > 0 else 0


def run_simulation(args):
    import io

    # We will manage logging manually per game
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)

    # Console handler for high-level info
    console = logging.StreamHandler()
    console.setLevel(logging.WARNING)  # Only show warnings/errors to console during run
    root_logger.addHandler(console)

    best_combined_score = -1
    best_log_content = ""
    best_game_idx = -1
    best_winner = -1

    results = []

    start_total = time.time()

    for game_idx in range(args.num_games):
        # Capture logs for this game
        log_capture = io.StringIO()
        handler = logging.StreamHandler(log_capture)
        handler.setLevel(logging.INFO)
        # Use a simple format for game logs
        formatter = logging.Formatter("%(message)s")
        handler.setFormatter(formatter)

        root_logger.handlers = [console, handler]  # Replace handlers (keep console)

        # Log Header
        logging.info(f"=== Game {game_idx + 1} ===")

        # Setup Game
        try:
            state = setup_game(args)
            current_seed = args.seed + game_idx
            random.seed(current_seed)
            np.random.seed(current_seed)

            # Agent Selection
            if args.agent == "random":
                p0_agent = RandomAgent()
            elif args.agent == "ability_focus":
                p0_agent = AbilityFocusAgent()
            elif args.agent == "conservative":
                p0_agent = ConservativeAgent()
            elif args.agent == "gamble":
                p0_agent = GambleAgent()
            elif args.agent == "nn":
                p0_agent = NNAgent()
            elif args.agent == "search":
                p0_agent = SearchProbAgent(depth=args.depth)
            else:
                p0_agent = SmartHeuristicAgent()

            # Agent Selection P1
            if args.agent_p2 == "ability_focus":
                p1_agent = AbilityFocusAgent()
            elif args.agent_p2 == "search":
                p1_agent = SearchProbAgent(depth=args.depth)
            elif args.agent_p2 == "smart":
                p1_agent = SmartHeuristicAgent()
            else:
                p1_agent = RandomAgent()

            agents = [p0_agent, p1_agent]

            action_count = 0
            while not state.game_over:
                # Limit safety
                if action_count > args.max_turns:
                    break
                state.check_win_condition()
                if state.game_over:
                    break

                active_pid = state.current_player

                # Detailed Log
                logging.info("-" * 40)
                logging.info(f"Turn {state.turn_number} | Phase {state.phase.name} | Active: P{active_pid}")
                p0 = state.players[0]
                p1 = state.players[1]
                logging.info(f"Score: P0({len(p0.success_lives)}) - P1({len(p1.success_lives)})")
                logging.info(f"Hand: P0({len(p0.hand)}) - P1({len(p1.hand)})")

                # Agent Act
                action = agents[active_pid].choose_action(state, active_pid)
                logging.info(f"Action: P{active_pid} chooses {action}")

                state = state.step(action)
                action_count += 1

            # Game End
            p0_score = len(state.players[0].success_lives)
            p1_score = len(state.players[1].success_lives)
            combined_score = p0_score + p1_score
            winner = state.winner

            logging.info("=" * 40)
            logging.info(f"Game Over. Winner: {winner}. Score: {p0_score}-{p1_score}")

            res = {
                "id": game_idx,
                "winner": winner,
                "score_total": combined_score,
                "p0_score": p0_score,
                "p1_score": p1_score,
                "actions": action_count,
                "game_turns": state.turn_number,
            }
            results.append(res)
            print(f"DEBUG: Game {game_idx} Winner: {winner}")

            # Check if this is the "best" game
            is_win = winner == 0 or winner == 1
            if is_win or combined_score > best_combined_score:
                if is_win and best_winner == -1:
                    print(f"Found a Winner in Game {game_idx + 1}! (Winner: P{winner})")

                best_log_content = log_capture.getvalue()
                best_combined_score = combined_score
                best_winner = winner
                best_game_idx = game_idx  # Added this line to update best_game_idx

            if (game_idx + 1) % 100 == 0:
                print(f"Simulated {game_idx + 1} games... Best Score: {best_combined_score}")

        except Exception as e:
            msg = f"Error in game {game_idx}: {e}"
            print(msg, file=sys.stderr)
            import traceback

            traceback.print_exc()

        finally:
            log_capture.close()

    total_time = time.time() - start_total

    # Write best log
    with open(args.log_file, "w", encoding="utf-8") as f:
        f.write(best_log_content)

    print("\n=== Simulation Complete ===")
    print(f"Total Games Ran: {len(results)}")
    print(f"Total Time: {total_time:.2f}s")

    wins0 = sum(1 for r in results if r["winner"] == 0)
    wins1 = sum(1 for r in results if r["winner"] == 1)
    draws = sum(1 for r in results if r["winner"] == 2)

    print(f"Wins: P0={wins0}, P1={wins1}, Draws={draws}")

    total_actions = sum(r["actions"] for r in results)
    total_game_turns = sum(r["game_turns"] for r in results)

    if total_time > 0:
        print(f"APS (Actions Per Second): {total_actions / total_time:.2f}")
        print(f"TPS (Turns Per Second): {total_game_turns / total_time:.2f}")

    print(
        f"Best Game was Game {best_game_idx + 1} with Score Total {best_combined_score if best_combined_score >= 0 else 0}"
    )
    print(f"Log for best game saved to {args.log_file}")
    import json

    if results:
        print(f"Last Game Summary: {json.dumps(results[-1], indent=2)}")


if __name__ == "__main__":
    # Default path relative to this script
    script_dir = os.path.dirname(os.path.abspath(__file__))
    default_cards_path = os.path.join(script_dir, "..", "engine", "data", "cards.json")

    parser = argparse.ArgumentParser()
    parser.add_argument("--cards_path", default=default_cards_path, help="Path to cards.json")
    parser.add_argument(
        "--deck_type",
        default="normal",
        choices=["normal", "easy", "ability_only"],
        help="Deck type: normal, easy, or ability_only",
    )
    parser.add_argument("--max_turns", type=int, default=1000, help="Max steps/turns to run")
    parser.add_argument("--log_file", default="game_log.txt", help="Output log file")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument("--num_games", type=int, default=1, help="Number of games to run")
    parser.add_argument(
        "--agent",
        default="smart",
        choices=["random", "smart", "ability_focus", "conservative", "gamble", "nn", "search"],
        help="Agent type to control P0",
    )
    parser.add_argument(
        "--agent_p2",
        default="random",
        choices=["random", "smart", "ability_focus", "conservative", "gamble", "nn", "search"],
        help="Agent type to control P1",
    )
    parser.add_argument("--depth", type=int, default=2, help="Search depth for SearchProbAgent")

    args = parser.parse_args()

    run_simulation(args)