Spaces:

trioskosmos
/

LovecaSim

Running

App Files Files Community

LovecaSim / ai /headless_runner.py

trioskosmos

Upload ai/headless_runner.py with huggingface_hub

69c4849 verified 9 days ago

raw

history blame contribute delete

35.3 kB

	import argparse
	import logging
	import os
	import random
	import sys
	import time

	import numpy as np

	# Add this script's own directory to the import path
	sys.path.append(os.path.dirname(os.path.abspath(__file__)))
	# Add the project root (the parent of this script's directory) to the import path
	sys.path.append(os.path.join(os.path.dirname(os.path.abspath(__file__)), ".."))

	from ai.agents.agent_base import Agent
	from ai.agents.search_prob_agent import SearchProbAgent
	from engine.game.data_loader import CardDataLoader
	from engine.game.game_state import GameState, Phase


	class TrueRandomAgent(Agent):
	    """Baseline agent that samples uniformly among the legal actions, with no heuristics."""

	    def choose_action(self, state: GameState, player_id: int) -> int:
	        """Return a uniformly sampled legal action index, or 0 when nothing is legal."""
	        options = np.where(state.get_legal_actions())[0]
	        if len(options) > 0:
	            return int(np.random.choice(options))
	        return 0


	class RandomAgent(Agent):
	    """Random agent nudged by a few phase-specific preferences.

	    It is not uniform: it confirms a mulligan 30% of the time, sets a live
	    card whenever one can be set, plays a member 80% of the time when one
	    can be played in the main phase, and otherwise avoids action 0 unless
	    nothing else is legal.
	    """

	    def choose_action(self, state: GameState, player_id: int) -> int:
	        """Pick a legal action index for the current phase (0 when none is legal)."""
	        legal = np.where(state.get_legal_actions())[0]
	        if len(legal) == 0:
	            return 0

	        def pick_between(low, high):
	            # Uniformly sample a legal action in [low, high]; None when that range is empty.
	            pool = [a for a in legal if low <= a <= high]
	            return int(np.random.choice(pool)) if pool else None

	        if state.phase in (Phase.MULLIGAN_P1, Phase.MULLIGAN_P2):
	            # 30%: confirm with action 0. Otherwise toggle a card (300-359),
	            # confirming anyway when no toggle is legal.
	            if random.random() < 0.3:
	                return 0
	            toggle = pick_between(300, 359)
	            return toggle if toggle is not None else 0

	        if state.phase == Phase.LIVE_SET:
	            # Setting a live card (400-459) always beats passing.
	            live_set = pick_between(400, 459)
	            if live_set is not None:
	                return live_set

	        if state.phase == Phase.MAIN:
	            # Play a member (1-180) 80% of the time when that is possible.
	            plays = [a for a in legal if 1 <= a <= 180]
	            if plays and random.random() < 0.8:
	                return int(np.random.choice(plays))

	        # Only fall back to action 0 when there is no alternative.
	        others = [a for a in legal if a != 0]
	        return int(np.random.choice(others)) if others else 0


	class SmartHeuristicAgent(Agent):
	    """Rule-based agent with a separate heuristic for each phase.

	    Action index ranges used below (as read from this file):
	    0 = pass/confirm, 1-180 = play hand card ``(a - 1) // 3`` to stage area
	    ``(a - 1) % 3``, 200-202 = activate the ability of the member in stage
	    area ``a - 200``, 300-359 = toggle hand card ``a - 300`` during the
	    mulligan, 400-459 = set hand card ``a - 400`` as a live card.

	    Heart vectors have six colour slots; requirement vectors carry a seventh
	    slot (index 6) that can be paid with hearts of any colour.
	    """

	    def __init__(self):
	        # Turn number that ``turn_action_counts`` currently refers to.
	        self.last_turn_num = -1
	        # action index -> how many times this agent picked that ability this turn.
	        self.turn_action_counts = {}

	    def choose_action(self, state: GameState, player_id: int) -> int:
	        """Return the action index this agent takes for ``player_id`` in ``state``.

	        Returns 0 when no action is legal.
	        """
	        # Loop protection: the per-turn repetition counters start over on a new turn.
	        if state.turn_number != self.last_turn_num:
	            self.last_turn_num = state.turn_number
	            self.turn_action_counts = {}

	        legal_indices = np.where(state.get_legal_actions())[0]
	        if len(legal_indices) == 0:
	            return 0

	        p = state.players[player_id]

	        if state.phase in (Phase.MULLIGAN_P1, Phase.MULLIGAN_P2):
	            return self._choose_mulligan(state, p, legal_indices)
	        if state.phase == Phase.LIVE_SET:
	            return self._choose_live_set(state, p, legal_indices)
	        if state.phase == Phase.MAIN:
	            return self._choose_main(state, player_id, p, legal_indices)

	        # Any other phase: random non-pass action if there is one.
	        non_pass = [i for i in legal_indices if i != 0]
	        if non_pass:
	            return int(np.random.choice(non_pass))
	        return int(legal_indices[0])

	    @staticmethod
	    def _pending_live_requirements(state, p):
	        """Sum the heart requirements of the known live cards already in ``p.live_zone``."""
	        pending_req = np.zeros(7, dtype=np.int32)
	        for live_id in p.live_zone:
	            if live_id in state.live_db:
	                pending_req += state.live_db[live_id].required_hearts
	        return pending_req

	    def _choose_mulligan(self, state, p, legal_indices) -> int:
	        """Toggle hand cards until only members costing 3 or less are kept, then confirm."""
	        # The selection is a set of hand indices marked for return; create it
	        # on the player when it is not there yet.
	        if not hasattr(p, "mulligan_selection"):
	            p.mulligan_selection = set()

	        to_toggle = []
	        for i, card_id in enumerate(p.hand):
	            should_keep = bool(card_id in state.member_db and state.member_db[card_id].cost <= 3)
	            is_marked = i in p.mulligan_selection
	            # A kept card that is marked must be unmarked; a rejected card
	            # that is unmarked must be marked. Both are the same toggle action.
	            if should_keep == is_marked:
	                to_toggle.append(300 + i)

	        legal_set = set(legal_indices.tolist())
	        valid_toggles = [a for a in to_toggle if a in legal_set]
	        if valid_toggles:
	            return int(np.random.choice(valid_toggles))
	        return 0  # Confirm

	    def _choose_live_set(self, state, p, legal_indices) -> int:
	        """Set the most valuable live card whose hearts are already covered, else pass."""
	        live_actions = [i for i in legal_indices if 400 <= i <= 459]
	        if not live_actions:
	            return 0  # Pass

	        current_hearts = p.get_total_hearts(state.member_db)
	        pending_req = self._pending_live_requirements(state, p)

	        best_action = -1
	        max_value = -1
	        for action in live_actions:
	            card_id = p.hand[action - 400]
	            if card_id not in state.live_db:
	                continue

	            live = state.live_db[card_id]
	            needed = pending_req + live.required_hearts
	            have = current_hearts.copy()

	            # 1. Every colour must be covered; pay for it out of ``have``.
	            for c in range(6):
	                if have[c] < needed[c]:
	                    break
	                have[c] -= needed[c]
	                needed[c] = 0
	            else:
	                # 2. The leftover hearts must cover the "any colour" slot.
	                leftover = np.sum(have)
	                if leftover >= needed[6]:
	                    # Score dominates; spare hearts break ties.
	                    value = live.score * 10 + (leftover - needed[6])
	                    if value > max_value:
	                        max_value = value
	                        best_action = action

	        if best_action != -1:
	            return int(best_action)
	        return 0  # Pass if no safe plays

	    def _choose_main(self, state, player_id, p, legal_indices) -> int:
	        """Prefer a useful ability, then the best member play, then passing."""
	        best_ability_action, best_ability_score = self._best_ability(state, player_id, p, legal_indices)
	        if best_ability_score > 0:
	            self.turn_action_counts[best_ability_action] = self.turn_action_counts.get(best_ability_action, 0) + 1
	            return int(best_ability_action)

	        best_play_action = self._best_play(state, p, legal_indices)
	        if best_play_action != -1:
	            return int(best_play_action)

	        # Pass - but verify it's legal
	        if 0 in legal_indices:
	            return 0
	        return int(legal_indices[0])  # Fallback to first legal

	    def _best_ability(self, state, player_id, p, legal_indices):
	        """Score each legal ability activation with a one-step lookahead.

	        Returns ``(action, score)`` for the best candidate, or ``(-1, -1)``
	        when none scores above -1.
	        """
	        best_action = -1
	        best_score = -1
	        for action in (i for i in legal_indices if 200 <= i <= 202):
	            card_id = p.stage[action - 200]
	            if card_id not in state.member_db:
	                continue

	            try:
	                next_state = state.step(action)
	                next_p = next_state.players[player_id]

	                hand_delta = len(next_p.hand) - len(p.hand)
	                energy_delta = len(next_p.energy_zone) - len(p.energy_zone)
	                tap_delta = np.sum(next_p.tapped_energy) - np.sum(p.tapped_energy)
	                stage_changed = not np.array_equal(next_p.stage, p.stage)
	                choice_pending = len(next_state.pending_choices) > 0

	                gained = hand_delta > 0 or energy_delta > 0
	                if not (gained or stage_changed or choice_pending) and tap_delta <= 0:
	                    # Nothing observable improved: treat as a no-op.
	                    score = -10
	                else:
	                    score = 15 if gained else 10

	                # Each earlier use of this ability in the same turn costs 20,
	                # which stops the agent from looping on it.
	                score -= self.turn_action_counts.get(action, 0) * 20
	            except Exception:
	                # Best effort: an action whose lookahead fails is never chosen.
	                logging.debug("Lookahead failed for action %s", action, exc_info=True)
	                score = -100

	            if score > best_score:
	                best_score = score
	                best_action = action

	        return best_action, best_score

	    def _best_play(self, state, p, legal_indices):
	        """Return the best-scoring member play action, or -1 when none scores above -1."""
	        play_actions = [i for i in legal_indices if 1 <= i <= 180]
	        if not play_actions:
	            return -1

	        pending_req = self._pending_live_requirements(state, p)
	        current_hearts = p.get_total_hearts(state.member_db)
	        # Same for every candidate, so read it once.
	        untapped_energy = p.count_untapped_energy()

	        best_action = -1
	        best_score = -1
	        for action in play_actions:
	            hand_idx, area = divmod(action - 1, 3)
	            if hand_idx >= len(p.hand):
	                continue
	            card_id = p.hand[hand_idx]
	            if card_id not in state.member_db:
	                # Not a known member: skip it rather than fail on the lookup.
	                continue
	            member = state.member_db[card_id]
	            prov = member.hearts

	            score = 0

	            # A. Big bonus for each colour the pending lives still lack and this member provides.
	            for c in range(6):
	                if pending_req[c] > current_hearts[c] and prov[c] > 0:
	                    score += 20

	            # A2. Raw heart volume helps with the "any colour" requirement.
	            score += prov.sum() * 5

	            # B. Base stats.
	            score += member.blades
	            score += member.draw_icons * 5

	            # C. Mild penalty for a card costing more than 1 when no energy is untapped.
	            if untapped_energy < 1 and member.cost > 1:
	                score -= 2

	            # D. Filling an empty slot is good; replacing a member that has more hearts is bad.
	            occupant = p.stage[area]
	            if occupant >= 0:
	                if occupant in state.member_db and state.member_db[occupant].hearts.sum() > prov.sum():
	                    score -= 5
	            else:
	                score += 5

	            if score > best_score:
	                best_score = score
	                best_action = action

	        return best_action


	def generate_random_decks(member_ids, live_ids):
	    """Build two shuffled 50-card decks, each drawn as 40 members followed by 10 lives.

	    Cards are sampled with replacement from the given id collections using
	    the ``random`` module. An empty collection is replaced by the single
	    placeholder id 0. Returns the pair ``(deck1, deck2)`` of lists.
	    """
	    members = list(member_ids) or [0]
	    lives = list(live_ids) or [0]

	    decks = []
	    for _ in range(2):
	        cards = [random.choice(members) for _ in range(40)]
	        cards.extend(random.choice(lives) for _ in range(10))
	        decks.append(cards)

	    # Shuffle only after both decks are drawn, so members and lives end up mixed.
	    for cards in decks:
	        random.shuffle(cards)

	    first, second = decks
	    return first, second


	def initialize_game(use_real_data: bool = True, cards_path: str = "data/cards.json") -> GameState:
	    """Install the card databases on ``GameState`` and return a fresh game state.

	    With ``use_real_data`` the member and live databases come from
	    ``CardDataLoader(cards_path).load()``; if that raises, the error is
	    printed and empty databases are used instead. Without it both databases
	    start empty. The databases are stored as class attributes of ``GameState``.
	    """
	    member_db, live_db = {}, {}
	    if use_real_data:
	        try:
	            # The loader returns three databases; only members and lives are kept.
	            member_db, live_db, _unused_db = CardDataLoader(cards_path).load()
	        except Exception as e:
	            print(f"Failed to load real data: {e}")
	            member_db, live_db = {}, {}

	    GameState.member_db = member_db
	    GameState.live_db = live_db
	    return GameState()


	def create_easy_cards():
	    """Create one cheap member and one trivially satisfiable live card for scoring tests.

	    Returns:
	        ``(member, live)``: a ``MemberCard`` with id 888 (cost 1, one heart
	        of each of the six colours, one blade) and a ``LiveCard`` with id
	        39999 (score 1, requiring a single heart of any colour).
	    """
	    # The module-level imports use the ``engine.`` package root, so try that
	    # first and keep the original ``game.`` path as a fallback.
	    try:
	        from engine.game.game_state import LiveCard, MemberCard
	    except ImportError:
	        from game.game_state import LiveCard, MemberCard

	    # Easy Member: Cost 1, provides 1 of each heart + 1 blade
	    member = MemberCard(
	        card_id=888,
	        card_no="PL!-sd1-001-SD",
	        name="Easy Member",
	        cost=1,
	        hearts=np.array([1, 1, 1, 1, 1, 1], dtype=np.int32),
	        blade_hearts=np.array([0, 0, 0, 0, 0, 0], dtype=np.int32),
	        blades=1,
	        volume_icons=0,
	        draw_icons=0,
	        img_path="cards/PLSD01/PL!-sd1-001-SD.png",
	        group="Easy",
	    )

	    # Easy Live: Score 1, requires 1 heart of any colour (last slot of the vector)
	    live = LiveCard(
	        card_id=39999,
	        card_no="PL!-pb1-019-SD",
	        name="Easy Live",
	        score=1,
	        required_hearts=np.array([0, 0, 0, 0, 0, 0, 1], dtype=np.int32),
	        volume_icons=0,
	        draw_icons=0,
	        img_path="cards/PLSD01/PL!-pb1-019-SD.png",
	        group="Easy",
	    )

	    return member, live


	def setup_game(args):
	    """Build a ready-to-play ``GameState`` for one simulated game.

	    Args:
	        args: Namespace providing ``deck_type`` ("easy", "ability_only", or
	            anything else for normal random decks), ``cards_path`` and ``seed``.

	    Returns:
	        The state with both main decks built, up to 5 cards drawn into each
	        hand, and for every player a 12-card energy deck from which the
	        first 3 cards have been moved to the energy zone.
	    """
	    easy_member_id = 888
	    easy_live_id = 39999
	    energy_card_id = 40000

	    use_easy = args.deck_type == "easy"
	    state = initialize_game(use_real_data=(not use_easy), cards_path=args.cards_path)

	    # Seed both RNGs this module draws from so deck building is reproducible.
	    np.random.seed(args.seed)
	    random.seed(args.seed)

	    if use_easy:
	        # Inject the two test cards and give every player the same card mix.
	        member, live = create_easy_cards()
	        state.member_db[easy_member_id] = member
	        state.live_db[easy_live_id] = live

	        for p in state.players:
	            # One main deck holding 48 members and 12 lives, shuffled together.
	            p.main_deck = [easy_member_id] * 48 + [easy_live_id] * 12
	            random.shuffle(p.main_deck)
	            p.hand = []
	            p.energy_zone = []
	            p.live_zone = []
	            p.discard = []
	            p.stage = np.array([-1, -1, -1], dtype=np.int32)
	    else:
	        # Normal random decks (members and lives mixed).
	        member_keys = list(state.member_db.keys())

	        if args.deck_type == "ability_only":
	            member_keys = [mid for mid in member_keys if state.member_db[mid].abilities]
	            if not member_keys:
	                print("WARNING: No members with abilities found! Reverting to all members.")
	                member_keys = list(state.member_db.keys())

	        deck1, deck2 = generate_random_decks(member_keys, state.live_db.keys())
	        state.players[0].main_deck = deck1
	        state.players[1].main_deck = deck2

	        # Clear hands/zones just in case
	        for p in state.players:
	            p.hand = []
	            p.energy_zone = []

	    # Initial draw: up to 5 cards from the end of each of the two main decks.
	    for p in (state.players[0], state.players[1]):
	        for _ in range(5):
	            if p.main_deck:
	                p.hand.append(p.main_deck.pop())

	    # Energy setup (the original comments cite rules 6.1.1.3 and 6.2.1.7):
	    # a 12-card energy deck, whose first 3 cards start in the energy zone.
	    # This is the only place the energy deck needs to be assigned.
	    for p in state.players:
	        energy_cards = [energy_card_id] * 12
	        p.energy_zone = energy_cards[:3]
	        p.energy_deck = energy_cards[3:]

	    return state


	class AbilityFocusAgent(SmartHeuristicAgent):
	    """
	    Agent biased towards exercising abilities, for stress-testing them.

	    In the main phase it picks at random among plays of members that have an
	    ability with trigger 1 or 7 and activations of abilities already on
	    stage. Everything else is delegated to SmartHeuristicAgent.
	    """

	    def choose_action(self, state: GameState, player_id: int) -> int:
	        """Return an ability-oriented action, falling back to the parent heuristics."""
	        legal = np.where(state.get_legal_actions())[0]
	        if len(legal) == 0:
	            return 0

	        # A pending choice has to be answered: take any non-zero option,
	        # or any legal one if only action 0 is on offer.
	        if state.pending_choices:
	            answers = [a for a in legal if a != 0]
	            return int(np.random.choice(answers if answers else legal))

	        hand = state.players[player_id].hand

	        def plays_priority_member(action_id):
	            # True when the played hand card is a known member with an
	            # ability whose trigger is 1 (ON_PLAY) or 7 (ACTIVATED).
	            hand_idx = (action_id - 1) // 3
	            if hand_idx >= len(hand):
	                return False
	            card_id = hand[hand_idx]
	            if card_id not in state.member_db:
	                return False
	            abilities = state.member_db[card_id].abilities
	            if not abilities:
	                return False
	            return any(ability.trigger in (1, 7) for ability in abilities)

	        if state.phase == Phase.MAIN:
	            # Member plays (1-180) first, then ability activations (200-202).
	            favoured = [a for a in legal if 1 <= a <= 180 and plays_priority_member(a)]
	            favoured += [a for a in legal if 200 <= a <= 202]
	            if favoured:
	                return int(np.random.choice(favoured))

	        # No ability-related action available (or another phase, LIVE_SET included).
	        return super().choose_action(state, player_id)


	class ConservativeAgent(SmartHeuristicAgent):
	    """
	    Cautious variant of SmartHeuristicAgent.

	    In the LIVE_SET phase it sets a live card only when the hearts reported
	    by ``get_total_hearts`` already cover that card plus every live card
	    pending in the live zone. All other phases use the parent logic.
	    """

	    def choose_action(self, state: GameState, player_id: int) -> int:
	        """Return the safest live-set action in LIVE_SET, else defer to the parent."""
	        if state.phase != Phase.LIVE_SET:
	            return super().choose_action(state, player_id)

	        player = state.players[player_id]
	        legal = np.where(state.get_legal_actions())[0]
	        candidates = [a for a in legal if 400 <= a <= 459]
	        if not candidates:
	            return 0  # Pass

	        # Hearts available right now; nothing is assumed about future draws.
	        stage_hearts = player.get_total_hearts(state.member_db)

	        # Requirements already committed by live cards in the live zone.
	        pending = np.zeros(7, dtype=np.int32)
	        for live_id in player.live_zone:
	            if live_id in state.live_db:
	                pending += state.live_db[live_id].required_hearts

	        chosen = -1
	        chosen_value = -1
	        for action in candidates:
	            card_id = player.hand[action - 400]
	            if card_id not in state.live_db:
	                continue

	            live = state.live_db[card_id]
	            needed = pending + live.required_hearts
	            have = stage_hearts.copy()

	            # Pay each colour requirement out of ``have``; stop at the first shortfall.
	            for colour in range(6):
	                if have[colour] < needed[colour]:
	                    break
	                have[colour] -= needed[colour]
	                needed[colour] = 0
	            else:
	                # Whatever is left must cover the "any colour" slot (index 6).
	                leftover = np.sum(have)
	                if leftover >= needed[6]:
	                    # Score first, spare hearts as a small safety bonus.
	                    value = live.score * 10 + (leftover - needed[6])
	                    if value > chosen_value:
	                        chosen_value = value
	                        chosen = action

	        # Pass when no candidate is fully covered.
	        return int(chosen) if chosen != -1 else 0


	class GambleAgent(SmartHeuristicAgent):
	    """
	    Risk-taking variant of SmartHeuristicAgent.

	    In the LIVE_SET phase colour requirements must still be covered by
	    current hearts, but the "any colour" requirement may also be paid with
	    an estimate of one extra heart per two blades. Other phases use the
	    parent logic.
	    """

	    def choose_action(self, state: GameState, player_id: int) -> int:
	        """Return the best gamble-feasible live-set action in LIVE_SET, else defer to the parent."""
	        if state.phase != Phase.LIVE_SET:
	            return super().choose_action(state, player_id)

	        player = state.players[player_id]
	        legal = np.where(state.get_legal_actions())[0]
	        candidates = [a for a in legal if 400 <= a <= 459]
	        if not candidates:
	            return 0

	        stage_hearts = player.get_total_hearts(state.member_db)
	        # Blades are treated as potential hearts: roughly half a heart each.
	        bonus_hearts = player.get_total_blades(state.member_db) // 2

	        # Requirements already committed by live cards in the live zone.
	        pending = np.zeros(7, dtype=np.int32)
	        for live_id in player.live_zone:
	            if live_id in state.live_db:
	                pending += state.live_db[live_id].required_hearts

	        chosen = -1
	        chosen_value = -1
	        for action in candidates:
	            card_id = player.hand[action - 400]
	            if card_id not in state.live_db:
	                continue

	            live = state.live_db[card_id]
	            needed = pending + live.required_hearts
	            have = stage_hearts.copy()

	            # Colours are never gambled on: every one must be covered now.
	            for colour in range(6):
	                if have[colour] < needed[colour]:
	                    break
	                have[colour] -= needed[colour]
	            else:
	                # Only the "any colour" slot (index 6) may use the estimate.
	                total_have = np.sum(have) + bonus_hearts
	                if total_have >= needed[6]:
	                    value = live.score * 10 + (total_have - needed[6])
	                    if value > chosen_value:
	                        chosen_value = value
	                        chosen = action

	        return int(chosen) if chosen != -1 else 0


	class NNAgent(Agent):
	    """
	    Agent that samples its action from a neural network's policy output.

	    The network is a ``TorchNetworkWrapper``; device selection is left to
	    that wrapper. If the network cannot be imported or built, the agent
	    plays uniformly random legal actions instead.
	    """

	    def __init__(self, device=None, model_path=None):
	        """Build the network wrapper and optionally load weights from ``model_path``.

	        Any failure is reported on stdout and leaves ``self.net`` as ``None``.
	        """
	        try:
	            # Imported here so the network stack is only required when this agent is used.
	            from game.network import NetworkConfig
	            from game.network_torch import TorchNetworkWrapper

	            self.config = NetworkConfig()
	            self.net = TorchNetworkWrapper(self.config, device=device)
	            self.device = self.net.device

	            if model_path:
	                print(f"Loading model from {model_path}...")
	                self.net.load(model_path)
	        except ImportError as e:
	            print(f"WARNING: PyTorch or network modules not found. NNAgent falling back to Random. Error: {e}")
	            self.net = None
	        except Exception as e:
	            print(f"WARNING: Failed to initialize NNAgent: {e}")
	            self.net = None

	    @staticmethod
	    def _random_legal_action(state) -> int:
	        # Uniform choice among the legal actions; 0 when there are none.
	        legal = np.where(state.get_legal_actions())[0]
	        if len(legal) > 0:
	            return int(np.random.choice(legal))
	        return 0

	    def choose_action(self, state: GameState, player_id: int) -> int:
	        """Sample an action index from the network policy, or a random legal one as fallback."""
	        if self.net is None:
	            return self._random_legal_action(state)

	        # Sample directly from the policy (no tree search); the value output is not used.
	        policy, _value = self.net.predict(state)

	        total = policy.sum()
	        if total > 0:
	            # Renormalise so the probabilities sum to 1 despite float error.
	            return int(np.random.choice(len(policy), p=policy / total))

	        # An all-zero policy carries no information.
	        return self._random_legal_action(state)


	def _make_agent(kind, args, fallback):
	    """Instantiate the agent selected by a CLI agent name.

	    Args:
	        kind: Agent name as accepted by ``--agent`` / ``--agent_p2``.
	        args: Parsed arguments; ``args.depth`` is read only for "search".
	        fallback: Agent class instantiated when ``kind`` is not recognised.
	    """
	    if kind == "search":
	        return SearchProbAgent(depth=args.depth)
	    # Built per call so the agent classes are looked up at call time.
	    agent_classes = {
	        "random": RandomAgent,
	        "smart": SmartHeuristicAgent,
	        "ability_focus": AbilityFocusAgent,
	        "conservative": ConservativeAgent,
	        "gamble": GambleAgent,
	        "nn": NNAgent,
	    }
	    return agent_classes.get(kind, fallback)()


	def run_simulation(args):
	    """Play ``args.num_games`` games, print summary statistics and save one game log.

	    Args:
	        args: Namespace with ``num_games``, ``seed``, ``agent``, ``agent_p2``,
	            ``depth`` (for the "search" agent), ``max_turns``, ``log_file``
	            and whatever ``setup_game`` reads.

	    The log of the "best" game is written to ``args.log_file``. A game that
	    raises is reported on stderr and left out of the results.
	    """
	    import io
	    import json
	    import traceback

	    # Logging is managed manually, one capture buffer per game.
	    root_logger = logging.getLogger()
	    root_logger.setLevel(logging.INFO)

	    # Console handler: only warnings/errors reach the console during the run.
	    console = logging.StreamHandler()
	    console.setLevel(logging.WARNING)
	    root_logger.addHandler(console)

	    best_combined_score = -1
	    best_log_content = ""
	    best_game_idx = -1
	    best_winner = -1

	    results = []

	    start_total = time.time()

	    for game_idx in range(args.num_games):
	        # Capture this game's INFO log in memory.
	        log_capture = io.StringIO()
	        handler = logging.StreamHandler(log_capture)
	        handler.setLevel(logging.INFO)
	        handler.setFormatter(logging.Formatter("%(message)s"))

	        root_logger.handlers = [console, handler]  # Replace handlers (keep console)

	        logging.info(f"=== Game {game_idx + 1} ===")

	        try:
	            state = setup_game(args)
	            # NOTE(review): setup_game() seeds both RNGs with args.seed itself,
	            # so deck construction is identical for every game; only what
	            # happens after this reseed varies with game_idx. Confirm intended.
	            current_seed = args.seed + game_idx
	            random.seed(current_seed)
	            np.random.seed(current_seed)

	            # Both players go through the same factory so every name accepted
	            # by --agent_p2 yields that agent. The fallbacks keep the
	            # historical defaults: smart for P0, random for P1.
	            agents = [
	                _make_agent(args.agent, args, SmartHeuristicAgent),
	                _make_agent(args.agent_p2, args, RandomAgent),
	            ]

	            action_count = 0
	            while not state.game_over:
	                # Safety limit on the number of actions.
	                if action_count > args.max_turns:
	                    break
	                state.check_win_condition()
	                if state.game_over:
	                    break

	                active_pid = state.current_player

	                logging.info("-" * 40)
	                logging.info(f"Turn {state.turn_number} | Phase {state.phase.name} | Active: P{active_pid}")
	                p0 = state.players[0]
	                p1 = state.players[1]
	                logging.info(f"Score: P0({len(p0.success_lives)}) - P1({len(p1.success_lives)})")
	                logging.info(f"Hand: P0({len(p0.hand)}) - P1({len(p1.hand)})")

	                action = agents[active_pid].choose_action(state, active_pid)
	                logging.info(f"Action: P{active_pid} chooses {action}")

	                state = state.step(action)
	                action_count += 1

	            # Game end
	            p0_score = len(state.players[0].success_lives)
	            p1_score = len(state.players[1].success_lives)
	            combined_score = p0_score + p1_score
	            winner = state.winner

	            logging.info("=" * 40)
	            logging.info(f"Game Over. Winner: {winner}. Score: {p0_score}-{p1_score}")

	            results.append(
	                {
	                    "id": game_idx,
	                    "winner": winner,
	                    "score_total": combined_score,
	                    "p0_score": p0_score,
	                    "p1_score": p1_score,
	                    "actions": action_count,
	                    "game_turns": state.turn_number,
	                }
	            )
	            print(f"DEBUG: Game {game_idx} Winner: {winner}")

	            # "Best" game: any game with a winner, or one with a higher combined score.
	            # NOTE(review): a later win replaces an earlier higher-scoring game,
	            # so the reported best score can go down. Confirm intended.
	            is_win = winner == 0 or winner == 1
	            if is_win or combined_score > best_combined_score:
	                if is_win and best_winner == -1:
	                    print(f"Found a Winner in Game {game_idx + 1}! (Winner: P{winner})")

	                best_log_content = log_capture.getvalue()
	                best_combined_score = combined_score
	                best_winner = winner
	                best_game_idx = game_idx

	            if (game_idx + 1) % 100 == 0:
	                print(f"Simulated {game_idx + 1} games... Best Score: {best_combined_score}")

	        except Exception as e:
	            print(f"Error in game {game_idx}: {e}", file=sys.stderr)
	            traceback.print_exc()

	        finally:
	            # Detach the handler before closing its stream, so nothing logged
	            # later is written to a closed buffer.
	            root_logger.removeHandler(handler)
	            log_capture.close()

	    total_time = time.time() - start_total

	    # Write best log
	    with open(args.log_file, "w", encoding="utf-8") as f:
	        f.write(best_log_content)

	    print("\n=== Simulation Complete ===")
	    print(f"Total Games Ran: {len(results)}")
	    print(f"Total Time: {total_time:.2f}s")

	    wins0 = sum(1 for r in results if r["winner"] == 0)
	    wins1 = sum(1 for r in results if r["winner"] == 1)
	    draws = sum(1 for r in results if r["winner"] == 2)

	    print(f"Wins: P0={wins0}, P1={wins1}, Draws={draws}")

	    total_actions = sum(r["actions"] for r in results)
	    total_game_turns = sum(r["game_turns"] for r in results)

	    if total_time > 0:
	        print(f"APS (Actions Per Second): {total_actions / total_time:.2f}")
	        print(f"TPS (Turns Per Second): {total_game_turns / total_time:.2f}")

	    print(
	        f"Best Game was Game {best_game_idx + 1} with Score Total {best_combined_score if best_combined_score >= 0 else 0}"
	    )
	    print(f"Log for best game saved to {args.log_file}")

	    if results:
	        print(f"Last Game Summary: {json.dumps(results[-1], indent=2)}")


	if __name__ == "__main__":
	    # The default card file is resolved relative to this script, not the working directory.
	    script_dir = os.path.dirname(os.path.abspath(__file__))
	    default_cards_path = os.path.join(script_dir, "..", "engine", "data", "cards.json")

	    # Both players accept the same set of agent names.
	    agent_choices = ["random", "smart", "ability_focus", "conservative", "gamble", "nn", "search"]

	    # (flag, add_argument keyword arguments), in the order shown by --help.
	    option_specs = [
	        ("--cards_path", {"default": default_cards_path, "help": "Path to cards.json"}),
	        (
	            "--deck_type",
	            {
	                "default": "normal",
	                "choices": ["normal", "easy", "ability_only"],
	                "help": "Deck type: normal, easy, or ability_only",
	            },
	        ),
	        ("--max_turns", {"type": int, "default": 1000, "help": "Max steps/turns to run"}),
	        ("--log_file", {"default": "game_log.txt", "help": "Output log file"}),
	        ("--seed", {"type": int, "default": 42, "help": "Random seed"}),
	        ("--num_games", {"type": int, "default": 1, "help": "Number of games to run"}),
	        ("--agent", {"default": "smart", "choices": agent_choices, "help": "Agent type to control P0"}),
	        ("--agent_p2", {"default": "random", "choices": agent_choices, "help": "Agent type to control P1"}),
	        ("--depth", {"type": int, "default": 2, "help": "Search depth for SearchProbAgent"}),
	    ]

	    parser = argparse.ArgumentParser()
	    for flag, spec in option_specs:
	        parser.add_argument(flag, **spec)

	    args = parser.parse_args()

	    run_simulation(args)