# rabukasim/tools/debug/scripts/headless_runner_old.py
# (Uploaded via huggingface_hub by trioskosmos, revision 463f868)
import argparse
import logging
import os
import random
import sys
import time
import numpy as np
# Add parent dir to path
sys.path.append(os.path.dirname(os.path.abspath(__file__)))
from game.data_loader import CardDataLoader
from game.game_state import GameState, Phase
class Agent:
    """Abstract base class for all game-playing agents."""

    def choose_action(self, state: GameState, player_id: int) -> int:
        """Return the index of the action *player_id* should take in *state*.

        Subclasses must override this; the base class provides no policy.
        """
        raise NotImplementedError
class TrueRandomAgent(Agent):
    """Completely random agent with no heuristics."""

    def choose_action(self, state: GameState, player_id: int) -> int:
        """Sample uniformly from the legal actions; pass (0) if none exist."""
        legal = np.where(state.get_legal_actions())[0]
        if len(legal) == 0:
            return 0
        return int(np.random.choice(legal))
class RandomAgent(Agent):
    """Random agent with light heuristics: strongly prefers acting over passing."""

    def choose_action(self, state: GameState, player_id: int) -> int:
        """Pick a mostly-random legal action, biased away from passing."""
        legal = np.where(state.get_legal_actions())[0]
        if len(legal) == 0:
            return 0

        non_pass = [a for a in legal if a != 0]

        # MULLIGAN: confirm (action 0) 30% of the time, otherwise toggle a card.
        if state.phase in (Phase.MULLIGAN_P1, Phase.MULLIGAN_P2):
            if random.random() < 0.3:
                return 0
            toggles = [a for a in legal if 300 <= a <= 359]
            return int(np.random.choice(toggles)) if toggles else 0

        # LIVE_SET: always prefer setting a LIVE card (400-459) over passing.
        if state.phase == Phase.LIVE_SET:
            sets = [a for a in legal if 400 <= a <= 459]
            if sets:
                return int(np.random.choice(sets))

        # MAIN: play a member to the stage (actions 1-180) 80% of the time.
        if state.phase == Phase.MAIN:
            plays = [a for a in legal if 1 <= a <= 180]
            if plays and random.random() < 0.8:
                return int(np.random.choice(plays))

        # Never pass when ANY other action is available.
        if non_pass:
            return int(np.random.choice(non_pass))
        return 0
class SmartHeuristicAgent(Agent):
    """Advanced AI with better winning strategies.

    Strategy summary:
      * Mulligan: keep cheap members (cost <= 3), return everything else.
      * Live set: only set a Live whose heart requirements are already
        satisfiable by the hearts currently on stage.
      * Main: 1-step lookahead scoring of ability activations (with a
        per-turn repetition penalty to break activation loops), plus a
        heuristic score for member plays (heart fit, stats, slot use).
    """

    def __init__(self):
        self.last_turn_num = -1  # turn number seen on the previous call
        self.turn_action_counts = {}  # action id -> times chosen this turn

    def choose_action(self, state: GameState, player_id: int) -> int:
        """Return the heuristically best action; 0 (pass) as a fallback."""
        # --- Loop Protection: reset the per-turn counters on a new turn ---
        if state.turn_number != self.last_turn_num:
            self.last_turn_num = state.turn_number
            self.turn_action_counts = {}

        legal_indices = np.where(state.get_legal_actions())[0]
        if len(legal_indices) == 0:
            return 0
        p = state.players[player_id]

        if state.phase in (Phase.MULLIGAN_P1, Phase.MULLIGAN_P2):
            return self._choose_mulligan(state, p)
        if state.phase == Phase.LIVE_SET:
            return self._choose_live_set(state, p, legal_indices)
        if state.phase == Phase.MAIN:
            return self._choose_main(state, p, player_id, legal_indices)

        # Default: pick a random non-pass action if available.
        non_pass = [i for i in legal_indices if i != 0]
        if non_pass:
            return int(np.random.choice(non_pass))
        return 0

    @staticmethod
    def _pending_requirements(state, p):
        """Summed heart requirements (6 colors + 'Any') of Lives already in the zone."""
        req = np.zeros(7, dtype=np.int32)
        for live_id in p.live_zone:
            if live_id in state.live_db:
                req += state.live_db[live_id].required_hearts
        return req

    @staticmethod
    def _heart_surplus(hearts, required):
        """Check *required* (6 colors + 'Any') against *hearts* (6 colors).

        Returns (feasible, surplus): surplus is the number of uncommitted
        hearts left after covering both colored and 'Any' requirements.
        """
        have = hearts.copy()
        for c in range(6):
            if have[c] < required[c]:
                return False, 0
            have[c] -= required[c]
        remaining = int(np.sum(have))
        if remaining < required[6]:
            return False, 0
        return True, remaining - int(required[6])

    def _choose_mulligan(self, state, p):
        """Keep members with cost <= 3; toggle everything else for return.

        Actions 300-359 toggle hand index (action - 300); returns 0 (confirm)
        once every card's mark matches the keep/return intent.
        """
        # Initialize mulligan_selection if not present (set of marked hand indices).
        if not hasattr(p, "mulligan_selection"):
            p.mulligan_selection = set()
        to_toggle = []
        for i, card_id in enumerate(p.hand):
            should_keep = (
                card_id in state.member_db and state.member_db[card_id].cost <= 3
            )
            is_marked = i in p.mulligan_selection
            # Toggle when the mark disagrees with intent: unmark keepable
            # cards, mark bad cards (and all Live cards, which are not in
            # member_db and therefore never "keepable").
            if should_keep == is_marked:
                to_toggle.append(300 + i)
        if to_toggle:
            choice = np.random.choice(to_toggle)
            # Defensive: np.random.choice can hand back a 0-d array in
            # edge cases; ensure we return a plain int either way.
            return int(choice) if np.isscalar(choice) else int(choice[0])
        return 0  # Confirm

    def _choose_live_set(self, state, p, legal_indices):
        """Set the highest-value Live card (400-459) that is safely payable."""
        live_actions = [i for i in legal_indices if 400 <= i <= 459]
        if not live_actions:
            return 0  # Pass
        current_hearts = p.get_total_hearts(state.member_db)
        pending_req = self._pending_requirements(state, p)

        best_action = -1
        max_value = -1
        for action in live_actions:
            card_id = p.hand[action - 400]
            if card_id not in state.live_db:
                continue
            live = state.live_db[card_id]
            feasible, spare = self._heart_surplus(
                current_hearts, pending_req + live.required_hearts
            )
            if not feasible:
                continue
            # Score dominates; leftover hearts break ties (safer plays).
            value = live.score * 10 + spare
            if value > max_value:
                max_value = value
                best_action = action
        if best_action != -1:
            return int(best_action)
        return 0  # Pass if no safe plays

    def _best_ability(self, state, p, player_id, legal_indices):
        """Score activation actions (200-202) via 1-step lookahead.

        Returns (best_action, best_score); best_action is -1 if none scored.
        """
        best_action = -1
        best_score = -1
        for action in (i for i in legal_indices if 200 <= i <= 202):
            card_id = p.stage[action - 200]
            if card_id not in state.member_db:
                continue
            # HEURISTIC: use a 1-step lookahead to detect no-ops or loops.
            try:
                next_state = state.step(action)
                next_p = next_state.players[player_id]
                hand_delta = len(next_p.hand) - len(p.hand)
                energy_delta = len(next_p.energy_zone) - len(p.energy_zone)
                tap_delta = np.sum(next_p.tapped_energy) - np.sum(p.tapped_energy)
                stage_changed = not np.array_equal(next_p.stage, p.stage)
                choice_pending = len(next_state.pending_choices) > 0
                reps = self.turn_action_counts.get(action, 0)
                if (
                    not any([hand_delta > 0, energy_delta > 0, stage_changed, choice_pending])
                    and tap_delta <= 0
                ):
                    # State didn't meaningfully improve (maybe it tapped
                    # something but gained nothing).
                    score = -10
                else:
                    score = 15 if (hand_delta > 0 or energy_delta > 0) else 10
                # Penalize re-activating the same ability within one turn.
                score -= reps * 20
            except Exception:  # was a bare except: don't swallow KeyboardInterrupt/SystemExit
                score = -100  # Crashes are bad
            if score > best_score:
                best_score = score
                best_action = action
        return best_action, best_score

    def _best_play(self, state, p, legal_indices):
        """Score member plays (actions 1-180); return best action or -1."""
        play_actions = [i for i in legal_indices if 1 <= i <= 180]
        if not play_actions:
            return -1
        # What the pending Lives still require vs. what the stage provides.
        pending_req = self._pending_requirements(state, p)
        current_hearts = p.get_total_hearts(state.member_db)
        # Hoisted: does not change while scoring candidates.
        untapped_energy = p.count_untapped_energy()

        best_action = -1
        best_score = -1
        for action in play_actions:
            hand_idx = (action - 1) // 3
            member = state.member_db[p.hand[hand_idx]]
            score = 0
            prov = member.hearts  # shape (6,)
            # A. Heart contribution: huge bonus for covering a missing color.
            for c in range(6):
                if pending_req[c] > current_hearts[c] and prov[c] > 0:
                    score += 20
            # A2. Total heart volume (crucial for 'Any' requirements).
            score += prov.sum() * 5
            # B. Base stats: power and draw icons are good.
            score += member.blades
            score += member.draw_icons * 5
            # C. Cost efficiency: small nudge toward cheap cards when low
            # on energy — but don't punish so hard we never play.
            if untapped_energy < 1 and member.cost > 1:
                score -= 2
            # D. Slot efficiency.
            area = (action - 1) % 3
            if p.stage[area] >= 0:
                # Replacing a member: avoid downgrading heart output.
                prev = state.member_db[p.stage[area]]
                if prev.hearts.sum() > member.hearts.sum():
                    score -= 5
            else:
                score += 5  # Filling an empty slot is good.
            if score > best_score:
                best_score = score
                best_action = action
        return best_action

    def _choose_main(self, state, p, player_id, legal_indices):
        """MAIN phase: prefer positively-scored abilities, then member plays."""
        best_ability_action, best_ability_score = self._best_ability(
            state, p, player_id, legal_indices
        )
        best_play_action = self._best_play(state, p, legal_indices)
        if best_ability_score > 0:
            self.turn_action_counts[best_ability_action] = (
                self.turn_action_counts.get(best_ability_action, 0) + 1
            )
            return int(best_ability_action)
        if best_play_action != -1:
            return int(best_play_action)
        return 0  # Pass
def generate_random_decks(member_ids, live_ids):
    """Generate two random decks: 40 members + 10 lives in ONE main_deck each.

    Empty pools fall back to a single dummy id (0) so construction never fails.
    """
    members = list(member_ids) or [0]
    lives = list(live_ids) or [0]

    # Draw both decks first, then shuffle both — this preserves the exact
    # RNG consumption order (50 choices per deck, then two shuffles).
    decks = []
    for _ in range(2):
        decks.append(
            [random.choice(members) for _ in range(40)]
            + [random.choice(lives) for _ in range(10)]
        )
    for deck in decks:
        random.shuffle(deck)
    return decks[0], decks[1]
def initialize_game(use_real_data: bool = True, cards_path: str = "data/cards.json") -> GameState:
    """Initializes GameState with card data.

    Card databases are stored on the GameState *class*, so every instance
    shares them. On any load failure (or when use_real_data is False) the
    databases are left empty.
    """
    member_db, live_db = {}, {}
    if use_real_data:
        try:
            m_db, l_db, e_db = CardDataLoader(cards_path).load()
            member_db, live_db = m_db, l_db
        except Exception as e:
            print(f"Failed to load real data: {e}")
    GameState.member_db = member_db
    GameState.live_db = live_db
    return GameState()
def create_easy_cards():
    """Create custom easy cards for testing scoring.

    Returns a (member, live) pair: a cost-1 member providing one heart of
    every color plus one blade, and a score-1 Live requiring a single
    'Any' heart.
    """
    import numpy as np
    from game.game_state import LiveCard, MemberCard

    member = MemberCard(
        card_id=888,
        card_no="PL!-sd1-001-SD",  # Correct field name
        name="Easy Member",
        cost=1,
        hearts=np.ones(6, dtype=np.int32),
        blade_hearts=np.zeros(6, dtype=np.int32),
        blades=1,
        volume_icons=0,
        draw_icons=0,
        img_path="cards/PLSD01/PL!-sd1-001-SD.png",
        group="Easy",
    )
    live = LiveCard(
        card_id=999,
        card_no="PL!-pb1-019-SD",  # Correct field name
        name="Easy Live",
        score=1,
        required_hearts=np.array([0, 0, 0, 0, 0, 0, 1], dtype=np.int32),
        volume_icons=0,
        draw_icons=0,
        img_path="cards/PLSD01/PL!-pb1-019-SD.png",
        group="Easy",
    )
    return member, live
def setup_game(args):
    """Build a fresh GameState from the parsed CLI *args*.

    Seeds both RNGs, builds either the synthetic "easy" decks (injected
    test cards) or random decks from the loaded card databases, then
    performs the initial 5-card draw and energy-zone setup for both
    players.
    """
    # Initialize game state
    use_easy = args.deck_type == "easy"
    state = initialize_game(use_real_data=(not use_easy), cards_path=args.cards_path)
    # Set seed
    np.random.seed(args.seed)
    random.seed(args.seed)
    if use_easy:
        # INJECT EASY CARDS
        m, l = create_easy_cards()
        state.member_db[888] = m
        state.live_db[999] = l
        # Single main_deck with BOTH Members (40) and Lives (10), shuffled
        # NOTE(review): the deck built below is actually 48 members + 12
        # lives (60 cards), not 40+10 as the comment above claims — confirm
        # which size is intended.
        for p in state.players:
            m_list = [888] * 48
            l_list = [999] * 12
            p.main_deck = m_list + l_list
            random.shuffle(p.main_deck)
            p.energy_deck = [200] * 12
            p.hand = []
            p.energy_zone = []
            p.live_zone = []
            p.discard = []
            p.stage = np.array([-1, -1, -1], dtype=np.int32)
    else:
        # Normal Random Decks (Members + Lives mixed)
        member_keys = list(state.member_db.keys())
        if args.deck_type == "ability_only":
            # Filter for members with abilities
            member_keys = [mid for mid in member_keys if state.member_db[mid].abilities]
            if not member_keys:
                print("WARNING: No members with abilities found! Reverting to all members.")
                member_keys = list(state.member_db.keys())
        deck1, deck2 = generate_random_decks(member_keys, state.live_db.keys())
        state.players[0].main_deck = deck1
        state.players[0].energy_deck = [999] * 10
        state.players[1].main_deck = deck2
        state.players[1].energy_deck = [999] * 10
        # Clear hands/zones just in case
        for p in state.players:
            p.hand = []
            p.energy_zone = []
    # Initial Draw (5 cards from main_deck)
    for _ in range(5):
        if state.players[0].main_deck:
            state.players[0].hand.append(state.players[0].main_deck.pop())
        if state.players[1].main_deck:
            state.players[1].hand.append(state.players[1].main_deck.pop())
    # Setup Energy Decks (Rule 6.1.1.3: 12 cards)
    # NOTE: this intentionally overwrites the energy decks assigned above
    # in both branches.
    for p in state.players:
        p.energy_deck = [200] * 12
        p.energy_zone = []
        # Initial Energy (Rule 6.2.1.7: Move 3 cards to energy zone)
        for _ in range(3):
            if p.energy_deck:
                p.energy_zone.append(p.energy_deck.pop(0))
    return state
class AbilityFocusAgent(SmartHeuristicAgent):
    """
    Agent that prioritizes activating abilities and playing cards with abilities.
    Used for stress-testing ability implementations.
    """

    def choose_action(self, state: GameState, player_id: int) -> int:
        """Prefer ability-related actions in MAIN; otherwise defer to superclass."""
        legal = np.where(state.get_legal_actions())[0]
        if len(legal) == 0:
            return 0
        player = state.players[player_id]

        # (LIVE_SET is handled by the superclass logic for smarter selection.)
        if state.phase == Phase.MAIN:
            prioritized = []
            # Play actions (1-180): members whose abilities include
            # ON_PLAY (trigger 1) or ACTIVATED (trigger 7) get priority.
            for action_id in (a for a in legal if 1 <= a <= 180):
                hand_idx = (action_id - 1) // 3
                if hand_idx >= len(player.hand):
                    continue
                card_id = player.hand[hand_idx]
                if card_id not in state.member_db:
                    continue
                card = state.member_db[card_id]
                if card.abilities and any(a.trigger in (1, 7) for a in card.abilities):
                    prioritized.append(action_id)
            # Activated-ability actions (600+) are always high priority.
            prioritized.extend(a for a in legal if a >= 600)
            if prioritized:
                return int(np.random.choice(prioritized))

        # Fall back to SmartHeuristic when no high-priority ability action exists.
        return super().choose_action(state, player_id)
class ConservativeAgent(SmartHeuristicAgent):
    """
    Very safe AI. Only sets Live cards if it has strictly sufficient hearts
    available on stage right now (untapped members). Never gambles on future draws.
    """

    def choose_action(self, state: GameState, player_id: int) -> int:
        """LIVE_SET gets an ultra-conservative override; other phases defer to the superclass."""
        # Override LIVE_SET phase with ultra-conservative logic
        if state.phase == Phase.LIVE_SET:
            p = state.players[player_id]
            legal_indices = np.where(state.get_legal_actions())[0]
            live_actions = [i for i in legal_indices if 400 <= i <= 459]
            if not live_actions:
                return 0  # Pass
            # ONLY count hearts on stage (no assumptions about future)
            stage_hearts = p.get_total_hearts(state.member_db)
            # Calculate what we already need for pending live cards
            # (7-vector: 6 colors + 'Any')
            pending_req = np.zeros(7, dtype=np.int32)
            for live_id in p.live_zone:
                if live_id in state.live_db:
                    pending_req += state.live_db[live_id].required_hearts
            best_action = -1
            max_value = -1
            for action in live_actions:
                # Actions 400-459 map to hand indices 0-59.
                hand_idx = action - 400
                card_id = p.hand[hand_idx]
                if card_id not in state.live_db:
                    continue
                live = state.live_db[card_id]
                # This Live's requirements plus everything already pending.
                total_req = pending_req + live.required_hearts
                # Ultra-strict feasibility check: need EXACT hearts available
                needed = total_req.copy()
                have = stage_hearts.copy()
                # 1. Check colored hearts (must have exact matches)
                possible = True
                for c in range(6):
                    if have[c] < needed[c]:
                        possible = False
                        break
                    have[c] -= needed[c]
                    needed[c] = 0
                if not possible:
                    continue
                # 2. Check "Any" hearts (must have enough remaining)
                if np.sum(have) < needed[6]:
                    continue
                # If strictly possible, calculate conservative value
                value = live.score * 10
                # Small bonus for having extra hearts (prefer safer plays)
                value += np.sum(have) - needed[6]
                if value > max_value:
                    max_value = value
                    best_action = action
            if best_action != -1:
                return int(best_action)
            return 0  # Pass if no 100% safe plays
        # For all other phases, use SmartHeuristicAgent logic
        return super().choose_action(state, player_id)
class GambleAgent(SmartHeuristicAgent):
    """
    Risk-taking AI. Sets Live cards if it has enough hearts OR if it has
    enough blades on stage to likely get the hearts from yell cards.
    """

    def choose_action(self, state: GameState, player_id: int) -> int:
        """LIVE_SET counts blades as speculative 'Any' hearts; other phases defer to the superclass."""
        if state.phase == Phase.LIVE_SET:
            p = state.players[player_id]
            legal_indices = np.where(state.get_legal_actions())[0]
            live_actions = [i for i in legal_indices if 400 <= i <= 459]
            if not live_actions:
                return 0
            # Current hearts on stage
            stage_hearts = p.get_total_hearts(state.member_db)
            # Total blades on stage (potential yells)
            total_blades = p.get_total_blades(state.member_db)
            # Estimated hearts from yells: Roughly 0.5 hearts per blade?
            # Or simplified: consider blades as "Any" hearts for feasibility check
            est_extra_hearts = total_blades // 2
            best_action = -1
            max_value = -1
            # Pending req (7-vector: 6 colors + 'Any') from Lives already set
            pending_req = np.zeros(7, dtype=np.int32)
            for live_id in p.live_zone:
                if live_id in state.live_db:
                    pending_req += state.live_db[live_id].required_hearts
            for action in live_actions:
                # Actions 400-459 map to hand indices 0-59.
                hand_idx = action - 400
                card_id = p.hand[hand_idx]
                if card_id not in state.live_db:
                    continue
                live = state.live_db[card_id]
                total_req = pending_req + live.required_hearts
                # Feasibility check with "Gamble" factor
                needed = total_req.copy()
                have = stage_hearts.copy()
                # satisfy colors
                possible = True
                for c in range(6):
                    if have[c] < needed[c]:
                        # Can we gamble on this color?
                        # Maybe if we have a lot of blades.
                        # For simplicity, let's say we can only gamble on 'Any'
                        possible = False
                        break
                    have[c] -= needed[c]
                if not possible:
                    continue
                # Any hearts check with gamble: leftover hearts plus the
                # speculative blade-derived hearts must cover the 'Any' slot.
                total_have = np.sum(have) + est_extra_hearts
                if total_have >= needed[6]:
                    value = live.score * 10 + (total_have - needed[6])
                    if value > max_value:
                        max_value = value
                        best_action = action
            if best_action != -1:
                return int(best_action)
            return 0
        return super().choose_action(state, player_id)
class NNAgent(Agent):
    """
    Agent backed by a Neural Network (PyTorch), running on GPU if available.
    """

    def __init__(self, device=None):
        try:
            # Lazy import to avoid hard dependency if not used
            import torch
            from game.network import NetworkConfig
            from game.network_torch import TorchNetworkWrapper

            self.config = NetworkConfig()
            self.net = TorchNetworkWrapper(self.config, device=device)
            self.device = self.net.device
            # print(f"NNAgent initialized on device: {self.device}")
        except ImportError as e:
            print(f"WARNING: PyTorch or network modules not found. NNAgent falling back to Random. Error: {e}")
            self.net = None

    @staticmethod
    def _random_legal(state):
        """Uniform random legal action; pass (0) when none are legal."""
        legal = np.where(state.get_legal_actions())[0]
        return int(np.random.choice(legal)) if len(legal) > 0 else 0

    def choose_action(self, state: GameState, player_id: int) -> int:
        """Sample an action directly from the network policy (no MCTS)."""
        if self.net is None:
            # Fallback to random if the network failed to load.
            return self._random_legal(state)
        # Predict policy (this runs on GPU if available).
        policy, value = self.net.predict(state)
        total = policy.sum()
        if total > 0:
            # Normalize (handles float error) and sample from the policy.
            return int(np.random.choice(len(policy), p=policy / total))
        # All-zero policy shouldn't happen with proper masking; fall back.
        return self._random_legal(state)
def run_simulation(args):
    """Run *args.num_games* games and save the log of the "best" one.

    "Best" means: the first game that produces an outright winner (the
    search stops immediately there), otherwise the game with the highest
    combined score. Each game's log is captured in memory via a dedicated
    logging handler and only the best game's log is written to
    *args.log_file*.
    """
    import io

    # We will manage logging manually per game
    root_logger = logging.getLogger()
    root_logger.setLevel(logging.INFO)
    # Console handler for high-level info
    console = logging.StreamHandler()
    console.setLevel(logging.WARNING)  # Only show warnings/errors to console during run
    root_logger.addHandler(console)
    best_combined_score = -1
    best_log_content = ""
    best_game_idx = -1
    results = []
    start_total = time.time()
    for game_idx in range(args.num_games):
        # Capture logs for this game
        log_capture = io.StringIO()
        handler = logging.StreamHandler(log_capture)
        handler.setLevel(logging.INFO)
        # Use a simple format for game logs
        formatter = logging.Formatter("%(message)s")
        handler.setFormatter(formatter)
        root_logger.handlers = [console, handler]  # Replace handlers (keep console)
        # Log Header
        logging.info(f"=== Game {game_idx + 1} ===")
        # Setup Game
        try:
            state = setup_game(args)
            # Per-game seed so every game is reproducible independently.
            current_seed = args.seed + game_idx
            random.seed(current_seed)
            np.random.seed(current_seed)
            # Agent Selection
            p0_agent = SmartHeuristicAgent()
            if args.agent == "random":
                p0_agent = RandomAgent()
            elif args.agent == "ability_focus":
                p0_agent = AbilityFocusAgent()
            elif args.agent == "conservative":
                p0_agent = ConservativeAgent()
            elif args.agent == "gamble":
                p0_agent = GambleAgent()
            elif args.agent == "nn":
                p0_agent = NNAgent()
            # P1 is always Random for now to provide chaos/noise
            agents = [p0_agent, RandomAgent()]
            turn_count = 0
            # NOTE: turn_count counts *steps* (individual actions), capped
            # by args.max_turns.
            while turn_count < args.max_turns:
                if state.game_over:
                    break
                state.check_win_condition()
                if state.game_over:
                    break
                active_pid = state.current_player
                # Detailed Log
                logging.info("-" * 40)
                logging.info(f"Turn {state.turn_number} | Phase {state.phase.name} | Active: P{active_pid}")
                p0 = state.players[0]
                p1 = state.players[1]
                logging.info(f"Score: P0({len(p0.success_lives)}) - P1({len(p1.success_lives)})")
                logging.info(f"Hand: P0({len(p0.hand)}) - P1({len(p1.hand)})")
                # Agent Act
                action = agents[active_pid].choose_action(state, active_pid)
                logging.info(f"Action: P{active_pid} chooses {action}")
                state = state.step(action)
                turn_count += 1
            # Game End
            p0_score = len(state.players[0].success_lives)
            p1_score = len(state.players[1].success_lives)
            combined_score = p0_score + p1_score
            winner = state.winner
            logging.info("=" * 40)
            logging.info(f"Game Over. Winner: {winner}. Score: {p0_score}-{p1_score}")
            res = {
                "id": game_idx,
                "winner": winner,
                "score_total": combined_score,
                "p0_score": p0_score,
                "p1_score": p1_score,
                "turns": turn_count,
            }
            results.append(res)
            # Check if this is the "best" game
            # Priority: Win (0 or 1) > High Score
            is_win = winner == 0 or winner == 1
            is_best_score = combined_score > best_combined_score
            # Simple logic: If we found a winner, that's the absolute best, stop immediately
            if is_win:
                print(f"Found a Winner in Game {game_idx + 1}! (Winner: P{winner})")
                best_log_content = log_capture.getvalue()
                best_game_idx = game_idx
                break
            if is_best_score:
                best_combined_score = combined_score
                best_log_content = log_capture.getvalue()
                best_game_idx = game_idx
            if (game_idx + 1) % 100 == 0:
                print(f"Simulated {game_idx + 1} games... Best Score: {best_combined_score}")
        except Exception as e:
            print(f"Error in game {game_idx}: {e}")
            import traceback

            traceback.print_exc()
        finally:
            # Log content was read above (when needed) before closing here.
            log_capture.close()
    total_time = time.time() - start_total
    # Write best log
    with open(args.log_file, "w", encoding="utf-8") as f:
        f.write(best_log_content)
    print("\n=== Simulation Complete ===")
    print(f"Total Games Ran: {len(results)}")
    print(f"Total Time: {total_time:.2f}s")
    # NOTE(review): winner == 2 is treated as a draw here — confirm against
    # the engine's winner encoding.
    wins0 = sum(1 for r in results if r["winner"] == 0)
    wins1 = sum(1 for r in results if r["winner"] == 1)
    draws = sum(1 for r in results if r["winner"] == 2)
    print(f"Wins: P0={wins0}, P1={wins1}, Draws={draws}")
    print(
        f"Best Game was Game {best_game_idx + 1} with Score Total {best_combined_score if best_combined_score >= 0 else 0}"
    )
    print(f"Log for best game saved to {args.log_file}")
# CLI entry point: parse simulation options and run.
if __name__ == "__main__":
    parser = argparse.ArgumentParser()
    parser.add_argument("--cards_path", default="data/cards.json", help="Path to cards.json")
    parser.add_argument(
        "--deck_type",
        default="normal",
        choices=["normal", "easy", "ability_only"],
        help="Deck type: normal, easy, or ability_only",
    )
    parser.add_argument("--max_turns", type=int, default=1000, help="Max steps/turns to run")
    parser.add_argument("--log_file", default="game_log.txt", help="Output log file")
    parser.add_argument("--seed", type=int, default=42, help="Random seed")
    parser.add_argument("--num_games", type=int, default=1, help="Number of games to run")
    parser.add_argument(
        "--agent",
        default="smart",
        choices=["random", "smart", "ability_focus", "conservative", "gamble", "nn"],
        help="Agent type to control P0",
    )
    args = parser.parse_args()
    run_simulation(args)