Spaces:

k-l-lambda
/

trigo

Sleeping

File size: 27,530 Bytes

15f353f

/**
 * Monte Carlo Tree Search (MCTS) Agent for Trigo
 *
 * Implements AlphaGo Zero-style MCTS with:
 * - PUCT (Polynomial Upper Confidence Trees) selection
 * - Neural network guidance for policy and value
 * - Visit count statistics for training data generation
 *
 * Based on: Silver et al., "Mastering the Game of Go without Human Knowledge"
 */

import { TrigoGame } from "./trigo/game";
import type { Move } from "./trigo/types";
import { TrigoTreeAgent } from "./trigoTreeAgent";
import { TrigoEvaluationAgent } from "./trigoEvaluationAgent";


/**
 * MCTS Configuration
 */
export interface MCTSConfig {
	numSimulations: number;      // Number of MCTS simulations per move (default: 600)
	cPuct: number;               // PUCT exploration constant (default: 1.0)
	temperature: number;         // Selection temperature for first 30 moves (default: 1.0)
	dirichletAlpha: number;      // Dirichlet noise alpha parameter (default: 0.03)
	dirichletEpsilon: number;    // Dirichlet noise mixing weight (default: 0.25)
}


/**
 * MCTS Tree Node
 * Stores search statistics for all legal actions from a given game state
 *
 * Memory optimization: Only root node stores the full game state.
 * Non-root nodes only store the action that led to them.
 * During simulation, a working state is cloned once and mutated along the path.
 */
interface MCTSNode {
	state: TrigoGame | null;     // Game state (only stored at root node for memory efficiency)
	parent: MCTSNode | null;     // Parent node (null for root)
	action: Move | null;         // Action that led to this node (null for root)

	// MCTS statistics per action (action key -> value)
	N: Map<string, number>;      // Visit counts N(s,a)
	W: Map<string, number>;      // Total action-value W(s,a)
	Q: Map<string, number>;      // Mean action-value Q(s,a) = W(s,a) / N(s,a)
	P: Map<string, number>;      // Prior probabilities P(s,a) from policy network

	children: Map<string, MCTSNode>;  // Child nodes (action key -> child node)
	expanded: boolean;           // Whether this node has been expanded
	terminalValue: number | null; // Cached terminal value (null if not terminal or not computed)

	// Terminal propagation optimization (GPT-5.1 suggestions)
	depth: number;               // Distance from root (0 for root)
	playerToMove: number;        // Player to move at this node (1=Black, 2=White)
}


/**
 * MCTS Agent
 * Combines tree search with neural network evaluation
 */
export class MCTSAgent {
	private treeAgent: TrigoTreeAgent;           // For policy priors
	private evaluationAgent: TrigoEvaluationAgent;  // For value evaluation
	private config: MCTSConfig;
	public debugMode: boolean = false;  // Enable debug logging


	constructor(
		treeAgent: TrigoTreeAgent,
		evaluationAgent: TrigoEvaluationAgent,
		config: Partial<MCTSConfig> = {}
	) {
		this.treeAgent = treeAgent;
		this.evaluationAgent = evaluationAgent;

		// Default configuration (AlphaGo Zero-inspired)
		this.config = {
			numSimulations: config.numSimulations ?? 600,
			cPuct: config.cPuct ?? 1.0,
			temperature: config.temperature ?? 1.0,
			dirichletAlpha: config.dirichletAlpha ?? 0.03,
			dirichletEpsilon: config.dirichletEpsilon ?? 0.25
		};
	}


	/**
	 * Select best move using MCTS
	 *
	 * @param game Current game state
	 * @param moveNumber Move number (for temperature schedule)
	 * @returns Selected move with visit count statistics
	 */
	async selectMove(game: TrigoGame, moveNumber: number): Promise<{
		move: Move;
		visitCounts: Map<string, number>;
		searchPolicy: Map<string, number>;  // Normalized visit counts π(a|s)
		rootValue: number;
	}> {
		// Create root node
		const root = this.createNode(game, null, null);

		// Check if root is already terminal (game over)
		const terminalResult = this.checkTerminal(game);
		if (terminalResult !== null) {
			const currentPlayer = game.getCurrentPlayer();
			return {
				move: { player: currentPlayer === 1 ? "black" : "white", isPass: true },
				visitCounts: new Map(),
				searchPolicy: new Map(),
				rootValue: terminalResult
			};
		}

		// Run MCTS simulations
		for (let i = 0; i < this.config.numSimulations; i++) {
			await this.runSimulation(root, i);
		}

		// Temperature schedule: τ=1 for first 30 moves, τ→0 afterward
		const temperature = moveNumber < 30 ? this.config.temperature : 0.01;

		// Select move based on visit counts
		const move = this.selectPlayAction(root, temperature);

		// Set correct player for returned move
		const currentPlayer = game.getCurrentPlayer();
		move.player = currentPlayer === 1 ? "black" : "white";

		// Compute search policy (normalized visit counts)
		const searchPolicy = this.computeSearchPolicy(root, temperature);

		// Get root value estimate (average Q-value weighted by visit counts)
		const rootValue = this.getRootValue(root);

		return {
			move,
			visitCounts: new Map(root.N),
			searchPolicy,
			rootValue
		};
	}


	/**
	 * Run a single MCTS simulation
	 * Select -> Expand & Evaluate -> Backup
	 *
	 * Memory optimization: Clone state once at start, mutate along path.
	 * This reduces memory from O(nodes) to O(simulations).
	 */
	private async runSimulation(root: MCTSNode, simIndex?: number): Promise<void> {
		// Invariant: root node must always have a non-null state
		if (!root.state) {
			throw new Error("runSimulation: root node must have a non-null state");
		}

		// Clone root state once for this simulation
		const workingState = root.state.clone();

		// 1. Selection: Traverse tree using PUCT until reaching unexpanded node
		const { node, path } = this.select(root, workingState);

		// 2. Expand and Evaluate: Get value from neural network
		const value = await this.expandAndEvaluate(node, workingState);

		// Debug logging
		if (this.debugMode && simIndex !== undefined && simIndex < 10) {
			const pathStr = path.map(p => p.actionKey).join(" → ");
			const terminalStr = node.terminalValue !== null ? " [TERMINAL]" : "";
			console.log(`Sim ${simIndex + 1}: ${pathStr || "(root)"} → value=${value.toFixed(4)}${terminalStr}`);
		}

		// 3. Backup: Propagate value up the tree
		this.backup(path, value);
	}


	/**
	 * Selection phase: Traverse tree using PUCT
	 *
	 * @param root Root node to start selection from
	 * @param workingState Mutable game state that gets updated along the path
	 * @returns Leaf node and path taken
	 */
	private select(root: MCTSNode, workingState: TrigoGame): {
		node: MCTSNode;
		path: Array<{ node: MCTSNode; actionKey: string }>;
	} {
		const path: Array<{ node: MCTSNode; actionKey: string }> = [];
		let node = root;

		// Traverse until we reach an unexpanded node
		while (node.expanded) {
			// GPT-5.1 recommendation: Stop at terminal nodes immediately
			// Terminal nodes should not be expanded or evaluated further
			if (node.terminalValue !== null) {
				break;  // Return terminal node, use its cached value
			}

			// Get all legal actions
			const actionKeys = Array.from(node.P.keys());

			// Terminal node check: if expanded but no actions, this is a terminal node
			if (actionKeys.length === 0) {
				break;  // Return this terminal node as leaf
			}

			// Select action with best PUCT value
			// Both players select HIGHEST PUCT value:
			// - Black: PUCT = -Q + U, max PUCT = max(-Q) = min(Q) ✓
			// - White: PUCT = Q + U, max PUCT = max(Q) ✓
			const currentPlayer = workingState.getCurrentPlayer();
			const isWhite = currentPlayer === 2;

			let bestActionKey = actionKeys[0];
			let bestPuct = this.calculatePUCT(node, bestActionKey, isWhite);

			for (let i = 1; i < actionKeys.length; i++) {
				const actionKey = actionKeys[i];
				const puct = this.calculatePUCT(node, actionKey, isWhite);

				if (puct > bestPuct) {
					bestPuct = puct;
					bestActionKey = actionKey;
				}
			}

			// Record path
			path.push({ node, actionKey: bestActionKey });

			// Apply action to working state (instead of cloning)
			const action = this.decodeAction(bestActionKey);
			if (action.isPass) {
				workingState.pass();
			} else if (action.x !== undefined && action.y !== undefined && action.z !== undefined) {
				workingState.drop({ x: action.x, y: action.y, z: action.z });
			}

			// Move to child (create if doesn't exist)
			if (!node.children.has(bestActionKey)) {
				// Create child node WITHOUT storing state (memory optimization)
				const childNode = this.createNode(null, node, action);
				node.children.set(bestActionKey, childNode);
			}

			node = node.children.get(bestActionKey)!;
		}

		return { node, path };
	}


	/**
	 * Expand and evaluate leaf node using neural networks
	 *
	 * @param node Leaf node to expand
	 * @param workingState Current game state at this node (passed from simulation)
	 * @returns Value estimate from evaluation network
	 */
	private async expandAndEvaluate(node: MCTSNode, workingState: TrigoGame): Promise<number> {
		// Check if terminal value is already cached
		if (node.terminalValue !== null) {
			return node.terminalValue;
		}

		// Check if game is over (terminal state)
		const terminalValue = this.checkTerminal(workingState);
		if (terminalValue !== null) {
			// Mark terminal node as expanded with empty action set to prevent revisits
			// Cache the terminal value to avoid repeated checks
			node.expanded = true;
			node.terminalValue = terminalValue;
			node.P = new Map();  // No actions available (terminal)
			node.N = new Map();
			node.W = new Map();
			node.Q = new Map();
			node.children = new Map();

			return terminalValue;
		}

		// Non-terminal state: expand with policy network and evaluate
		// Get all valid moves
		const currentPlayer = workingState.getCurrentPlayer() === 1 ? "black" : "white";
		const validPositions = workingState.validMovePositions();
		const moves: Move[] = validPositions.map(pos => ({
			x: pos.x,
			y: pos.y,
			z: pos.z,
			player: currentPlayer
		}));
		moves.push({ player: currentPlayer, isPass: true });

		// Get policy priors from tree agent
		const scoredMoves = await this.treeAgent.scoreMoves(workingState, moves);

		// Convert log probabilities to probabilities and normalize (stable softmax)
		const maxScore = Math.max(...scoredMoves.map(m => m.score));
		const expScores = scoredMoves.map(m => Math.exp(m.score - maxScore));
		const sumExp = expScores.reduce((sum, exp) => sum + exp, 0);

		// Initialize priors P(s,a)
		node.P = new Map();
		node.N = new Map();
		node.W = new Map();
		node.Q = new Map();

		// Handle edge case: if all scores are -Infinity or sumExp is 0/NaN
		const useFallback = !isFinite(sumExp) || sumExp < 1e-10;

		for (let i = 0; i < scoredMoves.length; i++) {
			const actionKey = this.encodeAction(scoredMoves[i].move);

			// Use uniform distribution as fallback if normalization fails
			const prior = useFallback ? (1.0 / scoredMoves.length) : (expScores[i] / sumExp);

			node.P.set(actionKey, prior);
			node.N.set(actionKey, 0);
			node.W.set(actionKey, 0);
			node.Q.set(actionKey, 0);
		}

		// Add Dirichlet noise at root
		if (node.parent === null) {
			this.addDirichletNoise(node.P);
		}

		// Mark as expanded
		node.expanded = true;

		// Get value estimate from evaluation agent
		const evaluation = await this.evaluationAgent.evaluatePosition(workingState);

		// Return value directly (value model returns white-positive by design)
		return evaluation.value;
	}


	/**
	 * Backup phase: Propagate value up the tree
	 *
	 * White-positive minimax propagation:
	 * - All Q-values represent White's advantage (positive = White winning)
	 * - When all children are terminal, mark parent as terminal with minimax value:
	 *   * White's turn: terminal_value = max(children terminal values)
	 *   * Black's turn: terminal_value = min(children terminal values)
	 *
	 * Improvements (based on GPT-5.1 review):
	 * - Uses stored playerToMove instead of computing from depth
	 * - Uses stored depth instead of recomputing via parent walk
	 *
	 * @param path Path from root to leaf
	 * @param value Value to propagate (white-positive: positive = white winning)
	 */
	private backup(path: Array<{ node: MCTSNode; actionKey: string }>, value: number): void {
		// Propagate value up the tree (white-positive throughout)
		// No sign flipping needed - Q values are always white-positive
		for (let i = path.length - 1; i >= 0; i--) {
			const { node, actionKey } = path[i];

			// Update statistics
			const n = node.N.get(actionKey) ?? 0;
			const w = node.W.get(actionKey) ?? 0;

			node.N.set(actionKey, n + 1);
			node.W.set(actionKey, w + value);
			node.Q.set(actionKey, (w + value) / (n + 1));

			// ========== Terminal State Propagation ==========
			// Check if this node should be marked as terminal
			// Condition: node is fully expanded AND all children are terminal AND node itself not yet marked
			if (node.expanded && node.terminalValue === null) {
				const actionKeys = Array.from(node.P.keys());

				// Skip propagation if no actions (already a terminal leaf, or error state)
				if (actionKeys.length === 0) {
					continue;
				}

				// Check if ALL children are terminal
				let allChildrenTerminal = true;
				const childTerminalValues: number[] = [];

				for (const key of actionKeys) {
					const child = node.children.get(key);

					// If child doesn't exist yet, not all children explored
					if (!child) {
						allChildrenTerminal = false;
						break;
					}

					// If child is not terminal, not all children terminal
					if (child.terminalValue === null) {
						allChildrenTerminal = false;
						break;
					}

					// Child is terminal, collect its value
					childTerminalValues.push(child.terminalValue);
				}

				// If all children are terminal, mark current node as terminal with minimax value
				if (allChildrenTerminal && childTerminalValues.length > 0) {
					// Use stored playerToMove instead of computing from depth (GPT-5.1 suggestion)
					const isWhiteTurn = node.playerToMove === 2;  // 2 = White, 1 = Black

					// Apply minimax: choose best child value from current player's perspective
					let terminalValue: number;

					if (isWhiteTurn) {
						// White maximizes Q-value (white-positive)
						terminalValue = Math.max(...childTerminalValues);
					} else {
						// Black minimizes Q-value (white-positive)
						terminalValue = Math.min(...childTerminalValues);
					}

					// Mark this node as terminal with the minimax value
					node.terminalValue = terminalValue;

					// Debug logging for terminal propagation
					if (this.debugMode) {
						const playerName = isWhiteTurn ? 'White' : 'Black';
						console.log(
							`[Terminal Propagation] Node at depth ${node.depth} (${playerName}) marked terminal: ` +
							`value=${terminalValue.toFixed(4)}, children=[${childTerminalValues.map(v => v.toFixed(2)).join(', ')}]`
						);
					}
				}
			}
			// ================================================
		}
	}


	/**
	 * Calculate PUCT value for action selection
	 *
	 * PUCT = Q(s,a) + U(s,a)  [for White, who maximizes]
	 * PUCT = -Q(s,a) + U(s,a) [for Black, who minimizes]
	 * where U(s,a) = c_puct * P(s,a) * sqrt(Σ_b N(s,b)) / (1 + N(s,a))
	 *
	 * @param node Current node
	 * @param actionKey Action to evaluate
	 * @param isWhite Whether current player is White
	 * @returns PUCT value
	 */
	private calculatePUCT(node: MCTSNode, actionKey: string, isWhite: boolean): number {
		const Q = node.Q.get(actionKey) ?? 0;
		const N = node.N.get(actionKey) ?? 0;
		const P = node.P.get(actionKey) ?? 0;

		// Sum of all visit counts at this node
		const totalN = Array.from(node.N.values()).reduce((sum, n) => sum + n, 0);

		// Exploration term: U(s,a) = c_puct * P(s,a) * sqrt(Σ_b N(s,b) + 1) / (1 + N(s,a))
		// +1 in sqrt to avoid zero exploration when node first expanded
		const U = this.config.cPuct * P * Math.sqrt(totalN + 1) / (1 + N);

		// Black minimizes Q (flips sign), White maximizes Q
		return (isWhite ? Q : -Q) + U;
	}


	/**
	 * Select action to play based on visit counts
	 * Uses temperature to control exploration vs exploitation
	 *
	 * @param node Root node
	 * @param temperature Selection temperature (τ=1 for exploration, τ→0 for greedy)
	 * @returns Selected move
	 */
	private selectPlayAction(node: MCTSNode, temperature: number): Move {
		const actionKeys = Array.from(node.N.keys());

		// Edge case: no actions available (unexpanded root or terminal state)
		if (actionKeys.length === 0) {
			// Fallback to priors if available
			const priorKeys = Array.from(node.P.keys());
			if (priorKeys.length > 0) {
				// Sample from prior distribution
				const priors = priorKeys.map(key => node.P.get(key) ?? 0);
				const sumP = priors.reduce((sum, p) => sum + p, 0);
				if (sumP > 0) {
					let rand = Math.random() * sumP;
					for (let i = 0; i < priorKeys.length; i++) {
						rand -= priors[i];
						if (rand <= 0) {
							return this.decodeAction(priorKeys[i]);
						}
					}
					return this.decodeAction(priorKeys[priorKeys.length - 1]);
				}
				// Uniform fallback
				const randomIndex = Math.floor(Math.random() * priorKeys.length);
				return this.decodeAction(priorKeys[randomIndex]);
			}
			// No actions at all - return Pass as last resort
			return { player: "black", isPass: true };
		}

		if (temperature < 0.01) {
			// Greedy: Select action with highest visit count
			let bestActionKey = actionKeys[0];
			let bestN = node.N.get(bestActionKey) ?? 0;

			for (let i = 1; i < actionKeys.length; i++) {
				const actionKey = actionKeys[i];
				const n = node.N.get(actionKey) ?? 0;
				if (n > bestN) {
					bestN = n;
					bestActionKey = actionKey;
				}
			}

			return this.decodeAction(bestActionKey);
		} else {
			// Temperature-based sampling: π(a|s) ∝ N(s,a)^(1/τ)
			const nValues = actionKeys.map(key => node.N.get(key) ?? 0);
			const nPowered = nValues.map(n => Math.pow(n, 1 / temperature));
			const sumN = nPowered.reduce((sum, n) => sum + n, 0);

			// Handle edge case: if all visits are 0 or sum is invalid
			if (!isFinite(sumN) || sumN <= 0) {
				// Fallback to uniform random selection (or use priors)
				const randomIndex = Math.floor(Math.random() * actionKeys.length);
				return this.decodeAction(actionKeys[randomIndex]);
			}

			// Sample from distribution
			let rand = Math.random() * sumN;
			for (let i = 0; i < actionKeys.length; i++) {
				rand -= nPowered[i];
				if (rand <= 0) {
					return this.decodeAction(actionKeys[i]);
				}
			}

			// Fallback (shouldn't reach here due to floating point precision)
			return this.decodeAction(actionKeys[actionKeys.length - 1]);
		}
	}


	/**
	 * Compute search policy from visit counts
	 * π(a|s) = N(s,a)^(1/τ) / Σ_b N(s,b)^(1/τ)
	 *
	 * @param node Root node
	 * @param temperature Selection temperature
	 * @returns Normalized policy distribution
	 */
	private computeSearchPolicy(node: MCTSNode, temperature: number): Map<string, number> {
		const policy = new Map<string, number>();
		const actionKeys = Array.from(node.N.keys());

		// Compute π(a|s) ∝ N(s,a)^(1/τ)
		const nPowered = actionKeys.map(key => Math.pow(node.N.get(key) ?? 0, 1 / temperature));
		const sumN = nPowered.reduce((sum, n) => sum + n, 0);

		// Handle edge case: if all visits are 0 or sum is invalid
		if (!isFinite(sumN) || sumN <= 0) {
			// Fallback to uniform distribution
			const uniform = 1 / actionKeys.length;
			for (const key of actionKeys) {
				policy.set(key, uniform);
			}
			return policy;
		}

		for (let i = 0; i < actionKeys.length; i++) {
			const actionKey = actionKeys[i];
			policy.set(actionKey, nPowered[i] / sumN);
		}

		return policy;
	}


	/**
	 * Get root value estimate (weighted average of Q-values)
	 */
	private getRootValue(node: MCTSNode): number {
		const actionKeys = Array.from(node.N.keys());
		const totalN = Array.from(node.N.values()).reduce((sum, n) => sum + n, 0);

		if (totalN === 0) {
			return 0;
		}

		let weightedSum = 0;
		for (const actionKey of actionKeys) {
			const q = node.Q.get(actionKey) ?? 0;
			const n = node.N.get(actionKey) ?? 0;
			weightedSum += q * n;
		}

		return weightedSum / totalN;
	}


	/**
	 * Add Dirichlet noise to prior probabilities at root
	 * P(s,a) = (1 - ε) * p_a + ε * η_a
	 * where η ~ Dir(α)
	 *
	 * Note: Pass move is excluded from noise to prevent exploration of
	 * clearly suboptimal opening passes.
	 */
	private addDirichletNoise(priors: Map<string, number>): void {
		// Exclude Pass from Dirichlet noise - it should not be explored at root
		const actionKeys = Array.from(priors.keys()).filter(key => key !== "pass");
		const alpha = this.config.dirichletAlpha;
		const epsilon = this.config.dirichletEpsilon;

		// If only Pass is available, no noise to add
		if (actionKeys.length === 0) {
			return;
		}

		// Generate Dirichlet noise (simplified using Gamma distribution)
		const noise: number[] = [];
		let noiseSum = 0;

		for (let i = 0; i < actionKeys.length; i++) {
			// Gamma(α, 1) approximation using rejection sampling
			const sample = this.sampleGamma(alpha);
			noise.push(sample);
			noiseSum += sample;
		}

		// Handle edge case: if all Gamma samples are 0 (extremely unlikely but possible)
		if (!isFinite(noiseSum) || noiseSum <= 0) {
			// Fallback: use uniform noise (no mixing, keep original priors)
			return;
		}

		// Normalize and mix with priors (only for non-Pass actions)
		for (let i = 0; i < actionKeys.length; i++) {
			const actionKey = actionKeys[i];
			const prior = priors.get(actionKey) ?? 0;
			const noiseFraction = noise[i] / noiseSum;
			priors.set(actionKey, (1 - epsilon) * prior + epsilon * noiseFraction);
		}
	}


	/**
	 * Sample from Gamma distribution using Marsaglia and Tsang method (2000)
	 * Used for Dirichlet noise generation
	 */
	private sampleGamma(alpha: number): number {
		if (alpha <= 0) {
			throw new Error("Gamma distribution alpha must be > 0");
		}

		// For alpha < 1, use transformation: sample Gamma(alpha+1) then multiply by U^(1/alpha)
		if (alpha < 1) {
			const u = Math.random();
			const g = this.sampleGamma(alpha + 1);
			return g * Math.pow(u, 1 / alpha);
		}

		// For alpha >= 1, use Marsaglia and Tsang's method
		const d = alpha - 1/3;
		const c = 1 / Math.sqrt(9 * d);

		while (true) {
			let x, v;
			do {
				x = this.randomNormal();
				v = 1 + c * x;
			} while (v <= 0);

			v = v * v * v;
			const u = Math.random();

			// Fast acceptance check
			if (u < 1 - 0.0331 * x * x * x * x) {
				return d * v;
			}

			// Fallback acceptance check
			if (Math.log(u) < 0.5 * x * x + d * (1 - v + Math.log(v))) {
				return d * v;
			}
		}
	}


	/**
	 * Sample from standard normal distribution (Box-Muller transform)
	 */
	private randomNormal(): number {
		const u1 = Math.random();
		const u2 = Math.random();
		return Math.sqrt(-2 * Math.log(u1)) * Math.cos(2 * Math.PI * u2);
	}


	/**
	 * Check if game state is terminal and return value if so
	 *
	 * Terminal conditions (checked in order of cost):
	 * 1. Game already finished (double-pass or resignation) - CHEAPEST
	 * 2. Board coverage > 50% AND naturally terminal (calls isNaturallyTerminal) - EXPENSIVE
	 *
	 * NOTE: The coverage check (> 50%) is an optimization to avoid expensive
	 * territory calculations on sparse boards where natural termination is unlikely.
	 *
	 * @param state Game state to check
	 * @returns Terminal value (white-positive) if terminal, null otherwise
	 */
	private checkTerminal(state: TrigoGame): number | null {
		// 1. Check if game is already finished (double-pass, resignation, etc.)
		// This is the cheapest check - just reading a status flag
		if (state.getGameStatus() === "finished") {
			const territory = state.getTerritory();
			return this.calculateTerminalValue(territory);
		}

		// 2. Check for "natural" game end (all territory claimed, no capturing moves)
		// Optimization: Only check if board is reasonably full (> 50% coverage)
		// because natural termination is unlikely on sparse boards
		const board = state.getBoard();
		const shape = state.getShape();
		const totalPositions = shape.x * shape.y * shape.z;

		// Count stones (cheap)
		let stoneCount = 0;

		for (let x = 0; x < shape.x; x++) {
			for (let y = 0; y < shape.y; y++) {
				for (let z = 0; z < shape.z; z++) {
					const stone = board[x][y][z];
					if (stone === 1 || stone === 2) {  // StoneType.BLACK or WHITE
						stoneCount++;
					}
				}
			}
		}

		const coverageRatio = stoneCount / totalPositions;

		// Only check for natural termination if board is reasonably full
		if (coverageRatio > 0.5) {
			if (state.isNaturallyTerminal()) {
				const territory = state.getTerritory();
				return this.calculateTerminalValue(territory);
			}
		}


		return null;  // Not terminal
	}


	/**
	 * Calculate terminal value from territory scores
	 * Uses logarithmic scaling matching the training code
	 *
	 * @param territory Territory counts from game
	 * @returns Value (white-positive: positive = white winning)
	 */
	private calculateTerminalValue(territory: { black: number; white: number; neutral: number }): number {
		const scoreDiff = territory.white - territory.black;

		if (Math.abs(scoreDiff) < 1e-6) {
			// Draw/tie case
			return 0.0;
		}

		// Match training formula from valueCausalLoss.py:_expand_value_targets
		// target = sign(score) * (1 + log(|score|)) * territory_value_factor
		// The log term incentivizes winning by larger margins (logarithmically)
		const territory_value_factor = 1.0;  // Default from training config
		const signScore = Math.sign(scoreDiff);
		return signScore * (1 + Math.log(Math.abs(scoreDiff))) * territory_value_factor;
	}


	/**
	 * Create a new MCTS node
	 *
	 * @param state Game state (only provided for root node, null for others to save memory)
	 * @param parent Parent node
	 * @param action Action that led to this node
	 * @param playerToMove Player to move at this node (derived from state if available)
	 */
	private createNode(state: TrigoGame | null, parent: MCTSNode | null, action: Move | null, playerToMove?: number): MCTSNode {
		// Determine player to move
		let player: number;
		if (playerToMove !== undefined) {
			player = playerToMove;
		} else if (state) {
			// Most reliable: derive from actual game state
			player = state.getCurrentPlayer();
		} else if (parent) {
			// NOTE: Fallback assumes strictly alternating turns (no passes keeping same player)
			// For standard Go-like games with strict alternation, this is safe.
			player = parent.playerToMove === 1 ? 2 : 1;
		} else {
			// Default to Black for root if no info
			player = 1;
		}

		return {
			state,
			parent,
			action,
			N: new Map(),
			W: new Map(),
			Q: new Map(),
			P: new Map(),
			children: new Map(),
			expanded: false,
			terminalValue: null,
			depth: parent ? parent.depth + 1 : 0,
			playerToMove: player
		};
	}


	/**
	 * Encode move to string key for storage in maps
	 * Note: Only encodes position, player info is handled separately
	 */
	private encodeAction(move: Move): string {
		if (move.isPass) {
			return "pass";
		}
		return `${move.x},${move.y},${move.z}`;
	}


	/**
	 * Decode string key back to move
	 * Note: Returns move with placeholder player - caller must set correct player
	 * based on game state before using the move externally
	 */
	private decodeAction(key: string): Move {
		if (key === "pass") {
			// Player is placeholder - will be set by caller (selectMove sets it from game state)
			return { player: "black", isPass: true };
		}

		const [x, y, z] = key.split(",").map(Number);
		// Player is placeholder - will be set by caller (selectMove sets it from game state)
		return { player: "black", x, y, z };
	}
}