Spaces:

k-l-lambda
/

trigo

Sleeping

File size: 15,916 Bytes

15f353f

/**
 * TGN File Evaluation Tool
 *
 * This script evaluates TGN files using the value prediction model.
 * It cleans TGN content by removing trailing comments, Pass moves, and spaces,
 * then evaluates the position value using TrigoEvaluationAgent.
 *
 * Features:
 * - Batch evaluation of all .tgn files in a directory
 * - TGN cleaning (remove trailing comments, Pass moves, trim spaces)
 * - Position value prediction in range [-1, 1]
 * - Output results to console and optional JSON file
 *
 * Usage:
 *   npx tsx tools/evaluateTgnFiles.ts [options]
 *
 * Options:
 *   --model <path>      Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)
 *   --input <path>      Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)
 *   --output <path>     Optional JSON output file for results
 *   --verbose           Enable verbose logging
 *   --help              Show this help message
 *
 * Examples:
 *   # Evaluate all TGN files in directory (uses model from .env)
 *   npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay
 *
 *   # Evaluate with specific model
 *   npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay
 *
 *   # Save results to JSON file
 *   npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json
 */

import * as ort from "onnxruntime-node";
import * as path from "path";
import * as fs from "fs";
import { fileURLToPath } from "url";
import { TrigoGame } from "../inc/trigo/game";
import { ModelInferencer } from "../inc/modelInferencer";
import { TrigoEvaluationAgent } from "../inc/trigoEvaluationAgent";
import { initializeParsers } from "../inc/trigo/parserInit";
import { loadEnvConfig, getOnnxModelPaths, getAbsoluteModelPath, getOnnxSessionOptions } from "../inc/config";


// ES modules do not provide __filename/__dirname, so derive them from import.meta.url.
// __dirname is used below to build the default input directory.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Load environment configuration at module load time (top-level await).
// NOTE(review): presumably this populates the values read by getOnnxModelPaths() below — confirm in ../inc/config.
await loadEnvConfig();

// Default model paths; parseArgs() uses `evaluationModel` as the fallback for --model.
const defaultModelPaths = getOnnxModelPaths();


/**
 * Run-time configuration for the evaluation tool, as assembled by parseArgs().
 */
interface EvaluationConfig {
	// Path of the ONNX model file handed to the inference session (--model)
	modelPath: string;
	// A single .tgn file or a directory that is scanned for .tgn files (--input)
	inputPath: string;
	// When set, a JSON report is written to this path (--output)
	outputPath?: string;
	// Enables detailed per-file logging instead of one progress line per file (--verbose)
	verbose: boolean;
	// Forwarded to ModelInferencer; parseArgs() fixes it at 128 (no CLI option)
	vocabSize: number;
	// Forwarded to ModelInferencer; parseArgs() fixes it at 256 (no CLI option)
	seqLen: number;
}


/**
 * Evaluation result for a single TGN file, as produced by evaluateTgnFile().
 */
interface TgnEvaluationResult {
	// Base name of the evaluated file
	filename: string;
	// Path of the evaluated file as it was passed in
	filepath: string;
	// Value reported by the evaluation agent; 0 when evaluation failed
	value: number;
	// Interpretation text reported by the agent; "Error" when evaluation failed
	interpretation: string;
	// Length of the parsed game's history; 0 when evaluation failed
	moveCount: number;
	// Board dimensions formatted as "x×y×z"; "Unknown" when evaluation failed
	boardShape: string;
	// TGN text after cleanTGN(); empty string when evaluation failed
	cleanedTgn: string;
	// Numeric value parsed from the trailing ';' comment, if one was found
	commentValue?: number;
	// Whether Math.sign of the model value equals Math.sign of the comment value;
	// only set when a comment value exists
	signMatch?: boolean;
	// Error message; present only when evaluation failed
	error?: string;
}


/**
 * Parse command line arguments into an EvaluationConfig.
 *
 * Defaults: the model path comes from the environment-derived default paths,
 * the input path is `output/selfplay` next to this script, verbose is off,
 * and vocabSize/seqLen are fixed at 128/256 (there is no CLI option for them).
 *
 * The process exits with code 1 (after printing an error) when an option is
 * unknown, when a value-taking option has no value, or when the model or
 * input path does not exist. `--help` prints usage and exits with code 0.
 * Arguments that do not start with "--" and are not consumed as an option
 * value are ignored.
 *
 * @returns The validated configuration
 */
function parseArgs(): EvaluationConfig {
	const args = process.argv.slice(2);
	const config: EvaluationConfig = {
		modelPath: getAbsoluteModelPath(defaultModelPaths.evaluationModel),
		inputPath: path.join(__dirname, "output/selfplay"),
		verbose: false,
		vocabSize: 128,
		seqLen: 256
	};

	// Returns the value that follows the option at `index`. A missing value
	// (end of argv, or another "--" flag in its place) is reported explicitly
	// instead of being stored as `undefined` or as the next flag's name.
	const takeValue = (index: number): string => {
		const value: string | undefined = args[index + 1];
		if (value !== undefined && !value.startsWith("--")) {
			return value;
		}
		console.error(`Error: Missing value for option: ${args[index]}`);
		printHelp();
		return process.exit(1);
	};

	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		switch (arg) {
			case "--model":
				config.modelPath = takeValue(i);
				i++; // skip the consumed value
				break;
			case "--input":
				config.inputPath = takeValue(i);
				i++;
				break;
			case "--output":
				config.outputPath = takeValue(i);
				i++;
				break;
			case "--verbose":
				config.verbose = true;
				break;
			case "--help":
				printHelp();
				process.exit(0);
			default:
				if (arg.startsWith("--")) {
					console.error(`Unknown option: ${arg}`);
					printHelp();
					process.exit(1);
				}
		}
	}

	// The model path can still be empty when neither --model nor the env default is set
	if (!config.modelPath) {
		console.error("Error: --model argument is required or set ONNX_EVALUATION_MODEL env variable");
		printHelp();
		process.exit(1);
	}

	if (!fs.existsSync(config.modelPath)) {
		console.error(`Error: Model file not found: ${config.modelPath}`);
		process.exit(1);
	}

	if (!fs.existsSync(config.inputPath)) {
		console.error(`Error: Input path not found: ${config.inputPath}`);
		process.exit(1);
	}

	return config;
}


/**
 * Write the usage text (options and examples) to stdout with a single console.log call.
 */
function printHelp(): void {
	const command = "npx tsx tools/evaluateTgnFiles.ts";

	// [flag, description] pairs; flags are padded to a 20-character column
	const optionTable: Array<[string, string]> = [
		["--model <path>", "Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)"],
		["--input <path>", "Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)"],
		["--output <path>", "Optional JSON output file for results"],
		["--verbose", "Enable verbose logging"],
		["--help", "Show this help message"]
	];

	// [caption, arguments] pairs; each renders as a comment line plus a command line
	const exampleTable: Array<[string, string]> = [
		["Evaluate all TGN files in directory (uses model from .env)", "--input ./tools/output/selfplay"],
		["Evaluate with specific model", "--model ./public/onnx/model.onnx --input ./tools/output/selfplay"],
		["Save results to JSON file", "--model ./public/onnx/model.onnx --input ./games --output ./results.json"]
	];

	const optionLines = optionTable.map(([flag, description]) => `  ${flag.padEnd(20)}${description}`);
	const exampleBlocks = exampleTable.map(([caption, flags]) => `  # ${caption}\n  ${command} ${flags}`);

	// Leading and trailing empty entries reproduce the blank first/last line of the help text
	const helpText = [
		"",
		`Usage: ${command} [options]`,
		"",
		"Options:",
		...optionLines,
		"",
		"Examples:",
		exampleBlocks.join("\n\n"),
		""
	].join("\n");

	console.log(helpText);
}


/**
 * Read the numeric value from the comment lines that trail the TGN content.
 *
 * Lines are scanned from the end. Blank lines are skipped, comment lines
 * (starting with ';') are checked for a number such as "; -16" or "; 10",
 * and the scan stops at the first line that is neither blank nor a comment.
 * A comment line without a number does not stop the scan.
 *
 * @param tgnContent - Raw (uncleaned) TGN text
 * @returns The parsed number, or undefined when no trailing comment carries one
 */
function extractCommentValue(tgnContent: string): number | undefined {
	// ';' followed by optional whitespace and a signed integer or decimal
	const numericComment = /;\s*([+-]?\d+(?:\.\d+)?)/;

	const fromLast = tgnContent
		.split('\n')
		.map(rawLine => rawLine.trim())
		.reverse();

	for (const text of fromLast) {
		if (text.length === 0) {
			continue;
		}

		// Reached the game body: nothing above it counts as a tail comment
		if (!text.startsWith(';')) {
			return undefined;
		}

		const found = numericComment.exec(text);
		if (found) {
			return parseFloat(found[1]);
		}
	}

	return undefined;
}


/**
 * Strip the tail of a TGN document so only the game content remains.
 *
 * Works on whole lines, in two passes from the end:
 *   1. drop trailing blank lines and comment lines (starting with ';');
 *   2. drop trailing lines that are a Pass ("P") or end with " P".
 * The remaining text is re-joined and trimmed.
 * Matches the behavior of test_evaluation_mode.js.
 *
 * @param tgnContent - Raw TGN text
 * @returns The cleaned TGN text
 */
function cleanTGN(tgnContent: string): string {
	const rows = tgnContent.split('\n');

	const isBlankOrComment = (row: string): boolean => {
		const text = row.trim();
		return text === '' || text.startsWith(';');
	};

	const isPassRow = (row: string): boolean => {
		const text = row.trim();
		return text === 'P' || text.endsWith(' P');
	};

	// `keep` is the number of leading rows that survive the trimming
	let keep = rows.length;

	while (keep > 0 && isBlankOrComment(rows[keep - 1])) {
		keep--;
	}

	while (keep > 0 && isPassRow(rows[keep - 1])) {
		keep--;
	}

	return rows.slice(0, keep).join('\n').trim();
}


/**
 * Build the evaluation agent: create an ONNX inference session for the
 * configured model, attach it to a ModelInferencer and wrap that in a
 * TrigoEvaluationAgent. Progress is logged to the console.
 *
 * @param config - Supplies modelPath, vocabSize and seqLen
 * @returns The ready-to-use evaluation agent
 */
async function initializeAgent(config: EvaluationConfig): Promise<TrigoEvaluationAgent> {
	const { modelPath, vocabSize, seqLen } = config;

	const banner = [
		"Initializing Evaluation Agent...",
		`  Model: ${modelPath}`,
		`  Vocab Size: ${vocabSize}`,
		`  Sequence Length: ${seqLen}`
	];
	for (const line of banner) {
		console.log(line);
	}

	const options = getOnnxSessionOptions();
	const onnxSession = await ort.InferenceSession.create(modelPath, options);

	// The ORT tensor class and session are passed through `any` casts, as in the inferencer's other callers
	const modelInferencer = new ModelInferencer(ort.Tensor as any, { vocabSize, seqLen, modelPath });
	modelInferencer.setSession(onnxSession as any);

	const evaluationAgent = new TrigoEvaluationAgent(modelInferencer);
	console.log("✓ Agent initialized\n");
	return evaluationAgent;
}


/**
 * Evaluate one TGN file and summarize the outcome.
 *
 * Reads the file, extracts the numeric tail comment (if any), cleans the TGN
 * text, parses it into a game and asks the agent for a position value. Any
 * exception is logged and turned into an error result instead of being rethrown.
 *
 * @param agent - Agent used to evaluate the parsed game
 * @param filepath - Path of the TGN file to read
 * @param config - Only `verbose` is consulted, to enable per-step logging
 * @returns The result record; on failure `error` is set and the other fields hold placeholders
 */
async function evaluateTgnFile(
	agent: TrigoEvaluationAgent,
	filepath: string,
	config: EvaluationConfig
): Promise<TgnEvaluationResult> {
	const filename = path.basename(filepath);

	try {
		const rawContent = fs.readFileSync(filepath, "utf-8");

		// The tail comment has to be read first, because cleaning strips it
		const commentValue = extractCommentValue(rawContent);
		const cleanedTgn = cleanTGN(rawContent);
		const verbose = config.verbose;

		if (verbose) {
			const intro = [
				`\nProcessing: ${filename}`,
				`  Raw length: ${rawContent.length} chars`,
				`  Cleaned length: ${cleanedTgn.length} chars`
			];
			if (commentValue !== undefined) {
				intro.push(`  Comment value: ${commentValue}`);
			}
			intro.forEach(line => console.log(line));
		}

		// Build the game from the cleaned TGN text
		const game = TrigoGame.fromTGN(cleanedTgn);

		if (verbose) {
			console.log(`  Game type: ${typeof game}`);
			console.log(`  Is TrigoGame: ${game instanceof TrigoGame}`);
			console.log(`  Has getHistory: ${typeof game.getHistory}`);
		}

		const { x, y, z } = game.getShape();
		const boardShape = `${x}×${y}×${z}`;
		const moveCount = game.getHistory().length;

		if (verbose) {
			console.log(`  Board: ${boardShape}`);
			console.log(`  Moves: ${moveCount}`);
		}

		const { value, interpretation } = await agent.evaluatePosition(game);

		// Signs are compared only when the file carried a numeric comment.
		// Note: There appears to be a sign inconsistency between model output and training data
		const signMatch = commentValue === undefined
			? undefined
			: Math.sign(value) === Math.sign(commentValue);

		if (verbose) {
			console.log(`  Value: ${value.toFixed(4)}`);
			console.log(`  ${interpretation}`);
			if (commentValue !== undefined && signMatch !== undefined) {
				console.log(`  Comment: ${commentValue}, Sign match: ${signMatch ? '✓' : '✗'}`);
			}
		}

		return {
			filename,
			filepath,
			value,
			interpretation,
			moveCount,
			boardShape,
			cleanedTgn,
			commentValue,
			signMatch
		};
	} catch (failure) {
		const message = failure instanceof Error ? failure.message : String(failure);
		console.error(`✗ Error evaluating ${filename}: ${message}`);

		// Placeholder record so the caller can count and report the failure
		return {
			filename,
			filepath,
			value: 0,
			interpretation: "Error",
			moveCount: 0,
			boardShape: "Unknown",
			cleanedTgn: "",
			error: message
		};
	}
}


/**
 * Resolve the input path to the list of TGN files to evaluate.
 *
 * - A file path must end with ".tgn" and is returned as a one-element list.
 * - A directory is scanned (non-recursively) for entries ending with ".tgn".
 *   Sub-directories are skipped even if their name ends with ".tgn", and the
 *   result is sorted by file name so runs are reproducible regardless of the
 *   order in which the file system lists entries.
 *
 * The process exits with code 1 when the file has the wrong extension, the
 * directory contains no .tgn files, or the path is neither a file nor a directory.
 *
 * @param inputPath - Existing file or directory path
 * @returns Paths of the TGN files to evaluate
 */
function getTgnFiles(inputPath: string): string[] {
	const stats = fs.statSync(inputPath);

	if (stats.isFile()) {
		if (!inputPath.endsWith('.tgn')) {
			console.error(`Error: Input file must be a .tgn file: ${inputPath}`);
			process.exit(1);
		}
		return [inputPath];
	}

	if (stats.isDirectory()) {
		// Only directories are excluded (rather than requiring isFile()) so that
		// symlinked .tgn files are still picked up.
		const files = fs.readdirSync(inputPath, { withFileTypes: true })
			.filter(entry => !entry.isDirectory() && entry.name.endsWith('.tgn'))
			.map(entry => entry.name)
			.sort()
			.map(name => path.join(inputPath, name));

		if (files.length === 0) {
			console.error(`Error: No .tgn files found in directory: ${inputPath}`);
			process.exit(1);
		}

		return files;
	}

	console.error(`Error: Invalid input path: ${inputPath}`);
	return process.exit(1);
}


/**
 * Run the full evaluation: initialize the parser and the agent, evaluate every
 * TGN file found under the input path one after another, print a summary
 * (counts, average value, timing, sign accuracy, value distribution) and,
 * when `config.outputPath` is set, write a JSON report.
 *
 * The parent directory of the output file is created if it does not exist, so
 * a finished run is not lost to a missing directory at write time.
 *
 * @param config - Validated configuration from parseArgs()
 */
async function evaluateDataset(config: EvaluationConfig): Promise<void> {
	console.log("=".repeat(80));
	console.log("TGN File Evaluation Tool");
	console.log("=".repeat(80));
	console.log(`Configuration:`);
	console.log(`  Model: ${config.modelPath}`);
	console.log(`  Input: ${config.inputPath}`);
	if (config.outputPath) {
		console.log(`  Output: ${config.outputPath}`);
	}
	console.log(`  Verbose: ${config.verbose}`);
	console.log();

	// Initialize TGN parser
	console.log("Initializing TGN parser...");
	await initializeParsers();
	console.log("✓ Parser initialized\n");

	// Get list of TGN files (exits the process when none are found)
	const tgnFiles = getTgnFiles(config.inputPath);
	console.log(`Found ${tgnFiles.length} TGN file(s) to evaluate\n`);

	// Initialize agent
	const agent = await initializeAgent(config);

	// Evaluate all files
	console.log("Evaluating files...");
	console.log("=".repeat(80));

	const results: TgnEvaluationResult[] = [];
	const startTime = Date.now();

	for (let i = 0; i < tgnFiles.length; i++) {
		const filepath = tgnFiles[i];
		const filename = path.basename(filepath);
		const progress = ((i + 1) / tgnFiles.length * 100).toFixed(1);

		// Evaluate file; failures come back as results with `error` set
		const result = await evaluateTgnFile(agent, filepath, config);
		results.push(result);

		// One-line progress update; verbose mode already logs per-file details
		if (!config.verbose) {
			const statusIcon = result.error ? "✗" : "✓";
			const valueStr = result.error ? "ERROR" : result.value.toFixed(4);
			const commentStr = result.commentValue !== undefined
				? ` (comment: ${result.commentValue})`
				: "";
			const matchStr = result.signMatch !== undefined
				? (result.signMatch ? " ✓" : " ✗")
				: "";
			console.log(
				`[${progress}%] ${statusIcon} ${filename}: ${valueStr}${commentStr} - ${result.interpretation}${matchStr}`
			);
		}
	}

	const endTime = Date.now();
	const duration = endTime - startTime;

	// Calculate statistics
	const successfulResults = results.filter(r => !r.error);
	const failedResults = results.filter(r => r.error);
	const averageValue = successfulResults.length > 0
		? successfulResults.reduce((sum, r) => sum + r.value, 0) / successfulResults.length
		: 0;

	// Sign accuracy is computed only over files that carried a numeric comment
	const resultsWithComments = successfulResults.filter(r => r.signMatch !== undefined);
	const correctPredictions = resultsWithComments.filter(r => r.signMatch === true).length;
	const accuracy = resultsWithComments.length > 0
		? (correctPredictions / resultsWithComments.length) * 100
		: 0;

	// Print summary
	console.log("=".repeat(80));
	console.log("Evaluation Complete!");
	console.log("=".repeat(80));
	console.log(`Total files: ${results.length}`);
	console.log(`Successful: ${successfulResults.length}`);
	console.log(`Failed: ${failedResults.length}`);
	console.log(`Average value: ${averageValue.toFixed(4)}`);
	console.log(`Total time: ${(duration / 1000).toFixed(1)}s`);
	console.log(`Average time per file: ${(duration / results.length).toFixed(0)}ms`);

	// Sign accuracy statistics
	if (resultsWithComments.length > 0) {
		console.log(`\nSign Accuracy:`);
		console.log(`  Files with comments: ${resultsWithComments.length}`);
		console.log(`  Correct predictions: ${correctPredictions}`);
		console.log(`  Accuracy: ${accuracy.toFixed(1)}%`);
	}

	// Value distribution, bucketed at ±0.1
	if (successfulResults.length > 0) {
		const blackAdvantage = successfulResults.filter(r => r.value > 0.1).length;
		const balanced = successfulResults.filter(r => r.value >= -0.1 && r.value <= 0.1).length;
		const whiteAdvantage = successfulResults.filter(r => r.value < -0.1).length;

		console.log(`\nValue Distribution:`);
		console.log(`  Black advantage (>0.1): ${blackAdvantage} (${(blackAdvantage / successfulResults.length * 100).toFixed(1)}%)`);
		console.log(`  Balanced (-0.1 to 0.1): ${balanced} (${(balanced / successfulResults.length * 100).toFixed(1)}%)`);
		console.log(`  White advantage (<-0.1): ${whiteAdvantage} (${(whiteAdvantage / successfulResults.length * 100).toFixed(1)}%)`);
	}

	// Save results to JSON if output path provided
	if (config.outputPath) {
		const outputData = {
			evaluationTime: new Date().toISOString(),
			modelPath: config.modelPath,
			inputPath: config.inputPath,
			totalFiles: results.length,
			successful: successfulResults.length,
			failed: failedResults.length,
			averageValue,
			durationMs: duration,
			signAccuracy: {
				filesWithComments: resultsWithComments.length,
				correctPredictions,
				accuracy: accuracy.toFixed(1) + "%"
			},
			// cleanedTgn is left out of the report on purpose; only the listed fields are exported
			results: results.map(r => ({
				filename: r.filename,
				filepath: r.filepath,
				value: r.value,
				interpretation: r.interpretation,
				moveCount: r.moveCount,
				boardShape: r.boardShape,
				commentValue: r.commentValue,
				signMatch: r.signMatch,
				error: r.error
			}))
		};

		// Create the destination directory first; writeFileSync does not create
		// missing parents and would otherwise throw after all the work is done.
		fs.mkdirSync(path.dirname(config.outputPath), { recursive: true });
		fs.writeFileSync(config.outputPath, JSON.stringify(outputData, null, 2), "utf-8");
		console.log(`\n✓ Results saved to: ${config.outputPath}`);
	}

	console.log("=".repeat(80));
}


/**
 * Script entry point: parse the command line, run the evaluation, and turn
 * any failure into an "Error:" log line plus exit code 1.
 */
async function main() {
	// Wrapping both steps in one async function routes a synchronous throw
	// from parseArgs() through the same rejection handler as evaluateDataset()
	const run = async (): Promise<void> => {
		const config = parseArgs();
		await evaluateDataset(config);
	};

	await run().catch((error: unknown) => {
		console.error("Error:", error);
		process.exit(1);
	});
}


// Start the run. main() catches its own errors and exits with code 1 on failure,
// so the returned promise is intentionally not awaited or chained here.
main();