Spaces:

k-l-lambda
/

trigo

Sleeping

App Files Files Community

trigo / trigo-web /tools /evaluateTgnFiles.ts

k-l-lambda

Update trigo-web with VS People multiplayer mode

15f353f about 1 month ago

raw

history blame contribute delete

15.9 kB

	/**
	* TGN File Evaluation Tool
	*
	* This script evaluates TGN files using the value prediction model.
	* It cleans TGN content by removing trailing comments, Pass moves, and spaces,
	* then evaluates the position value using TrigoEvaluationAgent.
	*
	* Features:
	* - Batch evaluation of all .tgn files in a directory
	* - TGN cleaning (remove trailing comments, Pass moves, trim spaces)
	* - Position value prediction in range [-1, 1]
	* - Output results to console and optional JSON file
	*
	* Usage:
	* npx tsx tools/evaluateTgnFiles.ts [options]
	*
	* Options:
	* --model <path> Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)
	* --input <path> Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)
	* --output <path> Optional JSON output file for results
	* --verbose Enable verbose logging
	* --help Show this help message
	*
	* Examples:
	* # Evaluate all TGN files in directory (uses model from .env)
	* npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay
	*
	* # Evaluate with specific model
	* npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay
	*
	* # Save results to JSON file
	* npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json
	*/

	import * as ort from "onnxruntime-node";
	import * as path from "path";
	import * as fs from "fs";
	import { fileURLToPath } from "url";
	import { TrigoGame } from "../inc/trigo/game";
	import { ModelInferencer } from "../inc/modelInferencer";
	import { TrigoEvaluationAgent } from "../inc/trigoEvaluationAgent";
	import { initializeParsers } from "../inc/trigo/parserInit";
	import { loadEnvConfig, getOnnxModelPaths, getAbsoluteModelPath, getOnnxSessionOptions } from "../inc/config";


	// ES modules do not provide __filename/__dirname, so derive both from this module's URL.
	const __filename = fileURLToPath(import.meta.url);
	const __dirname = path.dirname(__filename);

	// Load environment configuration (top-level await) before any default is read.
	// NOTE(review): presumably this populates what getOnnxModelPaths() reads below — confirm in ../inc/config.
	await loadEnvConfig();

	// Default ONNX model paths; parseArgs() uses `evaluationModel` as the fallback for --model.
	const defaultModelPaths = getOnnxModelPaths();


	/**
	 * Runtime configuration for one run of the tool, built by parseArgs().
	 */
	interface EvaluationConfig {
	// Path of the ONNX model file handed to ort.InferenceSession.create()
	modelPath: string;
	// A single .tgn file, or a directory that is scanned for .tgn files
	inputPath: string;
	// When set, the summary and per-file results are also written here as JSON
	outputPath?: string;
	// Print per-file details instead of the one-line progress output
	verbose: boolean;
	// Forwarded to ModelInferencer as `vocabSize`
	vocabSize: number;
	// Forwarded to ModelInferencer as `seqLen`
	seqLen: number;
	}


	/**
	 * Outcome of evaluating a single TGN file.
	 * On failure `error` is set and the other fields hold the placeholder
	 * values assigned in evaluateTgnFile()'s catch branch.
	 */
	interface TgnEvaluationResult {
	// Base name of the file (path.basename of filepath)
	filename: string;
	// Path the file was read from
	filepath: string;
	// Value reported by the agent's evaluatePosition(); 0 on failure
	value: number;
	// Interpretation text reported by the agent; "Error" on failure
	interpretation: string;
	// Length of the parsed game's history; 0 on failure
	moveCount: number;
	// Board dimensions formatted as "x×y×z"; "Unknown" on failure
	boardShape: string;
	// TGN text after cleanTGN(); empty string on failure
	cleanedTgn: string;
	commentValue?: number; // Number parsed from the trailing ';' comment, if any
	signMatch?: boolean; // Whether Math.sign of `value` equals Math.sign of `commentValue`; unset without a comment value
	// Error message when the file could not be evaluated
	error?: string;
	}


	/**
	 * Parse command line arguments into an EvaluationConfig.
	 *
	 * Defaults: the model path comes from `defaultModelPaths.evaluationModel`,
	 * the input path is `output/selfplay` next to this script, verbose is off,
	 * vocabSize is 128 and seqLen is 256.
	 *
	 * Exits the process (code 1) when an unknown `--option` is given, when a
	 * value-taking option has no value, or when the model or input path does
	 * not exist. `--help` prints the usage text and exits with code 0.
	 * Arguments that do not start with `--` and are not consumed as an option
	 * value are ignored.
	 *
	 * @returns The validated configuration.
	 */
	function parseArgs(): EvaluationConfig {
		const args = process.argv.slice(2);
		const config: EvaluationConfig = {
			modelPath: getAbsoluteModelPath(defaultModelPaths.evaluationModel),
			inputPath: path.join(__dirname, "output/selfplay"),
			verbose: false,
			vocabSize: 128,
			seqLen: 256
		};

		// Return the value that follows the option at `index`. A missing value, or
		// another `--flag` in its place, is reported instead of being silently
		// consumed (e.g. `--output --verbose` used to write to a file named "--verbose").
		const takeValue = (index: number): string => {
			const value = args[index + 1];
			if (value === undefined || value.startsWith("--")) {
				console.error(`Error: Option ${args[index]} requires a value`);
				printHelp();
				process.exit(1);
			}
			return value;
		};

		for (let i = 0; i < args.length; i++) {
			const arg = args[i];
			switch (arg) {
				case "--model":
					config.modelPath = takeValue(i);
					i++; // skip the consumed value
					break;
				case "--input":
					config.inputPath = takeValue(i);
					i++;
					break;
				case "--output":
					config.outputPath = takeValue(i);
					i++;
					break;
				case "--verbose":
					config.verbose = true;
					break;
				case "--help":
					printHelp();
					process.exit(0);
				default:
					if (arg.startsWith("--")) {
						console.error(`Unknown option: ${arg}`);
						printHelp();
						process.exit(1);
					}
			}
		}

		// The model path is optional on the command line, but some path must be configured
		if (!config.modelPath) {
			console.error("Error: --model argument is required or set ONNX_EVALUATION_MODEL env variable");
			printHelp();
			process.exit(1);
		}

		if (!fs.existsSync(config.modelPath)) {
			console.error(`Error: Model file not found: ${config.modelPath}`);
			process.exit(1);
		}

		if (!fs.existsSync(config.inputPath)) {
			console.error(`Error: Input path not found: ${config.inputPath}`);
			process.exit(1);
		}

		return config;
	}


	/**
	 * Print the usage text (options and example invocations) to stdout.
	 * The text is a single template literal, so its line breaks and leading
	 * whitespace are emitted exactly as written below.
	 */
	function printHelp(): void {
	console.log(`
	Usage: npx tsx tools/evaluateTgnFiles.ts [options]

	Options:
	--model <path> Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)
	--input <path> Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)
	--output <path> Optional JSON output file for results
	--verbose Enable verbose logging
	--help Show this help message

	Examples:
	# Evaluate all TGN files in directory (uses model from .env)
	npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay

	# Evaluate with specific model
	npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay

	# Save results to JSON file
	npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json
	`);
	}


	/**
	 * Extract the numeric value from the trailing comment block of a TGN text.
	 *
	 * Lines are scanned from the end. Blank lines are skipped; comment lines
	 * (starting with ';') are checked for a number directly after the ';'
	 * (e.g. "; -16" or "; 10"), and the first one found is returned. Comment
	 * lines without such a number are skipped. The scan stops at the first
	 * line that is neither blank nor a comment.
	 *
	 * @param tgnContent Raw TGN text, before cleaning.
	 * @returns The parsed number, or undefined when the trailing comments hold none.
	 */
	function extractCommentValue(tgnContent: string): number | undefined {
		const lines = tgnContent.split('\n');

		for (let i = lines.length - 1; i >= 0; i--) {
			const line = lines[i].trim();

			// Blank lines never end the trailing comment block
			if (line === '') continue;

			// First non-empty, non-comment line: the trailing block is over
			if (!line.startsWith(';')) break;

			// Optional sign, integer part, optional fraction
			const match = line.match(/;\s*([+-]?\d+(?:\.\d+)?)/);
			if (match) {
				return parseFloat(match[1]);
			}
		}

		return undefined;
	}


	/**
	 * Clean TGN content before it is parsed and evaluated.
	 *
	 * Two passes run over the end of the text:
	 *  1. drop trailing blank lines and comment lines (starting with ';');
	 *  2. drop trailing lines that are a Pass move ("P") or end with " P".
	 * The remaining lines are re-joined and surrounding whitespace is trimmed.
	 * Per the original note, this matches the behavior of test_evaluation_mode.js.
	 *
	 * @param tgnContent Raw TGN text.
	 * @returns The cleaned TGN text (may be empty).
	 */
	function cleanTGN(tgnContent: string): string {
		const lines = tgnContent.split('\n');

		// Pass 1: trailing empty lines and comments
		while (lines.length > 0) {
			const lastLine = lines[lines.length - 1].trim();
			if (lastLine === '' || lastLine.startsWith(';')) {
				lines.pop();
			} else {
				break;
			}
		}

		// Pass 2: trailing Pass moves.
		// NOTE(review): a line ending with " P" is removed as a whole, including
		// anything before the pass on that line — confirm this is intended.
		while (lines.length > 0) {
			const lastLine = lines[lines.length - 1].trim();
			if (lastLine === 'P' || lastLine.endsWith(' P')) {
				lines.pop();
			} else {
				break;
			}
		}

		return lines.join('\n').trim();
	}


	/**
	 * Build the evaluation agent: create an ONNX inference session for the
	 * configured model, attach it to a ModelInferencer, and wrap that in a
	 * TrigoEvaluationAgent.
	 *
	 * @param config Supplies the model path, vocab size and sequence length.
	 * @returns The ready-to-use agent.
	 */
	async function initializeAgent(config: EvaluationConfig): Promise<TrigoEvaluationAgent> {
		const { modelPath, vocabSize, seqLen } = config;

		console.log("Initializing Evaluation Agent...");
		console.log(` Model: ${modelPath}`);
		console.log(` Vocab Size: ${vocabSize}`);
		console.log(` Sequence Length: ${seqLen}`);

		const onnxSession = await ort.InferenceSession.create(modelPath, getOnnxSessionOptions());

		const modelInferencer = new ModelInferencer(ort.Tensor as any, { vocabSize, seqLen, modelPath });
		modelInferencer.setSession(onnxSession as any);

		const evaluationAgent = new TrigoEvaluationAgent(modelInferencer);
		console.log("✓ Agent initialized\n");
		return evaluationAgent;
	}


	/**
	 * Evaluate a single TGN file.
	 *
	 * Reads the file, extracts the numeric tail comment (before cleaning removes
	 * it), cleans the TGN, parses it into a TrigoGame and asks the agent for a
	 * position evaluation. When a comment value exists, the sign of the model
	 * value is compared with the sign of the comment value.
	 *
	 * Any failure after the file name is derived (read, parse or evaluation) is
	 * caught, logged to stderr and returned as a result with `error` set and
	 * placeholder values in the other fields.
	 *
	 * @param agent Agent used to evaluate the parsed game.
	 * @param filepath Path of the .tgn file.
	 * @param config Only `verbose` is read here; it enables per-step logging.
	 * @returns The evaluation result for this file.
	 */
	async function evaluateTgnFile(
		agent: TrigoEvaluationAgent,
		filepath: string,
		config: EvaluationConfig
	): Promise<TgnEvaluationResult> {
		const filename = path.basename(filepath);

		try {
			const rawContent = fs.readFileSync(filepath, "utf-8");

			// The comment value must be read BEFORE cleaning strips trailing comments
			const commentValue = extractCommentValue(rawContent);
			const cleanedTgn = cleanTGN(rawContent);

			if (config.verbose) {
				console.log(`\nProcessing: ${filename}`);
				console.log(` Raw length: ${rawContent.length} chars`);
				console.log(` Cleaned length: ${cleanedTgn.length} chars`);
				if (commentValue !== undefined) {
					console.log(` Comment value: ${commentValue}`);
				}
			}

			// Parse TGN to create game
			const game = TrigoGame.fromTGN(cleanedTgn);

			if (config.verbose) {
				console.log(` Game type: ${typeof game}`);
				console.log(` Is TrigoGame: ${game instanceof TrigoGame}`);
				console.log(` Has getHistory: ${typeof game.getHistory}`);
			}

			// Board info
			const boardShape = game.getShape();
			const boardShapeStr = `${boardShape.x}×${boardShape.y}×${boardShape.z}`;
			const moveCount = game.getHistory().length;

			if (config.verbose) {
				console.log(` Board: ${boardShapeStr}`);
				console.log(` Moves: ${moveCount}`);
			}

			// Evaluate position
			const evaluation = await agent.evaluatePosition(game);

			// Compare signs only when a comment value exists.
			// Note: There appears to be a sign inconsistency between model output and training data
			const signMatch: boolean | undefined = commentValue === undefined
				? undefined
				: Math.sign(evaluation.value) === Math.sign(commentValue);

			if (config.verbose) {
				console.log(` Value: ${evaluation.value.toFixed(4)}`);
				console.log(` ${evaluation.interpretation}`);
				if (commentValue !== undefined) {
					console.log(` Comment: ${commentValue}, Sign match: ${signMatch ? '✓' : '✗'}`);
				}
			}

			return {
				filename,
				filepath,
				value: evaluation.value,
				interpretation: evaluation.interpretation,
				moveCount,
				boardShape: boardShapeStr,
				cleanedTgn,
				commentValue,
				signMatch
			};
		} catch (error) {
			const errorMessage = error instanceof Error ? error.message : String(error);
			console.error(`✗ Error evaluating ${filename}: ${errorMessage}`);

			return {
				filename,
				filepath,
				value: 0,
				interpretation: "Error",
				moveCount: 0,
				boardShape: "Unknown",
				cleanedTgn: "",
				error: errorMessage
			};
		}
	}


	/**
	 * Resolve the input path to the list of TGN files to evaluate.
	 *
	 * A regular file must end in ".tgn" and is returned as a one-element list.
	 * A directory yields every direct entry whose name ends in ".tgn", joined
	 * with the directory path. Any other kind of path, a non-.tgn file, or a
	 * directory without .tgn entries prints an error and exits with code 1.
	 *
	 * @param inputPath Path to a .tgn file or to a directory.
	 * @returns Paths of the TGN files to evaluate.
	 */
	function getTgnFiles(inputPath: string): string[] {
		const info = fs.statSync(inputPath);

		// Single file
		if (info.isFile()) {
			if (!inputPath.endsWith('.tgn')) {
				console.error(`Error: Input file must be a .tgn file: ${inputPath}`);
				process.exit(1);
			}
			return [inputPath];
		}

		// Neither a file nor a directory
		if (!info.isDirectory()) {
			console.error(`Error: Invalid input path: ${inputPath}`);
			process.exit(1);
		}

		// Directory: collect the .tgn entries in listing order
		const collected: string[] = [];
		for (const entry of fs.readdirSync(inputPath)) {
			if (entry.endsWith('.tgn')) {
				collected.push(path.join(inputPath, entry));
			}
		}

		if (collected.length === 0) {
			console.error(`Error: No .tgn files found in directory: ${inputPath}`);
			process.exit(1);
		}

		return collected;
	}


	/**
	 * Run the whole evaluation: print the configuration, initialize the TGN
	 * parser and the agent, evaluate every TGN file one after another, print
	 * summary statistics, and optionally write the results to a JSON file.
	 *
	 * A result counts as failed exactly when its `error` field is set, so an
	 * error with an empty message is still treated as a failure. When an output
	 * path is configured, its parent directory is created if missing so the
	 * results of a finished run are not lost to a missing folder.
	 *
	 * @param config Validated configuration from parseArgs().
	 */
	async function evaluateDataset(config: EvaluationConfig): Promise<void> {
		const separator = "=".repeat(80);

		console.log(separator);
		console.log("TGN File Evaluation Tool");
		console.log(separator);
		console.log(`Configuration:`);
		console.log(` Model: ${config.modelPath}`);
		console.log(` Input: ${config.inputPath}`);
		if (config.outputPath) {
			console.log(` Output: ${config.outputPath}`);
		}
		console.log(` Verbose: ${config.verbose}`);
		console.log();

		// Initialize TGN parser
		console.log("Initializing TGN parser...");
		await initializeParsers();
		console.log("✓ Parser initialized\n");

		// Get list of TGN files
		const tgnFiles = getTgnFiles(config.inputPath);
		console.log(`Found ${tgnFiles.length} TGN file(s) to evaluate\n`);

		// Initialize agent
		const agent = await initializeAgent(config);

		// Evaluate all files, one at a time and in listing order
		console.log("Evaluating files...");
		console.log(separator);

		const results: TgnEvaluationResult[] = [];
		const startTime = Date.now();

		for (let i = 0; i < tgnFiles.length; i++) {
			const filepath = tgnFiles[i];
			const result = await evaluateTgnFile(agent, filepath, config);
			results.push(result);

			// In verbose mode evaluateTgnFile already printed the details
			if (!config.verbose) {
				const failed = result.error !== undefined;
				const progress = ((i + 1) / tgnFiles.length * 100).toFixed(1);
				const filename = path.basename(filepath);
				const statusIcon = failed ? "✗" : "✓";
				const valueStr = failed ? "ERROR" : result.value.toFixed(4);
				const commentStr = result.commentValue !== undefined
					? ` (comment: ${result.commentValue})`
					: "";
				const matchStr = result.signMatch !== undefined
					? (result.signMatch ? " ✓" : " ✗")
					: "";
				console.log(
					`[${progress}%] ${statusIcon} ${filename}: ${valueStr}${commentStr} - ${result.interpretation}${matchStr}`
				);
			}
		}

		const duration = Date.now() - startTime;

		// Statistics: failure is decided by the presence of `error`, not its truthiness
		const successfulResults = results.filter(r => r.error === undefined);
		const failedResults = results.filter(r => r.error !== undefined);
		const averageValue = successfulResults.length > 0
			? successfulResults.reduce((sum, r) => sum + r.value, 0) / successfulResults.length
			: 0;

		// Sign accuracy over the successful results that carried a comment value
		const resultsWithComments = successfulResults.filter(r => r.signMatch !== undefined);
		const correctPredictions = resultsWithComments.filter(r => r.signMatch === true).length;
		const accuracy = resultsWithComments.length > 0
			? (correctPredictions / resultsWithComments.length) * 100
			: 0;

		// Print summary
		console.log(separator);
		console.log("Evaluation Complete!");
		console.log(separator);
		console.log(`Total files: ${results.length}`);
		console.log(`Successful: ${successfulResults.length}`);
		console.log(`Failed: ${failedResults.length}`);
		console.log(`Average value: ${averageValue.toFixed(4)}`);
		console.log(`Total time: ${(duration / 1000).toFixed(1)}s`);
		console.log(`Average time per file: ${(duration / results.length).toFixed(0)}ms`);

		// Sign accuracy statistics
		if (resultsWithComments.length > 0) {
			console.log(`\nSign Accuracy:`);
			console.log(` Files with comments: ${resultsWithComments.length}`);
			console.log(` Correct predictions: ${correctPredictions}`);
			console.log(` Accuracy: ${accuracy.toFixed(1)}%`);
		}

		// Value distribution, bucketed at ±0.1
		if (successfulResults.length > 0) {
			const total = successfulResults.length;
			const blackAdvantage = successfulResults.filter(r => r.value > 0.1).length;
			const balanced = successfulResults.filter(r => r.value >= -0.1 && r.value <= 0.1).length;
			const whiteAdvantage = successfulResults.filter(r => r.value < -0.1).length;

			console.log(`\nValue Distribution:`);
			console.log(` Black advantage (>0.1): ${blackAdvantage} (${(blackAdvantage / total * 100).toFixed(1)}%)`);
			console.log(` Balanced (-0.1 to 0.1): ${balanced} (${(balanced / total * 100).toFixed(1)}%)`);
			console.log(` White advantage (<-0.1): ${whiteAdvantage} (${(whiteAdvantage / total * 100).toFixed(1)}%)`);
		}

		// Save results to JSON if output path provided
		if (config.outputPath) {
			const outputData = {
				evaluationTime: new Date().toISOString(),
				modelPath: config.modelPath,
				inputPath: config.inputPath,
				totalFiles: results.length,
				successful: successfulResults.length,
				failed: failedResults.length,
				averageValue,
				durationMs: duration,
				signAccuracy: {
					filesWithComments: resultsWithComments.length,
					correctPredictions,
					accuracy: accuracy.toFixed(1) + "%"
				},
				// cleanedTgn is left out of the saved per-file records
				results: results.map(r => ({
					filename: r.filename,
					filepath: r.filepath,
					value: r.value,
					interpretation: r.interpretation,
					moveCount: r.moveCount,
					boardShape: r.boardShape,
					commentValue: r.commentValue,
					signMatch: r.signMatch,
					error: r.error
				}))
			};

			// Make sure the target directory exists before writing
			fs.mkdirSync(path.dirname(config.outputPath), { recursive: true });
			fs.writeFileSync(config.outputPath, JSON.stringify(outputData, null, 2), "utf-8");
			console.log(`\n✓ Results saved to: ${config.outputPath}`);
		}

		console.log(separator);
	}


	/**
	 * Entry point: parse the command line, run the evaluation, and turn any
	 * thrown error into a logged message plus exit code 1.
	 */
	async function main() {
		try {
			await evaluateDataset(parseArgs());
		} catch (failure) {
			console.error("Error:", failure);
			process.exit(1);
		}
	}


	// Start the tool
	main();