/**
 * TGN File Evaluation Tool
 *
 * This script evaluates TGN files using the value prediction model.
 * It cleans TGN content by removing trailing comments, Pass moves, and spaces,
 * then evaluates the position value using TrigoEvaluationAgent.
 *
 * Features:
 * - Batch evaluation of all .tgn files in a directory
 * - TGN cleaning (remove trailing comments, Pass moves, trim spaces)
 * - Position value prediction in range [-1, 1]
 * - Output results to console and optional JSON file
 *
 * Usage:
 *   npx tsx tools/evaluateTgnFiles.ts [options]
 *
 * Options:
 *   --model    Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)
 *   --input    Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)
 *   --output   Optional JSON output file for results
 *   --verbose  Enable verbose logging
 *   --help     Show this help message
 *
 * Examples:
 *   # Evaluate all TGN files in directory (uses model from .env)
 *   npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay
 *
 *   # Evaluate with specific model
 *   npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay
 *
 *   # Save results to JSON file
 *   npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json
 */

import * as ort from "onnxruntime-node";
import * as path from "path";
import * as fs from "fs";
import { fileURLToPath } from "url";
import { TrigoGame } from "../inc/trigo/game";
import { ModelInferencer } from "../inc/modelInferencer";
import { TrigoEvaluationAgent } from "../inc/trigoEvaluationAgent";
import { initializeParsers } from "../inc/trigo/parserInit";
import { loadEnvConfig, getOnnxModelPaths, getAbsoluteModelPath, getOnnxSessionOptions } from "../inc/config";

// ES module equivalent of __dirname
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Load environment variables (must complete before the default model paths are read)
await loadEnvConfig();

// Default model paths from environment
const defaultModelPaths = getOnnxModelPaths();

/**
 * Runtime configuration assembled by parseArgs().
 */
interface EvaluationConfig {
	/** Path to the ONNX evaluation model. */
	modelPath: string;
	/** A single .tgn file or a directory containing .tgn files. */
	inputPath: string;
	/** Optional path of the JSON report to write. */
	outputPath?: string;
	/** When true, per-file details are logged instead of the one-line progress output. */
	verbose: boolean;
	/** Passed through to ModelInferencer; fixed at 128, not configurable from the CLI. */
	vocabSize: number;
	/** Passed through to ModelInferencer; fixed at 256, not configurable from the CLI. */
	seqLen: number;
}

/**
 * Evaluation result for a single TGN file.
 * When `error` is set, the other fields hold placeholder values.
 */
interface TgnEvaluationResult {
	filename: string;
	filepath: string;
	value: number;
	interpretation: string;
	moveCount: number;
	boardShape: string;
	cleanedTgn: string;
	commentValue?: number; // Value from tail comment
	signMatch?: boolean; // Whether model and comment signs match
	error?: string;
}

/**
 * Parse command line arguments into an EvaluationConfig.
 *
 * Exits the process with code 1 on an unknown option, on an option that is
 * missing its value, or when the model/input path does not exist; exits with
 * code 0 after printing help for --help. Arguments that do not start with
 * "--" and are not consumed as option values are ignored.
 *
 * @returns The validated configuration.
 */
function parseArgs(): EvaluationConfig {
	const args = process.argv.slice(2);
	const config: EvaluationConfig = {
		modelPath: getAbsoluteModelPath(defaultModelPaths.evaluationModel),
		inputPath: path.join(__dirname, "output/selfplay"),
		verbose: false,
		vocabSize: 128,
		seqLen: 256
	};

	let index = 0;

	// Consume the value following `flag`. Without this check a trailing
	// "--output" (or "--model --verbose") would silently yield undefined or
	// swallow the next option as a path.
	const takeValue = (flag: string): string => {
		const value = args[index + 1];
		if (value === undefined || value.startsWith("--")) {
			console.error(`Error: ${flag} requires a value`);
			printHelp();
			process.exit(1);
		}
		index += 1;
		return value;
	};

	while (index < args.length) {
		const arg = args[index];
		switch (arg) {
			case "--model":
				config.modelPath = takeValue(arg);
				break;
			case "--input":
				config.inputPath = takeValue(arg);
				break;
			case "--output":
				config.outputPath = takeValue(arg);
				break;
			case "--verbose":
				config.verbose = true;
				break;
			case "--help":
				printHelp();
				process.exit(0);
			default:
				if (arg.startsWith("--")) {
					console.error(`Unknown option: ${arg}`);
					printHelp();
					process.exit(1);
				}
		}
		index += 1;
	}

	// Validate model path (now optional since we have default)
	if (!config.modelPath) {
		console.error("Error: --model argument is required or set ONNX_EVALUATION_MODEL env variable");
		printHelp();
		process.exit(1);
	}

	if (!fs.existsSync(config.modelPath)) {
		console.error(`Error: Model file not found: ${config.modelPath}`);
		process.exit(1);
	}

	if (!fs.existsSync(config.inputPath)) {
		console.error(`Error: Input path not found: ${config.inputPath}`);
		process.exit(1);
	}

	return config;
}

/**
 * Print the usage/help message to stdout.
 */
function printHelp(): void {
	console.log(`
Usage: npx tsx tools/evaluateTgnFiles.ts [options]

Options:
  --model    Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)
  --input    Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)
  --output   Optional JSON output file for results
  --verbose  Enable verbose logging
  --help     Show this help message

Examples:
  # Evaluate all TGN files in directory (uses model from .env)
  npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay

  # Evaluate with specific model
  npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay

  # Save results to JSON file
  npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json
`);
}

/**
 * Extract the numeric value from the tail comment of raw (uncleaned) TGN text.
 *
 * Scans lines from the end: blank lines are skipped, lines starting with ';'
 * are checked for a leading number (e.g. "; -16" or "; 10"), and the scan stops
 * at the first line that is neither blank nor a comment.
 *
 * @param tgnContent Raw TGN file content.
 * @returns The number from the last trailing comment that contains one, or
 *          undefined if no trailing comment carries a number.
 */
function extractCommentValue(tgnContent: string): number | undefined {
	const lines = tgnContent.split('\n');

	for (let i = lines.length - 1; i >= 0; i--) {
		const line = lines[i].trim();

		// Skip empty lines
		if (line === '') continue;

		// Stop at first non-empty, non-comment line
		if (!line.startsWith(';')) break;

		// Format: "; -16" or "; 10" etc.
		const match = line.match(/;\s*([+-]?\d+(?:\.\d+)?)/);
		if (match) {
			return parseFloat(match[1]);
		}
	}

	return undefined;
}

/**
 * Clean TGN content by removing trailing comments, Pass moves, and spaces.
 * Matches the behavior of test_evaluation_mode.js.
 *
 * @param tgnContent Raw TGN file content.
 * @returns The remaining lines joined with '\n' and trimmed.
 */
function cleanTGN(tgnContent: string): string {
	const lines = tgnContent.split('\n');

	// Remove trailing empty lines and comments (lines starting with ';')
	while (lines.length > 0) {
		const lastLine = lines[lines.length - 1].trim();
		if (lastLine === '' || lastLine.startsWith(';')) {
			lines.pop();
		} else {
			break;
		}
	}

	// Remove trailing Pass moves.
	// NOTE(review): a line ending in " P" is dropped as a whole, including any
	// non-Pass move that precedes the Pass on that line — kept as-is to stay in
	// step with test_evaluation_mode.js; confirm this is intended.
	while (lines.length > 0) {
		const lastLine = lines[lines.length - 1].trim();
		// Check for Pass move: "P" or ending with " P"
		if (lastLine === 'P' || lastLine.endsWith(' P')) {
			lines.pop();
		} else {
			break;
		}
	}

	// Join lines and trim trailing spaces
	return lines.join('\n').trim();
}

/**
 * Create the ONNX inference session and wrap it in a TrigoEvaluationAgent.
 *
 * @param config Supplies the model path plus the vocabSize/seqLen handed to ModelInferencer.
 * @returns The ready-to-use evaluation agent.
 */
async function initializeAgent(config: EvaluationConfig): Promise<TrigoEvaluationAgent> {
	console.log("Initializing Evaluation Agent...");
	console.log(` Model: ${config.modelPath}`);
	console.log(` Vocab Size: ${config.vocabSize}`);
	console.log(` Sequence Length: ${config.seqLen}`);

	const sessionOptions = getOnnxSessionOptions();
	const session = await ort.InferenceSession.create(config.modelPath, sessionOptions);

	// The `as any` casts bridge onnxruntime-node's types to whatever
	// ModelInferencer declares — presumably a runtime-agnostic shape; TODO confirm.
	const inferencer = new ModelInferencer(ort.Tensor as any, {
		vocabSize: config.vocabSize,
		seqLen: config.seqLen,
		modelPath: config.modelPath
	});
	inferencer.setSession(session as any);

	const agent = new TrigoEvaluationAgent(inferencer);
	console.log("✓ Agent initialized\n");

	return agent;
}

/**
 * Evaluate a single TGN file.
 *
 * Reads the file, extracts the tail-comment value, cleans the TGN, parses it
 * into a TrigoGame and asks the agent for a position value. Failures while
 * reading, parsing or evaluating are logged and reported through the returned
 * result's `error` field rather than thrown.
 *
 * @param agent Evaluation agent used to score the position.
 * @param filepath Path of the .tgn file.
 * @param config Only `verbose` is read here.
 * @returns The evaluation result, or a placeholder result with `error` set.
 */
async function evaluateTgnFile(
	agent: TrigoEvaluationAgent,
	filepath: string,
	config: EvaluationConfig
): Promise<TgnEvaluationResult> {
	const filename = path.basename(filepath);

	try {
		// Read TGN file
		const rawContent = fs.readFileSync(filepath, "utf-8");

		// Extract comment value BEFORE cleaning (cleaning strips the tail comment)
		const commentValue = extractCommentValue(rawContent);

		// Clean TGN content
		const cleanedTgn = cleanTGN(rawContent);

		if (config.verbose) {
			console.log(`\nProcessing: ${filename}`);
			console.log(` Raw length: ${rawContent.length} chars`);
			console.log(` Cleaned length: ${cleanedTgn.length} chars`);
			if (commentValue !== undefined) {
				console.log(` Comment value: ${commentValue}`);
			}
		}

		// Parse TGN to create game
		const game = TrigoGame.fromTGN(cleanedTgn);

		if (config.verbose) {
			console.log(` Game type: ${typeof game}`);
			console.log(` Is TrigoGame: ${game instanceof TrigoGame}`);
			console.log(` Has getHistory: ${typeof game.getHistory}`);
		}

		// Get board info
		const boardShape = game.getShape();
		const boardShapeStr = `${boardShape.x}×${boardShape.y}×${boardShape.z}`;
		const moveCount = game.getHistory().length;

		if (config.verbose) {
			console.log(` Board: ${boardShapeStr}`);
			console.log(` Moves: ${moveCount}`);
		}

		// Evaluate position
		const evaluation = await agent.evaluatePosition(game);

		// Compare signs if comment value exists
		let signMatch: boolean | undefined = undefined;
		if (commentValue !== undefined) {
			// Note: There appears to be a sign inconsistency between model output and training data
			const modelSign = Math.sign(evaluation.value);
			const commentSign = Math.sign(commentValue);
			signMatch = modelSign === commentSign;
		}

		if (config.verbose) {
			console.log(` Value: ${evaluation.value.toFixed(4)}`);
			console.log(` ${evaluation.interpretation}`);
			if (commentValue !== undefined && signMatch !== undefined) {
				console.log(` Comment: ${commentValue}, Sign match: ${signMatch ? '✓' : '✗'}`);
			}
		}

		return {
			filename,
			filepath,
			value: evaluation.value,
			interpretation: evaluation.interpretation,
			moveCount,
			boardShape: boardShapeStr,
			cleanedTgn,
			commentValue,
			signMatch
		};
	} catch (error: unknown) {
		const errorMessage = error instanceof Error ? error.message : String(error);
		console.error(`✗ Error evaluating ${filename}: ${errorMessage}`);

		return {
			filename,
			filepath,
			value: 0,
			interpretation: "Error",
			moveCount: 0,
			boardShape: "Unknown",
			cleanedTgn: "",
			error: errorMessage
		};
	}
}

/**
 * Get the list of TGN files to evaluate.
 *
 * Exits the process with code 1 when the input is a non-.tgn file, a directory
 * without .tgn entries, or neither a file nor a directory.
 *
 * @param inputPath An existing .tgn file or a directory (not searched recursively).
 * @returns File paths to evaluate; directory entries are returned in sorted order.
 */
function getTgnFiles(inputPath: string): string[] {
	const stats = fs.statSync(inputPath);

	if (stats.isFile()) {
		// Single file
		if (!inputPath.endsWith('.tgn')) {
			console.error(`Error: Input file must be a .tgn file: ${inputPath}`);
			process.exit(1);
		}
		return [inputPath];
	} else if (stats.isDirectory()) {
		// Directory - get all .tgn files. readdirSync makes no ordering promise,
		// so sort to keep console output and JSON reports reproducible.
		const files = fs.readdirSync(inputPath)
			.filter(file => file.endsWith('.tgn'))
			.sort()
			.map(file => path.join(inputPath, file));

		if (files.length === 0) {
			console.error(`Error: No .tgn files found in directory: ${inputPath}`);
			process.exit(1);
		}

		return files;
	} else {
		console.error(`Error: Invalid input path: ${inputPath}`);
		process.exit(1);
	}
}

/**
 * Main evaluation function: evaluates every TGN file under the configured
 * input, prints progress and summary statistics, and optionally writes a JSON
 * report to `config.outputPath`.
 *
 * Files are evaluated one at a time, in list order.
 *
 * @param config Validated configuration from parseArgs().
 */
async function evaluateDataset(config: EvaluationConfig): Promise<void> {
	console.log("=".repeat(80));
	console.log("TGN File Evaluation Tool");
	console.log("=".repeat(80));
	console.log(`Configuration:`);
	console.log(` Model: ${config.modelPath}`);
	console.log(` Input: ${config.inputPath}`);
	if (config.outputPath) {
		console.log(` Output: ${config.outputPath}`);
	}
	console.log(` Verbose: ${config.verbose}`);
	console.log();

	// Initialize TGN parser
	console.log("Initializing TGN parser...");
	await initializeParsers();
	console.log("✓ Parser initialized\n");

	// Get list of TGN files
	const tgnFiles = getTgnFiles(config.inputPath);
	console.log(`Found ${tgnFiles.length} TGN file(s) to evaluate\n`);

	// Initialize agent
	const agent = await initializeAgent(config);

	// Evaluate all files
	console.log("Evaluating files...");
	console.log("=".repeat(80));

	const results: TgnEvaluationResult[] = [];
	const startTime = Date.now();

	for (let i = 0; i < tgnFiles.length; i++) {
		const filepath = tgnFiles[i];
		const filename = path.basename(filepath);
		const progress = ((i + 1) / tgnFiles.length * 100).toFixed(1);

		// Evaluate file
		const result = await evaluateTgnFile(agent, filepath, config);
		results.push(result);

		// Progress update (verbose mode already logs per-file details)
		if (!config.verbose) {
			const statusIcon = result.error ? "✗" : "✓";
			const valueStr = result.error ? "ERROR" : result.value.toFixed(4);
			const commentStr = result.commentValue !== undefined ? ` (comment: ${result.commentValue})` : "";
			const matchStr = result.signMatch !== undefined ? (result.signMatch ? " ✓" : " ✗") : "";
			console.log(
				`[${progress}%] ${statusIcon} ${filename}: ${valueStr}${commentStr} - ${result.interpretation}${matchStr}`
			);
		}
	}

	const endTime = Date.now();
	const duration = endTime - startTime;

	// Calculate statistics
	const successfulResults = results.filter(r => !r.error);
	const failedResults = results.filter(r => r.error);
	const averageValue = successfulResults.length > 0
		? successfulResults.reduce((sum, r) => sum + r.value, 0) / successfulResults.length
		: 0;

	// Calculate sign accuracy (only over files that carried a numeric tail comment)
	const resultsWithComments = successfulResults.filter(r => r.signMatch !== undefined);
	const correctPredictions = resultsWithComments.filter(r => r.signMatch === true).length;
	const accuracy = resultsWithComments.length > 0
		? (correctPredictions / resultsWithComments.length) * 100
		: 0;

	// Print summary
	console.log("=".repeat(80));
	console.log("Evaluation Complete!");
	console.log("=".repeat(80));
	console.log(`Total files: ${results.length}`);
	console.log(`Successful: ${successfulResults.length}`);
	console.log(`Failed: ${failedResults.length}`);
	console.log(`Average value: ${averageValue.toFixed(4)}`);
	console.log(`Total time: ${(duration / 1000).toFixed(1)}s`);
	// results is never empty here: getTgnFiles exits when it finds no files
	console.log(`Average time per file: ${(duration / results.length).toFixed(0)}ms`);

	// Sign accuracy statistics
	if (resultsWithComments.length > 0) {
		console.log(`\nSign Accuracy:`);
		console.log(` Files with comments: ${resultsWithComments.length}`);
		console.log(` Correct predictions: ${correctPredictions}`);
		console.log(` Accuracy: ${accuracy.toFixed(1)}%`);
	}

	// Value distribution
	if (successfulResults.length > 0) {
		const blackAdvantage = successfulResults.filter(r => r.value > 0.1).length;
		const balanced = successfulResults.filter(r => r.value >= -0.1 && r.value <= 0.1).length;
		const whiteAdvantage = successfulResults.filter(r => r.value < -0.1).length;

		console.log(`\nValue Distribution:`);
		console.log(` Black advantage (>0.1): ${blackAdvantage} (${(blackAdvantage / successfulResults.length * 100).toFixed(1)}%)`);
		console.log(` Balanced (-0.1 to 0.1): ${balanced} (${(balanced / successfulResults.length * 100).toFixed(1)}%)`);
		console.log(` White advantage (<-0.1): ${whiteAdvantage} (${(whiteAdvantage / successfulResults.length * 100).toFixed(1)}%)`);
	}

	// Save results to JSON if output path provided
	if (config.outputPath) {
		const outputData = {
			evaluationTime: new Date().toISOString(),
			modelPath: config.modelPath,
			inputPath: config.inputPath,
			totalFiles: results.length,
			successful: successfulResults.length,
			failed: failedResults.length,
			averageValue,
			durationMs: duration,
			signAccuracy: {
				filesWithComments: resultsWithComments.length,
				correctPredictions,
				accuracy: accuracy.toFixed(1) + "%"
			},
			// cleanedTgn is deliberately left out of the report
			results: results.map(r => ({
				filename: r.filename,
				filepath: r.filepath,
				value: r.value,
				interpretation: r.interpretation,
				moveCount: r.moveCount,
				boardShape: r.boardShape,
				commentValue: r.commentValue,
				signMatch: r.signMatch,
				error: r.error
			}))
		};

		// Create the target directory first so a finished run is not lost to
		// ENOENT when the report's folder does not exist yet.
		fs.mkdirSync(path.dirname(config.outputPath), { recursive: true });
		fs.writeFileSync(config.outputPath, JSON.stringify(outputData, null, 2), "utf-8");
		console.log(`\n✓ Results saved to: ${config.outputPath}`);
	}

	console.log("=".repeat(80));
}

/**
 * Entry point: parse arguments, run the evaluation, and exit with code 1 on
 * any uncaught error.
 */
async function main(): Promise<void> {
	try {
		const config = parseArgs();
		await evaluateDataset(config);
	} catch (error: unknown) {
		console.error("Error:", error);
		process.exit(1);
	}
}

// Run main function
await main();