// trigo / trigo-web / tools / evaluateTgnFiles.ts
// k-l-lambda: Update trigo-web with VS People multiplayer mode (15f353f)
/**
* TGN File Evaluation Tool
*
* This script evaluates TGN files using the value prediction model.
* It cleans TGN content by removing trailing comments, Pass moves, and spaces,
* then evaluates the position value using TrigoEvaluationAgent.
*
* Features:
* - Batch evaluation of all .tgn files in a directory
* - TGN cleaning (remove trailing comments, Pass moves, trim spaces)
* - Position value prediction in range [-1, 1]
* - Output results to console and optional JSON file
*
* Usage:
* npx tsx tools/evaluateTgnFiles.ts [options]
*
* Options:
* --model <path> Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)
* --input <path> Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)
* --output <path> Optional JSON output file for results
* --verbose Enable verbose logging
* --help Show this help message
*
* Examples:
* # Evaluate all TGN files in directory (uses model from .env)
* npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay
*
* # Evaluate with specific model
* npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay
*
* # Save results to JSON file
* npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json
*/
import * as ort from "onnxruntime-node";
import * as path from "path";
import * as fs from "fs";
import { fileURLToPath } from "url";
import { TrigoGame } from "../inc/trigo/game";
import { ModelInferencer } from "../inc/modelInferencer";
import { TrigoEvaluationAgent } from "../inc/trigoEvaluationAgent";
import { initializeParsers } from "../inc/trigo/parserInit";
import { loadEnvConfig, getOnnxModelPaths, getAbsoluteModelPath, getOnnxSessionOptions } from "../inc/config";
// ES modules do not provide __filename/__dirname, so derive them from this
// module's URL. __dirname is used below to build the default input path.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);
// Load environment variables (top-level await: the module does not continue
// until loading has finished).
await loadEnvConfig();
// Default model paths from environment. Read after loadEnvConfig() — presumably
// so the paths reflect the loaded environment; confirm against ../inc/config.
const defaultModelPaths = getOnnxModelPaths();
// Configuration for one run of the evaluation tool (built by parseArgs).
interface EvaluationConfig {
// Path of the ONNX model file used to create the inference session.
modelPath: string;
// A single .tgn file, or a directory whose .tgn files are evaluated.
inputPath: string;
// When set, a JSON report is written to this path after evaluation.
outputPath?: string;
// When true, per-file details are logged instead of one progress line per file.
verbose: boolean;
// Passed to ModelInferencer as `vocabSize` (parseArgs sets 128).
vocabSize: number;
// Passed to ModelInferencer as `seqLen` (parseArgs sets 256).
seqLen: number;
}
// Evaluation result for a single TGN file.
// On failure `error` is set and the other fields hold placeholder values
// (value 0, interpretation "Error", moveCount 0, boardShape "Unknown", cleanedTgn "").
interface TgnEvaluationResult {
// Base name of the evaluated file.
filename: string;
// Path of the evaluated file as it was passed to evaluateTgnFile.
filepath: string;
// Value reported by the evaluation agent for the position.
value: number;
// Human-readable interpretation reported by the evaluation agent.
interpretation: string;
// Number of entries in the parsed game's history.
moveCount: number;
// Board dimensions formatted as "x×y×z".
boardShape: string;
// TGN text after cleanTGN() was applied; this is what gets parsed.
cleanedTgn: string;
commentValue?: number; // Numeric value found in a trailing ';' comment, if any
signMatch?: boolean; // Whether Math.sign of the model value equals Math.sign of commentValue
// Error message when evaluation of this file failed.
error?: string;
}
/**
 * Parse command line arguments into an evaluation configuration.
 *
 * Starts from defaults (model path from the environment configuration, input
 * directory `output/selfplay` next to this script) and applies `--model`,
 * `--input`, `--output` and `--verbose`. `--help` prints usage and exits with
 * status 0. An unknown `--option`, or a value-taking option without a value,
 * prints an error plus usage and exits with status 1. Arguments that do not
 * start with `--` and are not consumed as an option value are ignored.
 *
 * After parsing, the process exits with status 1 if the model path is empty or
 * if the model file or the input path does not exist.
 *
 * @returns The validated configuration.
 */
function parseArgs(): EvaluationConfig {
	const args = process.argv.slice(2);
	const config: EvaluationConfig = {
		modelPath: getAbsoluteModelPath(defaultModelPaths.evaluationModel),
		inputPath: path.join(__dirname, "output/selfplay"),
		verbose: false,
		vocabSize: 128,
		seqLen: 256
	};

	// Return the value following the option at `index`. Without this check a
	// trailing `--output` silently disabled output, and `--model --verbose`
	// used "--verbose" as the model path.
	const takeValue = (index: number): string => {
		const value = args[index + 1];
		if (value === undefined || value.startsWith("--")) {
			console.error(`Error: Option ${args[index]} requires a value`);
			printHelp();
			process.exit(1);
		}
		return value;
	};

	for (let i = 0; i < args.length; i++) {
		switch (args[i]) {
			case "--model":
				config.modelPath = takeValue(i);
				i++; // skip the consumed value
				break;
			case "--input":
				config.inputPath = takeValue(i);
				i++;
				break;
			case "--output":
				config.outputPath = takeValue(i);
				i++;
				break;
			case "--verbose":
				config.verbose = true;
				break;
			case "--help":
				printHelp();
				// process.exit never returns, so control does not reach `default`.
				process.exit(0);
			default:
				if (args[i].startsWith("--")) {
					console.error(`Unknown option: ${args[i]}`);
					printHelp();
					process.exit(1);
				}
		}
	}

	// The model path normally comes from the environment default; it is only
	// missing when neither --model nor the default yields a non-empty path.
	if (!config.modelPath) {
		console.error("Error: --model argument is required or set ONNX_EVALUATION_MODEL env variable");
		printHelp();
		process.exit(1);
	}
	if (!fs.existsSync(config.modelPath)) {
		console.error(`Error: Model file not found: ${config.modelPath}`);
		process.exit(1);
	}
	if (!fs.existsSync(config.inputPath)) {
		console.error(`Error: Input path not found: ${config.inputPath}`);
		process.exit(1);
	}
	return config;
}
/**
 * Write the usage text (options and example invocations) to standard output
 * with a single console.log call.
 */
function printHelp(): void {
	// The first and last entries are empty so the joined text begins and ends
	// with a newline.
	const helpLines: string[] = [
		"",
		"Usage: npx tsx tools/evaluateTgnFiles.ts [options]",
		"Options:",
		"--model <path> Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)",
		"--input <path> Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)",
		"--output <path> Optional JSON output file for results",
		"--verbose Enable verbose logging",
		"--help Show this help message",
		"Examples:",
		"# Evaluate all TGN files in directory (uses model from .env)",
		"npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay",
		"# Evaluate with specific model",
		"npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay",
		"# Save results to JSON file",
		"npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json",
		""
	];
	console.log(helpLines.join("\n"));
}
/**
 * Read the numeric value from the comment lines at the end of raw TGN text.
 *
 * Lines are scanned from the last one upward. Blank lines are skipped. A line
 * starting with ';' is searched for a number directly after a ';' (for example
 * "; -16" or "; 10"); comment lines without such a number are skipped. The scan
 * ends at the first line that is neither blank nor a comment.
 *
 * @param tgnContent - Raw TGN text, before any cleaning.
 * @returns The parsed number, or undefined when no trailing comment has one.
 */
function extractCommentValue(tgnContent: string): number | undefined {
	const numberAfterSemicolon = /;\s*([+-]?\d+(?:\.\d+)?)/;
	const bottomUp = tgnContent.split('\n').reverse();
	for (const rawLine of bottomUp) {
		const text = rawLine.trim();
		if (text.length === 0) {
			continue;
		}
		if (!text.startsWith(';')) {
			// Reached game content: the trailing comment section is over.
			return undefined;
		}
		const found = numberAfterSemicolon.exec(text);
		if (found !== null) {
			return parseFloat(found[1]);
		}
	}
	return undefined;
}
/**
 * Strip the tail of a TGN document before evaluation.
 *
 * Works in two passes from the end of the text: first drop blank lines and
 * ';' comment lines, then drop lines that are exactly "P" or end with " P"
 * (the whole line is dropped). The remaining lines are re-joined and
 * surrounding whitespace is trimmed.
 * Intended to match the behavior of test_evaluation_mode.js.
 *
 * @param tgnContent - Raw TGN text.
 * @returns The cleaned TGN text (possibly empty).
 */
function cleanTGN(tgnContent: string): string {
	const rows = tgnContent.split('\n');
	const isBlankOrComment = (text: string): boolean => text === '' || text.startsWith(';');
	const isPassLine = (text: string): boolean => text === 'P' || text.endsWith(' P');

	// `keep` is the number of leading rows that survive the trimming.
	let keep = rows.length;
	while (keep > 0 && isBlankOrComment(rows[keep - 1].trim())) {
		keep--;
	}
	// Pass lines are only removed after the comment/blank tail is gone; a
	// comment sitting above a pass line stops this second pass.
	while (keep > 0 && isPassLine(rows[keep - 1].trim())) {
		keep--;
	}
	return rows.slice(0, keep).join('\n').trim();
}
/**
 * Build the evaluation agent for the configured model.
 *
 * Logs the model settings, creates an ONNX Runtime inference session for the
 * model file, attaches it to a ModelInferencer, and wraps that inferencer in a
 * TrigoEvaluationAgent.
 *
 * @param config - Supplies the model path, vocabulary size and sequence length.
 * @returns The agent, once the session has been created.
 */
async function initializeAgent(config: EvaluationConfig): Promise<TrigoEvaluationAgent> {
	const { modelPath, vocabSize, seqLen } = config;

	const introLines = [
		"Initializing Evaluation Agent...",
		` Model: ${modelPath}`,
		` Vocab Size: ${vocabSize}`,
		` Sequence Length: ${seqLen}`
	];
	for (const line of introLines) {
		console.log(line);
	}

	const session = await ort.InferenceSession.create(modelPath, getOnnxSessionOptions());

	// The `as any` casts are carried over unchanged from the original code.
	const inferencer = new ModelInferencer(ort.Tensor as any, { vocabSize, seqLen, modelPath });
	inferencer.setSession(session as any);

	const agent = new TrigoEvaluationAgent(inferencer);
	console.log("✓ Agent initialized\n");
	return agent;
}
/**
 * Evaluate one TGN file and describe the outcome.
 *
 * Reads the file, extracts the numeric tail comment (before cleaning removes
 * it), cleans the TGN text, parses it into a game, and asks the agent for a
 * position evaluation. When a tail-comment value exists, the sign of the model
 * value is compared with the sign of that value.
 *
 * Failures are not thrown: they are logged to stderr and returned as a result
 * whose `error` field holds the message.
 *
 * @param agent - Agent used to evaluate the parsed game.
 * @param filepath - Path of the TGN file to read.
 * @param config - Only `verbose` is read here; it enables per-step logging.
 * @returns The evaluation result, or an error result with placeholder values.
 */
async function evaluateTgnFile(
	agent: TrigoEvaluationAgent,
	filepath: string,
	config: EvaluationConfig
): Promise<TgnEvaluationResult> {
	const filename = path.basename(filepath);

	try {
		const rawContent = fs.readFileSync(filepath, "utf-8");
		// The tail comment must be read from the raw text: cleaning strips it.
		const commentValue = extractCommentValue(rawContent);
		const cleanedTgn = cleanTGN(rawContent);

		if (config.verbose) {
			console.log(`\nProcessing: ${filename}`);
			console.log(` Raw length: ${rawContent.length} chars`);
			console.log(` Cleaned length: ${cleanedTgn.length} chars`);
			if (commentValue !== undefined) {
				console.log(` Comment value: ${commentValue}`);
			}
		}

		const game = TrigoGame.fromTGN(cleanedTgn);
		if (config.verbose) {
			console.log(` Game type: ${typeof game}`);
			console.log(` Is TrigoGame: ${game instanceof TrigoGame}`);
			console.log(` Has getHistory: ${typeof game.getHistory}`);
		}

		const shape = game.getShape();
		const boardShape = `${shape.x}×${shape.y}×${shape.z}`;
		const moveCount = game.getHistory().length;
		if (config.verbose) {
			console.log(` Board: ${boardShape}`);
			console.log(` Moves: ${moveCount}`);
		}

		const evaluation = await agent.evaluatePosition(game);

		// NOTE (carried over from the original author): there appears to be a
		// sign inconsistency between model output and training data.
		const signMatch = commentValue === undefined
			? undefined
			: Math.sign(evaluation.value) === Math.sign(commentValue);

		if (config.verbose) {
			console.log(` Value: ${evaluation.value.toFixed(4)}`);
			console.log(` ${evaluation.interpretation}`);
			if (signMatch !== undefined) {
				console.log(` Comment: ${commentValue}, Sign match: ${signMatch ? '✓' : '✗'}`);
			}
		}

		return {
			filename,
			filepath,
			value: evaluation.value,
			interpretation: evaluation.interpretation,
			moveCount,
			boardShape,
			cleanedTgn,
			commentValue,
			signMatch
		};
	} catch (failure) {
		const message = failure instanceof Error ? failure.message : String(failure);
		console.error(`✗ Error evaluating ${filename}: ${message}`);
		// Placeholder values; callers tell failures apart by `error`.
		const failed: TgnEvaluationResult = {
			filename,
			filepath,
			value: 0,
			interpretation: "Error",
			moveCount: 0,
			boardShape: "Unknown",
			cleanedTgn: "",
			error: message
		};
		return failed;
	}
}
/**
 * Resolve the input path to the list of TGN files to evaluate.
 *
 * - A file path must end with ".tgn" and is returned as a one-element list.
 * - A directory yields its direct entries whose names end with ".tgn" and that
 *   are regular files, joined with the directory path and sorted by path so the
 *   evaluation order does not depend on the filesystem's listing order.
 *
 * The process exits with status 1 when a file does not end with ".tgn", when a
 * directory contains no matching files, or when the path is neither a file nor
 * a directory. `fs.statSync` throws if the path does not exist.
 *
 * @param inputPath - Path of a .tgn file or of a directory containing them.
 * @returns Paths of the TGN files to evaluate (never empty).
 */
function getTgnFiles(inputPath: string): string[] {
	const stats = fs.statSync(inputPath);

	if (stats.isFile()) {
		if (!inputPath.endsWith('.tgn')) {
			console.error(`Error: Input file must be a .tgn file: ${inputPath}`);
			process.exit(1);
		}
		return [inputPath];
	}

	if (stats.isDirectory()) {
		// statSync follows symlinks, so links to regular files are kept;
		// entries that cannot be stat'ed (e.g. dangling links) are dropped.
		const isRegularFile = (candidate: string): boolean => {
			try {
				return fs.statSync(candidate).isFile();
			} catch {
				return false;
			}
		};

		const files = fs.readdirSync(inputPath)
			.filter(name => name.endsWith('.tgn'))
			.map(name => path.join(inputPath, name))
			// A directory named "*.tgn" would otherwise fail later when read.
			.filter(isRegularFile)
			.sort();

		if (files.length === 0) {
			console.error(`Error: No .tgn files found in directory: ${inputPath}`);
			process.exit(1);
		}
		return files;
	}

	console.error(`Error: Invalid input path: ${inputPath}`);
	process.exit(1);
}
/**
 * Evaluate every TGN file selected by the configuration and report the results.
 *
 * Prints the configuration, initializes the TGN parser, collects the input
 * files, creates the evaluation agent, and evaluates the files one after
 * another. Afterwards it prints summary statistics (success/failure counts,
 * average value, timing, sign accuracy against tail comments, and the value
 * distribution) and, when `config.outputPath` is set, writes a JSON report.
 * The report's parent directory is created if it does not exist, so a finished
 * run is not lost to a missing output folder.
 *
 * @param config - Evaluation settings, as produced by parseArgs.
 * @returns A promise that resolves after all output has been written.
 */
async function evaluateDataset(config: EvaluationConfig): Promise<void> {
	const banner = "=".repeat(80);

	console.log(banner);
	console.log("TGN File Evaluation Tool");
	console.log(banner);
	console.log(`Configuration:`);
	console.log(` Model: ${config.modelPath}`);
	console.log(` Input: ${config.inputPath}`);
	if (config.outputPath) {
		console.log(` Output: ${config.outputPath}`);
	}
	console.log(` Verbose: ${config.verbose}`);
	console.log();

	// The parser is initialized before any TGN text is parsed.
	console.log("Initializing TGN parser...");
	await initializeParsers();
	console.log("✓ Parser initialized\n");

	const tgnFiles = getTgnFiles(config.inputPath);
	console.log(`Found ${tgnFiles.length} TGN file(s) to evaluate\n`);

	const agent = await initializeAgent(config);

	console.log("Evaluating files...");
	console.log(banner);
	const results: TgnEvaluationResult[] = [];
	const startTime = Date.now();

	// Files are evaluated sequentially, one awaited call at a time.
	for (let i = 0; i < tgnFiles.length; i++) {
		const filepath = tgnFiles[i];
		const result = await evaluateTgnFile(agent, filepath, config);
		results.push(result);

		// In verbose mode evaluateTgnFile already logs per-file details, so the
		// one-line progress report is printed only in non-verbose mode.
		if (!config.verbose) {
			const filename = path.basename(filepath);
			const progress = ((i + 1) / tgnFiles.length * 100).toFixed(1);
			const statusIcon = result.error ? "✗" : "✓";
			const valueStr = result.error ? "ERROR" : result.value.toFixed(4);
			const commentStr = result.commentValue !== undefined
				? ` (comment: ${result.commentValue})`
				: "";
			const matchStr = result.signMatch !== undefined
				? (result.signMatch ? " ✓" : " ✗")
				: "";
			console.log(
				`[${progress}%] ${statusIcon} ${filename}: ${valueStr}${commentStr} - ${result.interpretation}${matchStr}`
			);
		}
	}

	const duration = Date.now() - startTime;

	// Summary statistics. Failed files are excluded from value statistics.
	const successfulResults = results.filter(r => !r.error);
	const failedResults = results.filter(r => r.error);
	const averageValue = successfulResults.length > 0
		? successfulResults.reduce((sum, r) => sum + r.value, 0) / successfulResults.length
		: 0;

	// Sign accuracy covers only files that had a numeric tail comment.
	const resultsWithComments = successfulResults.filter(r => r.signMatch !== undefined);
	const correctPredictions = resultsWithComments.filter(r => r.signMatch === true).length;
	const accuracy = resultsWithComments.length > 0
		? (correctPredictions / resultsWithComments.length) * 100
		: 0;

	console.log(banner);
	console.log("Evaluation Complete!");
	console.log(banner);
	console.log(`Total files: ${results.length}`);
	console.log(`Successful: ${successfulResults.length}`);
	console.log(`Failed: ${failedResults.length}`);
	console.log(`Average value: ${averageValue.toFixed(4)}`);
	console.log(`Total time: ${(duration / 1000).toFixed(1)}s`);
	// getTgnFiles never returns an empty list, so results.length is at least 1.
	console.log(`Average time per file: ${(duration / results.length).toFixed(0)}ms`);

	if (resultsWithComments.length > 0) {
		console.log(`\nSign Accuracy:`);
		console.log(` Files with comments: ${resultsWithComments.length}`);
		console.log(` Correct predictions: ${correctPredictions}`);
		console.log(` Accuracy: ${accuracy.toFixed(1)}%`);
	}

	if (successfulResults.length > 0) {
		// Share of successful results, as a percentage with one decimal.
		const share = (count: number): string =>
			(count / successfulResults.length * 100).toFixed(1);

		const blackAdvantage = successfulResults.filter(r => r.value > 0.1).length;
		const balanced = successfulResults.filter(r => r.value >= -0.1 && r.value <= 0.1).length;
		const whiteAdvantage = successfulResults.filter(r => r.value < -0.1).length;
		console.log(`\nValue Distribution:`);
		console.log(` Black advantage (>0.1): ${blackAdvantage} (${share(blackAdvantage)}%)`);
		console.log(` Balanced (-0.1 to 0.1): ${balanced} (${share(balanced)}%)`);
		console.log(` White advantage (<-0.1): ${whiteAdvantage} (${share(whiteAdvantage)}%)`);
	}

	if (config.outputPath) {
		const outputData = {
			evaluationTime: new Date().toISOString(),
			modelPath: config.modelPath,
			inputPath: config.inputPath,
			totalFiles: results.length,
			successful: successfulResults.length,
			failed: failedResults.length,
			averageValue,
			durationMs: duration,
			signAccuracy: {
				filesWithComments: resultsWithComments.length,
				correctPredictions,
				accuracy: accuracy.toFixed(1) + "%"
			},
			// cleanedTgn is deliberately left out of the report entries.
			results: results.map(r => ({
				filename: r.filename,
				filepath: r.filepath,
				value: r.value,
				interpretation: r.interpretation,
				moveCount: r.moveCount,
				boardShape: r.boardShape,
				commentValue: r.commentValue,
				signMatch: r.signMatch,
				error: r.error
			}))
		};
		// Create the parent directory first: without it writeFileSync throws
		// ENOENT and the whole evaluation run would have to be repeated.
		fs.mkdirSync(path.dirname(config.outputPath), { recursive: true });
		fs.writeFileSync(config.outputPath, JSON.stringify(outputData, null, 2), "utf-8");
		console.log(`\n✓ Results saved to: ${config.outputPath}`);
	}
	console.log(banner);
}
/**
 * Script entry point: parse the command line, then run the evaluation.
 * Any error that escapes is printed to stderr and the process exits with
 * status 1.
 */
async function main(): Promise<void> {
	try {
		await evaluateDataset(parseArgs());
	} catch (failure) {
		console.error("Error:", failure);
		process.exit(1);
	}
}

// Start the tool. main() handles its own errors, so the returned promise is
// intentionally not awaited.
void main();