Spaces:
Sleeping
Sleeping
/**
 * TGN File Evaluation Tool
 *
 * This script evaluates TGN files using the value prediction model.
 * It cleans TGN content by removing trailing comments, Pass moves, and spaces,
 * then evaluates the position value using TrigoEvaluationAgent.
 *
 * Features:
 * - Batch evaluation of all .tgn files in a directory
 * - TGN cleaning (remove trailing comments, Pass moves, trim spaces)
 * - Position value prediction in range [-1, 1]
 * - Output results to console and optional JSON file
 *
 * Usage:
 *   npx tsx tools/evaluateTgnFiles.ts [options]
 *
 * Options:
 *   --model <path>   Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)
 *   --input <path>   Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)
 *   --output <path>  Optional JSON output file for results
 *   --verbose        Enable verbose logging
 *   --help           Show this help message
 *
 * Examples:
 *   # Evaluate all TGN files in directory (uses model from .env)
 *   npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay
 *
 *   # Evaluate with specific model
 *   npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay
 *
 *   # Save results to JSON file
 *   npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json
 */
| import * as ort from "onnxruntime-node"; | |
| import * as path from "path"; | |
| import * as fs from "fs"; | |
| import { fileURLToPath } from "url"; | |
| import { TrigoGame } from "../inc/trigo/game"; | |
| import { ModelInferencer } from "../inc/modelInferencer"; | |
| import { TrigoEvaluationAgent } from "../inc/trigoEvaluationAgent"; | |
| import { initializeParsers } from "../inc/trigo/parserInit"; | |
| import { loadEnvConfig, getOnnxModelPaths, getAbsoluteModelPath, getOnnxSessionOptions } from "../inc/config"; | |
// ES modules have no built-in __filename/__dirname, so derive both from import.meta.url.
const __filename = fileURLToPath(import.meta.url);
const __dirname = path.dirname(__filename);

// Environment configuration is loaded (top-level await) before the model paths are queried.
await loadEnvConfig();

// Model paths reported by the config module; used as the default for --model.
const defaultModelPaths = getOnnxModelPaths();
/**
 * Run-time settings for one invocation of the tool, as assembled by parseArgs().
 */
interface EvaluationConfig {
	/** Path of the ONNX model file handed to the inference session. */
	modelPath: string;
	/** A single .tgn file, or a directory that is scanned for .tgn files. */
	inputPath: string;
	/** When set, the results are additionally written to this JSON file. */
	outputPath?: string;
	/** Switches per-file detail logging on. */
	verbose: boolean;
	/** Vocabulary size forwarded to the ModelInferencer. */
	vocabSize: number;
	/** Sequence length forwarded to the ModelInferencer. */
	seqLen: number;
}
/**
 * Outcome of evaluating one TGN file. On failure `error` is set and the
 * remaining fields carry placeholder values.
 */
interface TgnEvaluationResult {
	/** Base name of the evaluated file. */
	filename: string;
	/** Path of the evaluated file as it was passed in. */
	filepath: string;
	/** Value reported by the evaluation agent; 0 when evaluation failed. */
	value: number;
	/** Interpretation text reported by the agent; "Error" when evaluation failed. */
	interpretation: string;
	/** Length of the parsed game's history; 0 when evaluation failed. */
	moveCount: number;
	/** Board dimensions formatted as "x×y×z"; "Unknown" when evaluation failed. */
	boardShape: string;
	/** TGN text after cleaning; empty when evaluation failed. */
	cleanedTgn: string;
	/** Numeric value read from the trailing comment of the file, if any. */
	commentValue?: number;
	/** Whether the sign of `value` equals the sign of `commentValue`. */
	signMatch?: boolean;
	/** Error message; present only when evaluation failed. */
	error?: string;
}
/**
 * Parse command line arguments into an EvaluationConfig.
 *
 * Recognised options: --model, --input, --output (each followed by a value),
 * --verbose and --help. Arguments that do not start with "--" and are not
 * consumed as an option value are ignored.
 *
 * The process is terminated (exit code 1, or 0 for --help) when:
 * - an unknown "--" option is given,
 * - a value-taking option is the last argument or is followed by another "--" option,
 * - no model path is available,
 * - the model path or the input path does not exist.
 *
 * @returns The configuration with existing model and input paths.
 */
function parseArgs(): EvaluationConfig {
	const args = process.argv.slice(2);
	const config: EvaluationConfig = {
		modelPath: getAbsoluteModelPath(defaultModelPaths.evaluationModel),
		inputPath: path.join(__dirname, "output/selfplay"),
		verbose: false,
		vocabSize: 128,
		seqLen: 256
	};

	// Returns the value that follows the option at `index`. Without this check a
	// trailing "--output" was silently dropped and "--model --verbose" consumed
	// the next flag as if it were a path.
	const valueAfter = (index: number): string => {
		const value = args[index + 1];
		if (value === undefined || value.startsWith("--")) {
			console.error(`Error: Option ${args[index]} requires a value`);
			printHelp();
			process.exit(1);
		}
		return value;
	};

	for (let i = 0; i < args.length; i++) {
		const arg = args[i];
		switch (arg) {
			case "--model":
				config.modelPath = valueAfter(i);
				i++; // skip the consumed value
				break;
			case "--input":
				config.inputPath = valueAfter(i);
				i++;
				break;
			case "--output":
				config.outputPath = valueAfter(i);
				i++;
				break;
			case "--verbose":
				config.verbose = true;
				break;
			case "--help":
				printHelp();
				process.exit(0);
			default:
				if (arg.startsWith("--")) {
					console.error(`Unknown option: ${arg}`);
					printHelp();
					process.exit(1);
				}
		}
	}

	// The model path normally comes from the environment default; it is only
	// missing when neither the default nor --model supplied one.
	if (!config.modelPath) {
		console.error("Error: --model argument is required or set ONNX_EVALUATION_MODEL env variable");
		printHelp();
		process.exit(1);
	}
	if (!fs.existsSync(config.modelPath)) {
		console.error(`Error: Model file not found: ${config.modelPath}`);
		process.exit(1);
	}
	if (!fs.existsSync(config.inputPath)) {
		console.error(`Error: Input path not found: ${config.inputPath}`);
		process.exit(1);
	}

	return config;
}
/**
 * Write the usage text (options and examples) to standard output with a
 * single console.log call.
 */
function printHelp(): void {
	// The leading and trailing empty entries reproduce the blank line that
	// surrounds the text.
	const helpLines = [
		"",
		"Usage: npx tsx tools/evaluateTgnFiles.ts [options]",
		"Options:",
		"--model <path> Path to evaluation mode ONNX model (default: from ONNX_EVALUATION_MODEL env var)",
		"--input <path> Directory containing TGN files or single TGN file (default: ./tools/output/selfplay)",
		"--output <path> Optional JSON output file for results",
		"--verbose Enable verbose logging",
		"--help Show this help message",
		"Examples:",
		"# Evaluate all TGN files in directory (uses model from .env)",
		"npx tsx tools/evaluateTgnFiles.ts --input ./tools/output/selfplay",
		"# Evaluate with specific model",
		"npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./tools/output/selfplay",
		"# Save results to JSON file",
		"npx tsx tools/evaluateTgnFiles.ts --model ./public/onnx/model.onnx --input ./games --output ./results.json",
		""
	];
	console.log(helpLines.join("\n"));
}
/**
 * Read the numeric value carried by a trailing comment of a TGN text.
 *
 * Lines are scanned from the end. Blank lines and comment lines (starting with
 * ';') without a number are skipped; the scan ends at the first line that is
 * neither blank nor a comment.
 *
 * @param tgnContent Raw TGN text, before any cleaning.
 * @returns The number found directly after a ';' (e.g. "; -16" gives -16), or
 *          undefined when the trailing comments hold no such number.
 */
function extractCommentValue(tgnContent: string): number | undefined {
	// ';', optional whitespace, then a signed integer or decimal.
	const numericComment = /;\s*([+-]?\d+(?:\.\d+)?)/;
	const rows = tgnContent.split('\n');

	let idx = rows.length;
	while (idx-- > 0) {
		const text = rows[idx].trim();
		if (text === '') {
			continue;
		}
		if (!text.startsWith(';')) {
			// Reached game content: no trailing comment value exists.
			return undefined;
		}
		const found = numericComment.exec(text);
		if (found) {
			return parseFloat(found[1]);
		}
	}

	return undefined;
}
/**
 * Strip the tail of a TGN text before evaluation.
 *
 * Two passes run over the end of the text, in this order:
 * 1. trailing blank lines and comment lines (starting with ';') are dropped;
 * 2. trailing lines that are "P" or end with " P" (Pass moves) are dropped.
 * The remaining lines are joined and surrounding whitespace is trimmed.
 * The header comment of the original states this mirrors test_evaluation_mode.js.
 *
 * NOTE(review): pass 2 removes the whole line, so any other move on a line
 * ending in " P" is removed with it - confirm this is intended for the TGN layout.
 *
 * @param tgnContent Raw TGN text.
 * @returns The cleaned TGN text.
 */
function cleanTGN(tgnContent: string): string {
	const rows = tgnContent.split('\n');

	const isBlankOrComment = (row: string): boolean => {
		const text = row.trim();
		return text === '' || text.startsWith(';');
	};
	const isPassLine = (row: string): boolean => {
		const text = row.trim();
		return text === 'P' || text.endsWith(' P');
	};

	// `end` is the exclusive index of the last line that is kept.
	let end = rows.length;
	while (end > 0 && isBlankOrComment(rows[end - 1])) {
		end--;
	}
	while (end > 0 && isPassLine(rows[end - 1])) {
		end--;
	}

	return rows.slice(0, end).join('\n').trim();
}
/**
 * Build the evaluation agent: create an ONNX inference session for the
 * configured model, attach it to a ModelInferencer and wrap that in a
 * TrigoEvaluationAgent.
 *
 * @param config Supplies the model path, vocabulary size and sequence length.
 * @returns The agent backed by the newly created session.
 */
async function initializeAgent(config: EvaluationConfig): Promise<TrigoEvaluationAgent> {
	const { modelPath, vocabSize, seqLen } = config;

	console.log("Initializing Evaluation Agent...");
	console.log(` Model: ${modelPath}`);
	console.log(` Vocab Size: ${vocabSize}`);
	console.log(` Sequence Length: ${seqLen}`);

	const session = await ort.InferenceSession.create(modelPath, getOnnxSessionOptions());

	// The casts bridge onnxruntime-node's types to whatever ModelInferencer declares.
	const inferencer = new ModelInferencer(ort.Tensor as any, { vocabSize, seqLen, modelPath });
	inferencer.setSession(session as any);

	const agent = new TrigoEvaluationAgent(inferencer);
	console.log("✓ Agent initialized\n");
	return agent;
}
/**
 * Evaluate one TGN file.
 *
 * The file is read, its trailing comment value is extracted, the text is
 * cleaned and parsed into a TrigoGame, and the agent evaluates the position.
 * When a comment value exists, its sign is compared with the sign of the
 * model value.
 *
 * Any error is caught, logged to stderr and reported through the `error`
 * field of the result; this function does not throw for a bad file.
 *
 * @param agent    Agent used to evaluate the parsed game.
 * @param filepath Path of the TGN file.
 * @param config   Only `verbose` is read here.
 * @returns The evaluation result, or a placeholder result with `error` set.
 */
async function evaluateTgnFile(
	agent: TrigoEvaluationAgent,
	filepath: string,
	config: EvaluationConfig
): Promise<TgnEvaluationResult> {
	const filename = path.basename(filepath);
	const { verbose } = config;

	try {
		const rawContent = fs.readFileSync(filepath, "utf-8");

		// The comment value must be read first: cleaning removes trailing comments.
		const commentValue = extractCommentValue(rawContent);
		const cleanedTgn = cleanTGN(rawContent);

		if (verbose) {
			console.log(`\nProcessing: ${filename}`);
			console.log(` Raw length: ${rawContent.length} chars`);
			console.log(` Cleaned length: ${cleanedTgn.length} chars`);
			if (commentValue !== undefined) {
				console.log(` Comment value: ${commentValue}`);
			}
		}

		const game = TrigoGame.fromTGN(cleanedTgn);

		if (verbose) {
			console.log(` Game type: ${typeof game}`);
			console.log(` Is TrigoGame: ${game instanceof TrigoGame}`);
			console.log(` Has getHistory: ${typeof game.getHistory}`);
		}

		const shape = game.getShape();
		const boardShape = `${shape.x}×${shape.y}×${shape.z}`;
		const moveCount = game.getHistory().length;

		if (verbose) {
			console.log(` Board: ${boardShape}`);
			console.log(` Moves: ${moveCount}`);
		}

		const evaluation = await agent.evaluatePosition(game);

		// Note: There appears to be a sign inconsistency between model output and training data
		const signMatch = commentValue === undefined
			? undefined
			: Math.sign(evaluation.value) === Math.sign(commentValue);

		if (verbose) {
			console.log(` Value: ${evaluation.value.toFixed(4)}`);
			console.log(` ${evaluation.interpretation}`);
			if (signMatch !== undefined) {
				console.log(` Comment: ${commentValue}, Sign match: ${signMatch ? '✓' : '✗'}`);
			}
		}

		return {
			filename,
			filepath,
			value: evaluation.value,
			interpretation: evaluation.interpretation,
			moveCount,
			boardShape,
			cleanedTgn,
			commentValue,
			signMatch
		};
	} catch (error) {
		const errorMessage = error instanceof Error ? error.message : String(error);
		console.error(`✗ Error evaluating ${filename}: ${errorMessage}`);

		// Placeholder result; callers tell it apart from a success by `error`.
		return {
			filename,
			filepath,
			value: 0,
			interpretation: "Error",
			moveCount: 0,
			boardShape: "Unknown",
			cleanedTgn: "",
			error: errorMessage
		};
	}
}
/**
 * Resolve the input path to the list of TGN files to evaluate.
 *
 * - A file path must end in ".tgn" and is returned as a one-element list.
 * - A directory is listed (non-recursively); entries ending in ".tgn" are
 *   returned, joined with the directory path and sorted so the evaluation
 *   order does not depend on the file system's listing order. Sub-directories
 *   whose name happens to end in ".tgn" are skipped, since they cannot be read
 *   as a file.
 *
 * The process exits with code 1 when a single file has another extension, when
 * a directory holds no .tgn files, or when the path is neither file nor directory.
 *
 * @param inputPath Existing file or directory path.
 * @returns Paths of the TGN files, in sorted order for a directory.
 * @throws If fs.statSync or fs.readdirSync fails (e.g. the path does not exist).
 */
function getTgnFiles(inputPath: string): string[] {
	const stats = fs.statSync(inputPath);

	if (stats.isFile()) {
		if (!inputPath.endsWith('.tgn')) {
			console.error(`Error: Input file must be a .tgn file: ${inputPath}`);
			process.exit(1);
		}
		return [inputPath];
	}

	if (stats.isDirectory()) {
		const files = fs.readdirSync(inputPath, { withFileTypes: true })
			// Only directories are excluded, so symlinked files are still picked up.
			.filter(entry => !entry.isDirectory() && entry.name.endsWith('.tgn'))
			.map(entry => path.join(inputPath, entry.name))
			.sort();

		if (files.length === 0) {
			console.error(`Error: No .tgn files found in directory: ${inputPath}`);
			process.exit(1);
		}
		return files;
	}

	console.error(`Error: Invalid input path: ${inputPath}`);
	process.exit(1);
}
/**
 * Run the full evaluation: print the configuration, initialise the TGN parser
 * and the agent, evaluate every TGN file sequentially, print summary
 * statistics and, when `config.outputPath` is set, write the results as JSON.
 *
 * Statistics cover: success/failure counts, the average value of successful
 * evaluations, timing, sign accuracy (files whose trailing comment value could
 * be compared with the model value) and a three-bucket value distribution
 * with thresholds at ±0.1.
 *
 * The parent directory of the output file is created if missing, so a run is
 * not lost at the very end because of a non-existent output directory.
 *
 * @param config Validated configuration from parseArgs().
 */
async function evaluateDataset(config: EvaluationConfig): Promise<void> {
	const rule = "=".repeat(80);

	console.log(rule);
	console.log("TGN File Evaluation Tool");
	console.log(rule);
	console.log(`Configuration:`);
	console.log(` Model: ${config.modelPath}`);
	console.log(` Input: ${config.inputPath}`);
	if (config.outputPath) {
		console.log(` Output: ${config.outputPath}`);
	}
	console.log(` Verbose: ${config.verbose}`);
	console.log();

	// The parser is initialised before any TGN text is parsed.
	console.log("Initializing TGN parser...");
	await initializeParsers();
	console.log("✓ Parser initialized\n");

	const tgnFiles = getTgnFiles(config.inputPath);
	console.log(`Found ${tgnFiles.length} TGN file(s) to evaluate\n`);

	const agent = await initializeAgent(config);

	console.log("Evaluating files...");
	console.log(rule);

	const results: TgnEvaluationResult[] = [];
	const startTime = Date.now();

	// Files are evaluated one after another against the single agent.
	for (let i = 0; i < tgnFiles.length; i++) {
		const filepath = tgnFiles[i];
		const filename = path.basename(filepath);
		const progress = ((i + 1) / tgnFiles.length * 100).toFixed(1);

		const result = await evaluateTgnFile(agent, filepath, config);
		results.push(result);

		// In verbose mode evaluateTgnFile already printed the details.
		if (!config.verbose) {
			const statusIcon = result.error ? "✗" : "✓";
			const valueStr = result.error ? "ERROR" : result.value.toFixed(4);
			const commentStr = result.commentValue !== undefined
				? ` (comment: ${result.commentValue})`
				: "";
			const matchStr = result.signMatch !== undefined
				? (result.signMatch ? " ✓" : " ✗")
				: "";
			console.log(
				`[${progress}%] ${statusIcon} ${filename}: ${valueStr}${commentStr} - ${result.interpretation}${matchStr}`
			);
		}
	}

	const duration = Date.now() - startTime;

	// Aggregate statistics
	const successfulResults = results.filter(r => !r.error);
	const failedResults = results.filter(r => r.error);
	const averageValue = successfulResults.length > 0
		? successfulResults.reduce((sum, r) => sum + r.value, 0) / successfulResults.length
		: 0;

	// Sign accuracy only counts files that had a comparable comment value.
	const resultsWithComments = successfulResults.filter(r => r.signMatch !== undefined);
	const correctPredictions = resultsWithComments.filter(r => r.signMatch === true).length;
	const accuracy = resultsWithComments.length > 0
		? (correctPredictions / resultsWithComments.length) * 100
		: 0;

	console.log(rule);
	console.log("Evaluation Complete!");
	console.log(rule);
	console.log(`Total files: ${results.length}`);
	console.log(`Successful: ${successfulResults.length}`);
	console.log(`Failed: ${failedResults.length}`);
	console.log(`Average value: ${averageValue.toFixed(4)}`);
	console.log(`Total time: ${(duration / 1000).toFixed(1)}s`);
	console.log(`Average time per file: ${(duration / results.length).toFixed(0)}ms`);

	if (resultsWithComments.length > 0) {
		console.log(`\nSign Accuracy:`);
		console.log(` Files with comments: ${resultsWithComments.length}`);
		console.log(` Correct predictions: ${correctPredictions}`);
		console.log(` Accuracy: ${accuracy.toFixed(1)}%`);
	}

	if (successfulResults.length > 0) {
		const blackAdvantage = successfulResults.filter(r => r.value > 0.1).length;
		const balanced = successfulResults.filter(r => r.value >= -0.1 && r.value <= 0.1).length;
		const whiteAdvantage = successfulResults.filter(r => r.value < -0.1).length;
		console.log(`\nValue Distribution:`);
		console.log(` Black advantage (>0.1): ${blackAdvantage} (${(blackAdvantage / successfulResults.length * 100).toFixed(1)}%)`);
		console.log(` Balanced (-0.1 to 0.1): ${balanced} (${(balanced / successfulResults.length * 100).toFixed(1)}%)`);
		console.log(` White advantage (<-0.1): ${whiteAdvantage} (${(whiteAdvantage / successfulResults.length * 100).toFixed(1)}%)`);
	}

	if (config.outputPath) {
		const outputData = {
			evaluationTime: new Date().toISOString(),
			modelPath: config.modelPath,
			inputPath: config.inputPath,
			totalFiles: results.length,
			successful: successfulResults.length,
			failed: failedResults.length,
			averageValue,
			durationMs: duration,
			signAccuracy: {
				filesWithComments: resultsWithComments.length,
				correctPredictions,
				accuracy: accuracy.toFixed(1) + "%"
			},
			// cleanedTgn is deliberately left out of the JSON output.
			results: results.map(r => ({
				filename: r.filename,
				filepath: r.filepath,
				value: r.value,
				interpretation: r.interpretation,
				moveCount: r.moveCount,
				boardShape: r.boardShape,
				commentValue: r.commentValue,
				signMatch: r.signMatch,
				error: r.error
			}))
		};

		// Create the target directory first: writeFileSync fails with ENOENT on a
		// missing directory, which would discard the whole run at the last step.
		fs.mkdirSync(path.dirname(config.outputPath), { recursive: true });
		fs.writeFileSync(config.outputPath, JSON.stringify(outputData, null, 2), "utf-8");
		console.log(`\n✓ Results saved to: ${config.outputPath}`);
	}

	console.log(rule);
}
/**
 * Entry point: parse the command line, run the evaluation, and exit with
 * code 1 after logging if anything throws.
 */
async function main(): Promise<void> {
	try {
		await evaluateDataset(parseArgs());
	} catch (error) {
		console.error("Error:", error);
		process.exit(1);
	}
}

// Start the tool; main() handles its own errors, so the promise is not awaited.
void main();