Spaces:
Sleeping
Sleeping
| ; | |
| /** | |
| * ONNX Model Inferencer (Frontend/Backend Common) | |
| * | |
| * Platform-agnostic inference logic that accepts ONNX session from platform-specific code. | |
| * No direct dependency on onnxruntime packages - uses dependency injection pattern. | |
| * | |
| * Adapted from Node.js test_inference.js for cross-platform use | |
| * Provides causal language model inference using GPT-2 ONNX model | |
| * | |
| * Vocabulary Design (128 tokens): | |
| * 0-3: Special tokens (PAD=0, START=1, END=2, VALUE=3) | |
| * 4-7: Reserved for future use | |
| * 10: LF (newline) for multi-line game records | |
| * 32-127: ASCII printable characters (direct identity mapping) | |
| * | |
| * This design uses direct identity mapping: token_id = ascii_value | |
| * No complex formulas needed - simple and efficient. | |
| */ | |
// CommonJS interop boilerplate emitted by the TypeScript compiler:
// flag this module as ES-module-compiled and pre-declare the named export.
Object.defineProperty(exports, "__esModule", { value: true });
exports.ModelInferencer = void 0;
| /** | |
| * Model Inferencer for Causal Language Model | |
| * Compatible with both frontend (onnxruntime-web) and backend (onnxruntime-node) | |
| */ | |
class ModelInferencer {
    /**
     * @param {Function} TensorClass - ONNX Tensor constructor injected by the
     *   platform layer (e.g. ort.Tensor from onnxruntime-web or onnxruntime-node),
     *   so this class never imports either runtime package directly.
     * @param {{vocabSize?: number, seqLen?: number}} [config] - Optional overrides.
     */
    constructor(TensorClass, config = {}) {
        this.session = null;
        // TGN tokenizer: Compact 128-token vocabulary with direct ASCII mapping
        // 0-3: Special tokens (PAD, START, END, VALUE)
        // 4-7: Reserved for future use
        // 10: Newline (LF)
        // 32-127: ASCII printable characters (direct identity mapping)
        this.PAD_TOKEN = 0;
        this.START_TOKEN = 1;
        this.END_TOKEN = 2;
        this.VALUE_TOKEN = 3;
        this.TensorClass = TensorClass;
        this.config = {
            vocabSize: config.vocabSize || 128, // Allow override via config
            seqLen: 256,
            ...config
        };
    }
    /**
     * Set the inference session (created by platform-specific code)
     * @param session - An ONNX InferenceSession-like object exposing
     *   run(), inputNames and outputNames.
     */
    setSession(session) {
        this.session = session;
        console.log("[ModelInferencer] ✓ Session set successfully");
        this.printModelInfo();
    }
    /**
     * Run basic inference test with a random input sequence.
     * @returns {Promise<{tokens: number[], text: string, logits: Float32Array, inferenceTime: number}>}
     * @throws {Error} if setSession() has not been called.
     */
    async testBasicInference() {
        if (!this.session) {
            throw new Error("Inferencer not initialized. Call setSession() first.");
        }
        console.log("[ModelInferencer] Running basic inference test...");
        const batchSize = 1;
        const seqLen = this.config.seqLen;
        // Create random input
        const inputIds = this.createRandomInput(batchSize, seqLen);
        const inputTensor = new this.TensorClass("int64", inputIds, [batchSize, seqLen]);
        // Run inference
        const startTime = performance.now();
        const results = await this.session.run({ input_ids: inputTensor });
        const inferenceTime = performance.now() - startTime;
        // Get logits
        const logits = results.logits;
        // Validate output
        this.validateOutput(logits, batchSize, seqLen);
        // Get predictions
        const predictions = this.getPredictions(logits.data, batchSize * seqLen);
        // Convert tokens to text
        const text = String.fromCharCode(...predictions.slice(0, 100));
        console.log("[ModelInferencer] Inference completed:");
        console.log(`  Input shape: [${inputTensor.dims.join(", ")}]`);
        console.log(`  Output shape: [${logits.dims.join(", ")}]`);
        console.log(`  Output dtype: ${logits.type}`);
        console.log(`  Inference time: ${inferenceTime.toFixed(2)}ms`);
        console.log(`  Sample predictions: [${predictions.slice(0, 10).join(", ")}]`);
        // Fix: scan min/max with a loop. The previous Math.min(...array) spread
        // passes seqLen*vocabSize floats as call arguments, which can exceed the
        // JS engine's argument limit and throw RangeError for large outputs.
        let minLogit = Infinity;
        let maxLogit = -Infinity;
        for (let i = 0; i < logits.data.length; i++) {
            const v = logits.data[i];
            if (v < minLogit) minLogit = v;
            if (v > maxLogit) maxLogit = v;
        }
        console.log(`  Logits range: [${minLogit.toFixed(3)}, ${maxLogit.toFixed(3)}]`);
        return {
            tokens: predictions,
            text,
            logits: logits.data,
            inferenceTime
        };
    }
    /**
     * Generate tokens autoregressively from a prompt (greedy argmax decoding).
     * @param {string} prompt - Seed text; each character's code unit becomes a token id.
     * @param {number} [numTokens=10] - Maximum number of tokens to generate.
     * @returns {Promise<{tokens: number[], text: string, logits: Float32Array, inferenceTime: number}>}
     * @throws {Error} if setSession() has not been called.
     */
    async generateText(prompt, numTokens = 10) {
        if (!this.session) {
            throw new Error("Inferencer not initialized. Call setSession() first.");
        }
        console.log(`[ModelInferencer] Generating ${numTokens} tokens from prompt: "${prompt}"`);
        // Convert prompt to token IDs (byte values)
        const promptTokens = Array.from(prompt).map((c) => c.charCodeAt(0));
        console.log(`  Prompt tokens (${promptTokens.length}): [${promptTokens.join(", ")}]`);
        // Start with prompt tokens
        const sequence = [...promptTokens];
        const times = [];
        // Generate tokens
        for (let i = 0; i < numTokens; i++) {
            // Fix: once the sequence fills the context window, padSequence()
            // truncates the model input to seqLen while lastPos below would index
            // past the logits buffer (undefined reads -> silent token 0). Stop.
            if (sequence.length >= this.config.seqLen) {
                console.log("  Context window full, stopping...");
                break;
            }
            // Pad sequence to fixed length
            const paddedSequence = this.padSequence(sequence, this.config.seqLen);
            // Create input tensor
            const inputIds = new BigInt64Array(paddedSequence.map((t) => BigInt(t)));
            const inputTensor = new this.TensorClass("int64", inputIds, [1, this.config.seqLen]);
            // Run inference
            const startTime = performance.now();
            const results = await this.session.run({ input_ids: inputTensor });
            times.push(performance.now() - startTime);
            // Get prediction at the last non-padded position
            const logits = results.logits.data;
            const lastPos = sequence.length - 1; // Position before padding
            const offset = lastPos * this.config.vocabSize;
            // Find token with highest logit (greedy argmax over the vocabulary)
            let maxIdx = 0;
            let maxVal = logits[offset];
            for (let j = 1; j < this.config.vocabSize; j++) {
                if (logits[offset + j] > maxVal) {
                    maxVal = logits[offset + j];
                    maxIdx = j;
                }
            }
            sequence.push(maxIdx);
            // Stop if END token is generated
            if (maxIdx === this.END_TOKEN) {
                console.log("  Generated END token, stopping...");
                break;
            }
        }
        // Convert generated tokens to text
        const generatedText = String.fromCharCode(...sequence);
        // Fix: guard the division — the original produced NaN when no generation
        // step ran (numTokens === 0, or the context was already full).
        const avgTime = times.length > 0 ? times.reduce((a, b) => a + b, 0) / times.length : 0;
        console.log(`[ModelInferencer] Generation complete:`);
        console.log(`  Generated text: "${generatedText}"`);
        console.log(`  Token sequence (${sequence.length}): [${sequence.join(", ")}]`);
        console.log(`  Avg inference time: ${avgTime.toFixed(2)}ms`);
        console.log(`  Tokens/sec: ${avgTime > 0 ? (1000 / avgTime).toFixed(2) : "n/a"}`);
        return {
            tokens: sequence,
            text: generatedText,
            logits: new Float32Array(), // Not returning full logits for generation
            inferenceTime: avgTime
        };
    }
    /**
     * Get model information (input/output tensor names), or null if no session.
     */
    getModelInfo() {
        if (!this.session)
            return null;
        return {
            inputs: [...this.session.inputNames],
            outputs: [...this.session.outputNames]
        };
    }
    /**
     * Get configuration (the live config object — callers should not mutate it).
     */
    getConfig() {
        return this.config;
    }
    /**
     * Run inference with token array input.
     * Prepends START_TOKEN and pads with PAD_TOKEN to the fixed seqLen.
     * @param {number[]} tokens - Token IDs without START/padding.
     * @returns {Promise<Float32Array>} Raw logits, flattened [1, seqLen, vocabSize].
     * @throws {Error} if setSession() has not been called.
     */
    async runInference(tokens) {
        if (!this.session) {
            throw new Error("Inferencer not initialized. Call setSession() first.");
        }
        const seqLen = this.config.seqLen;
        // Prepend START_TOKEN to input
        const tokensWithStart = [this.START_TOKEN, ...tokens];
        // Pad to fixed length
        const paddedTokens = new BigInt64Array(seqLen);
        for (let i = 0; i < seqLen; i++) {
            paddedTokens[i] =
                i < tokensWithStart.length ? BigInt(tokensWithStart[i]) : BigInt(this.PAD_TOKEN);
        }
        // Create input tensor
        const inputTensor = new this.TensorClass("int64", paddedTokens, [1, seqLen]);
        // Run inference
        const results = await this.session.run({ input_ids: inputTensor });
        return results.logits.data;
    }
    /**
     * Run tree attention inference (evaluation mode)
     * For models exported with --evaluation flag
     * @param inputs - Prefix, evaluated tokens, and attention mask
     *   ({prefixIds: number[], evaluatedIds: number[], evaluatedMask: ArrayLike<number> of length m*m})
     * @returns Logits for each evaluated position
     */
    async runEvaluationInference(inputs) {
        if (!this.session) {
            throw new Error("Inferencer not initialized. Call setSession() first.");
        }
        const { prefixIds, evaluatedIds, evaluatedMask } = inputs;
        const batchSize = 1;
        const prefixLen = prefixIds.length;
        const m = evaluatedIds.length;
        // Convert to BigInt64Array for ONNX int64 tensors
        const prefixIdsArray = new BigInt64Array(batchSize * prefixLen);
        for (let i = 0; i < prefixLen; i++) {
            prefixIdsArray[i] = BigInt(prefixIds[i]);
        }
        const evaluatedIdsArray = new BigInt64Array(batchSize * m);
        for (let i = 0; i < m; i++) {
            evaluatedIdsArray[i] = BigInt(evaluatedIds[i]);
        }
        // Mask is Float32Array
        const maskArray = new Float32Array(m * m);
        for (let i = 0; i < m * m; i++) {
            maskArray[i] = evaluatedMask[i];
        }
        // Create ONNX tensors
        const prefixIdsTensor = new this.TensorClass("int64", prefixIdsArray, [
            batchSize,
            prefixLen
        ]);
        const evaluatedIdsTensor = new this.TensorClass("int64", evaluatedIdsArray, [batchSize, m]);
        const evaluatedMaskTensor = new this.TensorClass("float32", maskArray, [1, m, m]);
        // Run inference
        const results = await this.session.run({
            prefix_ids: prefixIdsTensor,
            evaluated_ids: evaluatedIdsTensor,
            evaluated_mask: evaluatedMaskTensor
        });
        // Extract logits
        const logits = results.logits.data;
        // Output shape: [batch, m+1, vocab_size]
        // We return flattened array and num_evaluated for reshaping
        return {
            logits,
            numEvaluated: m
        };
    }
    /**
     * Run value prediction inference (for evaluation mode models)
     * For models exported with --evaluation-mode flag
     * @param tokens - Token IDs (already includes START/END tokens and padding)
     * @returns Predicted game outcome value in range [-1, 1]
     * @throws {Error} if no session is set, or if the model does not expose
     *   a "values" output (i.e. it is not an evaluation-mode export).
     */
    async runValuePrediction(tokens) {
        if (!this.session) {
            throw new Error("Inferencer not initialized. Call setSession() first.");
        }
        const seqLen = tokens.length;
        // Convert to BigInt64Array for ONNX int64 tensors
        const inputIds = new BigInt64Array(seqLen);
        for (let i = 0; i < seqLen; i++) {
            inputIds[i] = BigInt(tokens[i]);
        }
        // Create input tensor [1, seq_len]
        const inputTensor = new this.TensorClass("int64", inputIds, [1, seqLen]);
        // Run inference
        const results = await this.session.run({
            input_ids: inputTensor
        });
        // Extract value
        // Output shape: [batch_size] = [1]
        // For evaluation models, output name is "values" not "logits"
        const values = results.values;
        if (!values) {
            throw new Error("Evaluation model did not return 'values' output. Check model export.");
        }
        const predictedValue = values.data[0];
        return predictedValue;
    }
    /**
     * Compute softmax for a single position's logits
     * @param logits - Full logits array
     * @param position - Which evaluated position (0 = last prefix, 1-m = evaluated tokens)
     * @returns Probability distribution over vocabulary
     */
    softmax(logits, position) {
        const vocabSize = this.config.vocabSize;
        const offset = position * vocabSize;
        const probs = new Float32Array(vocabSize);
        // Find max for numerical stability
        let maxLogit = -Infinity;
        for (let i = 0; i < vocabSize; i++) {
            maxLogit = Math.max(maxLogit, logits[offset + i]);
        }
        // Compute exp and sum
        let sumExp = 0;
        for (let i = 0; i < vocabSize; i++) {
            probs[i] = Math.exp(logits[offset + i] - maxLogit);
            sumExp += probs[i];
        }
        // Normalize
        for (let i = 0; i < vocabSize; i++) {
            probs[i] /= sumExp;
        }
        return probs;
    }
    /**
     * Check if inferencer is ready
     */
    isReady() {
        return this.session !== null;
    }
    /**
     * Destroy the session and free resources
     * (drops the reference only; the platform layer owns actual session release)
     */
    destroy() {
        this.session = null;
        console.log("[ModelInferencer] Session destroyed");
    }
    // Private helper methods
    // Log the session's declared input/output tensor names.
    printModelInfo() {
        if (!this.session)
            return;
        console.log("[ModelInferencer] Model Information:");
        console.log("  Inputs:");
        this.session.inputNames.forEach((name, i) => {
            console.log(`    [${i}] ${name}`);
        });
        console.log("  Outputs:");
        this.session.outputNames.forEach((name, i) => {
            console.log(`    [${i}] ${name}`);
        });
    }
    // Build a uniformly random token-id tensor buffer in [0, vocabSize).
    createRandomInput(batchSize, seqLen) {
        const size = batchSize * seqLen;
        const data = new BigInt64Array(size);
        for (let i = 0; i < size; i++) {
            data[i] = BigInt(Math.floor(Math.random() * this.config.vocabSize));
        }
        return data;
    }
    // Right-pad with PAD_TOKEN to targetLen; truncates if tokens is longer.
    padSequence(tokens, targetLen) {
        const padded = [...tokens];
        while (padded.length < targetLen) {
            padded.push(this.PAD_TOKEN);
        }
        return padded.slice(0, targetLen); // Truncate if too long
    }
    // Assert logits tensor is float32 with shape [batchSize, seqLen, vocabSize].
    validateOutput(logits, batchSize, seqLen) {
        const expectedShape = [batchSize, seqLen, this.config.vocabSize];
        if (logits.dims.length !== 3) {
            throw new Error(`Expected 3D output, got ${logits.dims.length}D`);
        }
        if (logits.dims[0] !== expectedShape[0] ||
            logits.dims[1] !== expectedShape[1] ||
            logits.dims[2] !== expectedShape[2]) {
            throw new Error(`Shape mismatch! Expected [${expectedShape.join(", ")}], ` +
                `got [${logits.dims.join(", ")}]`);
        }
        if (logits.type !== "float32") {
            throw new Error(`Expected float32 output, got ${logits.type}`);
        }
    }
    // Greedy argmax over each position's vocabSize-wide logit row.
    getPredictions(logitsData, numPositions) {
        const predictions = [];
        for (let i = 0; i < numPositions; i++) {
            let maxIdx = 0;
            let maxVal = logitsData[i * this.config.vocabSize];
            for (let j = 1; j < this.config.vocabSize; j++) {
                const val = logitsData[i * this.config.vocabSize + j];
                if (val > maxVal) {
                    maxVal = val;
                    maxIdx = j;
                }
            }
            predictions.push(maxIdx);
        }
        return predictions;
    }
}
// Named CommonJS export consumed by both the web and node platform layers.
exports.ModelInferencer = ModelInferencer;
//# sourceMappingURL=modelInferencer.js.map