/** * A class to handle cosine similarity calculations using Google's TensorFlow.js. * Coded by Jason Mayes 2026. */ export class CosineSimilarity { constructor() { this.cachedMatrix = undefined; } /** * Resets the cached GPU matrix. */ async deleteGPUVectorCache() { if (this.cachedMatrix) { this.cachedMatrix.dispose(); this.cachedMatrix = undefined; } } /** * Calculates cosine similarity between two 1D tensors. * @param {tf.Tensor} emb1 First embedding vector. * @param {tf.Tensor} emb2 Second embedding vector. * @return {Promise} Cosine similarity score. */ async calculateCosineSimilarity(emb1, emb2) { // Cosine Similarity = (A . B) / (||A|| * ||B||) return tf.tidy(() => { // Squeeze for 1D vectors const V1 = emb1.squeeze(); const V2 = emb2.squeeze(); const DOT_PRODUCT = tf.dot(V1, V2); const NORM1 = tf.norm(V1); const NORM2 = tf.norm(V2); return DOT_PRODUCT.div(NORM1.mul(NORM2)).dataSync()[0]; }); } /** * Performs matrix-based cosine similarity search on the GPU. * @param {Array>} matrixData Array of embedding vectors. * @param {Array} vectorData The query embedding vector. * @param {number} topK The number of top results to return. * @return {Promise<{values: Float32Array, indices: Int32Array}>} Top K similarity scores and their indices. */ async cosineSimilarityTFJSGPUMatrix(matrixData, vectorData, topK) { // 1. Convert to Tensors if (!this.cachedMatrix) { console.log('Rebuilding GPU VectorDB Matrix'); this.cachedMatrix = tf.tensor2d(matrixData); // Shape: [N, D] } let results = tf.tidy(() => { const vector = tf.tensor1d(vectorData); // Shape: [D] const epsilon = 1e-9; // 2. Normalize the Matrix (Row-wise) // l2 norm along the horizontal axis (1) const matrixNorms = this.cachedMatrix.norm(2, 1, true); const normalizedMatrix = this.cachedMatrix.div(matrixNorms.add(epsilon)); // 3. Normalize the Vector const vectorNorm = vector.norm(2); const normalizedVector = vector.div(vectorNorm.add(epsilon)); // 4. Compute Dot Product // We can use matMul by reshaping vector to [D, 1] // [N, D] * [D, 1] -> [N, 1] let dotProduct = undefined; try { dotProduct = tf.matMul( normalizedMatrix, normalizedVector.reshape([-1, 1]) ); } catch (error) { console.error('VectorDB you are trying to use was encoded using embedding model that generated different number of dimensions. Please re-encode DB or use correct Embedding Model', error); } return dotProduct.squeeze(); }); const topKResults = tf.topk(results, topK, false); const topValues = await topKResults.values.data(); const topIndices = await topKResults.indices.data(); return {values: topValues, indices: topIndices}; } }