import 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js';
import 'https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-webgpu/dist/tf-backend-webgpu.js';
import * as LiteRT from 'https://cdn.jsdelivr.net/npm/@litertjs/core@0.2.1/+esm';
import { VectorStore } from './VectorStore.js';
import { CosineSimilarity } from './CosineSimilarity.js';
import { EmbeddingModel } from './EmbeddingModel.js';
import { Tokenizer } from './Tokenizer.js';
import { VisualizeTokens } from './VisualizeTokens.js';
import { VisualizeEmbedding } from './VisualizeEmbedding.js';

/**
 * VectorSearch - A master orchestrator class for the RAG system.
 * Coded by Jason Mayes 2026.
 */
export class VectorSearch {
  /**
   * @param {Object} modelConfig Config object for VectorSearch setup:
   *     {url, runtime, litertjsWasmUrl (optional), tokenizer,
   *     sequenceLength}. `runtime` selects 'litertjs' or the
   *     Transformers.js code paths below.
   */
  constructor(modelConfig) {
    this.modelUrl = modelConfig.url;
    this.modelRuntime = modelConfig.runtime;
    // Fall back to the CDN-hosted LiteRT WASM binaries when no override is given.
    this.litertHostedWasmUrl = modelConfig.litertjsWasmUrl
        ? modelConfig.litertjsWasmUrl
        : 'https://cdn.jsdelivr.net/npm/@litertjs/core@0.2.1/wasm/';
    this.tokenizerId = modelConfig.tokenizer;
    this.seqLength = modelConfig.sequenceLength;
    this.vectorStore = new VectorStore();
    this.cosineSimilarity = new CosineSimilarity();
    this.embeddingModel = new EmbeddingModel(this.modelRuntime);
    this.tokenizer = new Tokenizer();
    this.visualizeTokens = new VisualizeTokens();
    this.visualizeEmbedding = new VisualizeEmbedding();
    // In-memory cache of all vectors for the currently selected DB,
    // populated lazily by search().
    this.allStoredData = undefined;
    this.lastDBName = '';
  }

  /**
   * Initializes the TFJS WebGPU backend, the embedding model and (for the
   * LiteRT runtime) the tokenizer.
   * @param {HTMLElement=} STATUS_EL Optional element updated with progress text.
   * @return {Promise}
   */
  async load(STATUS_EL) {
    if (STATUS_EL) {
      STATUS_EL.innerText = 'Setting WebGPU Backend for TFJS...';
    }
    await tf.setBackend('webgpu');

    if (STATUS_EL) {
      STATUS_EL.innerText = 'Initializing Model Runtime...';
    }
    if (this.modelRuntime === 'litertjs') {
      const LITERTJS_WASM_PATH = this.litertHostedWasmUrl;
      await LiteRT.loadLiteRt(LITERTJS_WASM_PATH);
      // Share TFJS's WebGPU device with LiteRT so tensors can interoperate.
      const TF_BACKEND = tf.backend();
      LiteRT.setWebGpuDevice(TF_BACKEND.device);
    }

    if (STATUS_EL) {
      STATUS_EL.innerText = 'Loading Tokenizer & Embedding Model...';
    }
    await this.embeddingModel.load(this.modelUrl, this.modelRuntime);

    if (STATUS_EL) {
      STATUS_EL.innerText = 'Loading Tokenizer...';
    }
    // Transformers.js bundles its own tokenizer; only LiteRT needs an
    // explicit one.
    if (this.modelRuntime === 'litertjs') {
      await this.tokenizer.load(this.tokenizerId);
    }
  }

  /**
   * Sets the current database name in the vector store.
   * @param {string} dbName
   */
  setDb(dbName) {
    this.vectorStore.setDb(dbName);
  }

  /**
   * Encodes text and generates an embedding.
   * @param {string} text
   * @return {Promise<{embedding: Array, tokens: (Array|undefined)}>} `tokens`
   *     is only present for the LiteRT runtime.
   */
  async getEmbedding(text) {
    if (this.modelRuntime === 'litertjs') {
      const tokens = await this.tokenizer.encode(text);
      const { embedding } = await this.embeddingModel.getEmbeddingLiteRTJS(
          tokens, this.seqLength);
      const result = await embedding.array();
      // Free the GPU tensor once copied back to a JS array.
      embedding.dispose();
      return { embedding: result[0], tokens };
    } else {
      // Transformers.js (no tokens returned).
      const { embedding } = await this.embeddingModel.getEmbeddingTransformers(text);
      return { embedding: embedding };
    }
  }

  /**
   * Renders tokens in the given container.
   * @param {Array} tokens
   * @param {HTMLElement} containerEl
   */
  renderTokens(tokens, containerEl) {
    this.visualizeTokens.render(tokens, containerEl, this.seqLength);
  }

  /**
   * Renders an embedding visualization.
   * @param {Array} data
   * @param {HTMLElement} vizEl
   * @param {HTMLElement} textEl
   */
  async renderEmbedding(data, vizEl, textEl) {
    await this.visualizeEmbedding.render(data, vizEl, textEl);
  }

  /**
   * Deletes the GPU vector cache.
   */
  async deleteGPUVectorCache() {
    await this.cosineSimilarity.deleteGPUVectorCache();
  }

  /**
   * Performs a vector search against the cached vectors of `selectedDB`.
   * NOTE(review): this assumes the caller has already pointed the vector
   * store at `selectedDB` via setDb() — confirm against call sites.
   * @param {Array} queryVector
   * @param {number} threshold Minimum cosine similarity to count as a match.
   * @param {string} selectedDB
   * @param {number} maxMatches
   * @return {Promise<{results: Array, bestScore: number, bestIndex: number}>}
   */
  async search(queryVector, threshold, selectedDB, maxMatches = 5) {
    // Refresh the in-memory cache when the DB changed, or when it was never
    // loaded (the original crashed on first use if selectedDB matched the
    // initial '' lastDBName).
    if (this.lastDBName !== selectedDB || this.allStoredData === undefined) {
      await this.deleteGPUVectorCache();
      this.lastDBName = selectedDB;
      this.allStoredData = await this.vectorStore.getAllVectors();
    }
    const matrixData = this.allStoredData.map((item) => item.embedding);

    if (matrixData.length === 0) {
      console.warn('No data in chosen vector store. Store some data first before searching');
      return { results: [], bestScore: 0, bestIndex: 0 };
    }

    const { values, indices } = await this.cosineSimilarity
        .cosineSimilarityTFJSGPUMatrix(matrixData, queryVector, maxMatches);

    // Keep up to maxMatches results above threshold, tracking the single
    // best-scoring entry's position within topMatches.
    const topMatches = [];
    let bestIndex = 0;
    let bestScore = 0;
    for (let i = 0; i < values.length; i++) {
      if (values[i] < threshold || topMatches.length >= maxMatches) {
        continue;
      }
      topMatches.push({
        id: this.allStoredData[indices[i]].id,
        score: values[i],
        vector: this.allStoredData[indices[i]].embedding
      });
      if (values[i] > bestScore) {
        bestIndex = topMatches.length - 1;
        bestScore = values[i];
      }
    }

    // Hydrate each match with its original text from the store.
    const results = [];
    for (const match of topMatches) {
      const text = await this.vectorStore.getTextByID(match.id);
      results.push({ ...match, text });
    }
    return { results, bestScore, bestIndex };
  }

  /**
   * Stores multiple items in the vector store.
   * @param {Array<{embedding: Array, text: string}>} storagePayload
   */
  async storeBatch(storagePayload) {
    await this.vectorStore.storeBatch(storagePayload);
  }

  /**
   * Embeds and stores multiple texts in the named DB, in batches.
   * @param {Array<string>} texts
   * @param {string} dbName
   * @param {HTMLElement=} statusElement Optional element for progress text.
   * @param {number=} batchSize How many texts to accumulate before storing.
   */
  async storeTexts(texts, dbName, statusElement, batchSize = 2) {
    this.setDb(dbName);
    let textBatch = [];
    let tensorBatch = [];

    for (let i = 0; i < texts.length; i++) {
      if (statusElement) {
        statusElement.innerText = `Embedding paragraph ${i + 1} of ${texts.length}...`;
      }
      // TODO: Update batching for LiteRT - currently batches DB sends not model inference batching.
      if (this.modelRuntime === 'litertjs') {
        const tokens = await this.tokenizer.encode(texts[i]);
        const { embedding } = await this.embeddingModel.getEmbeddingLiteRTJS(
            tokens, this.seqLength);
        tensorBatch.push(embedding);
        textBatch.push(texts[i]);

        // Flush when the batch is full or this is the final text.
        if (tensorBatch.length >= batchSize || i === texts.length - 1) {
          const stackedTensors = tf.stack(tensorBatch);
          const allVectors = await stackedTensors.array();
          const storagePayload = allVectors.map((vector, index) => ({
            embedding: vector[0],
            text: textBatch[index]
          }));
          await this.vectorStore.storeBatch(storagePayload);
          // Release per-item tensors and the stacked copy.
          tensorBatch.forEach((t) => t.dispose());
          stackedTensors.dispose();
          tensorBatch = [];
          textBatch = [];
        }
      } else {
        // Using Transformers.js model.
        textBatch.push(texts[i]);
        // Parallelize embedding of texts as such tiny model.
        if (textBatch.length >= batchSize || i === texts.length - 1) {
          // Returns huge 1D array of embeddings.
          const { embedding: EMBEDDINGS } =
              await this.embeddingModel.getEmbeddingTransformers(textBatch);
          // BUG FIX: divide by the actual number of texts in this batch —
          // the final batch may be smaller than batchSize, and dividing by
          // batchSize misaligned every slice in that case.
          const EMBEDDING_LENGTH = EMBEDDINGS.length / textBatch.length;
          const storagePayload = textBatch.map((text, e) => ({
            embedding: EMBEDDINGS.slice(e * EMBEDDING_LENGTH, (e + 1) * EMBEDDING_LENGTH),
            text
          }));
          await this.vectorStore.storeBatch(storagePayload);
          textBatch = [];
        }
      }
    }
    // Cached GPU vectors are now stale relative to the store; drop them.
    await this.deleteGPUVectorCache();
  }
}