// gemma-rag-webgpu / VectorSearch-min.js
// Source: huggingworld — "Upload 78 files" (commit e3aec01, verified)
/*********************************************************************
 * Client side vector search using EmbeddingGemma or all-MiniLM-L6-v2,
* Accelerated with WebGPU via LiteRT.js or Transformers.js.
* Tokenizer via Transformers.js, and pre/post processing via
* TensorFlow.js - all accelerated via WebGPU for speed.
* Version: 1.1.2
* Coded by Jason Mayes 2026.
*--------------------------------------------------------------------
 * Connect with me on social media with any questions or comments:
*
* LinkedIn: https://www.linkedin.com/in/webai/
* Twitter / X: https://x.com/jason_mayes
* Github: https://github.com/jasonmayes
* CodePen: https://codepen.io/jasonmayes
*********************************************************************/
import "https://cdn.jsdelivr.net/npm/@tensorflow/tfjs/dist/tf.min.js";
import "https://cdn.jsdelivr.net/npm/@tensorflow/tfjs-backend-webgpu/dist/tf-backend-webgpu.js";
// FIX: @litertjs/core was imported twice under two aliases; consolidated.
import * as litert from "https://cdn.jsdelivr.net/npm/@litertjs/core@0.2.1/+esm";
import * as litertInterop from "https://cdn.jsdelivr.net/npm/@litertjs/tfjs-interop@1.0.1/+esm";
import { pipeline } from "https://cdn.jsdelivr.net/npm/@xenova/transformers@2.17.2";
import { AutoTokenizer } from "https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.3.0";

/**
 * IndexedDB-backed store of {text, embedding} records, one object store
 * ("embeddings") per named database.
 */
class EmbeddingStore {
  constructor(dbName = "unnamed") {
    this.dbName = dbName;
    this.db = null; // open IDBDatabase, lazily created by initDb()
  }

  /** Switch to a different database name, closing any open connection. */
  setDb(dbName) {
    if (this.dbName !== dbName) {
      this.dbName = dbName;
      if (this.db) {
        this.db.close();
        this.db = null;
      }
    }
  }

  /**
   * Open (or reuse) the IndexedDB connection, creating the "embeddings"
   * object store (auto-incrementing "id" key) on first use.
   * @returns {Promise<IDBDatabase>}
   */
  async initDb() {
    if (this.db) return this.db;
    return new Promise((resolve, reject) => {
      const request = indexedDB.open(this.dbName, 1);
      request.onupgradeneeded = (event) => {
        const db = event.target.result;
        if (!db.objectStoreNames.contains("embeddings")) {
          db.createObjectStore("embeddings", { keyPath: "id", autoIncrement: true });
        }
      };
      request.onsuccess = (event) => {
        this.db = event.target.result;
        resolve(this.db);
      };
      request.onerror = (event) => reject(event.target.error);
    });
  }

  /**
   * Persist a batch of records in a single read-write transaction.
   * @param {{text: string, embedding: number[]}[]} items
   */
  async storeBatch(items) {
    const db = await this.initDb();
    const transaction = db.transaction(["embeddings"], "readwrite");
    const store = transaction.objectStore("embeddings");
    return new Promise((resolve, reject) => {
      transaction.oncomplete = () => resolve();
      transaction.onerror = (event) => reject(event.target.error);
      for (const item of items) {
        store.add({ text: item.text, embedding: item.embedding });
      }
    });
  }

  /**
   * Read every stored record's id and embedding (texts are fetched lazily
   * via getTextByID to keep this payload small).
   * @returns {Promise<{id: number, embedding: number[]}[]>}
   */
  async getAllVectors() {
    const db = await this.initDb();
    return new Promise((resolve, reject) => {
      const store = db.transaction(["embeddings"], "readonly").objectStore("embeddings");
      const rows = [];
      const cursorRequest = store.openCursor();
      cursorRequest.onsuccess = (event) => {
        const cursor = event.target.result;
        if (cursor) {
          rows.push({ id: cursor.value.id, embedding: cursor.value.embedding });
          cursor.continue();
        } else {
          resolve(rows);
        }
      };
      cursorRequest.onerror = (event) => reject(event.target.error);
    });
  }

  /**
   * Look up the original text for a record id.
   * @returns {Promise<string|null>} null when the id is not present.
   */
  async getTextByID(id) {
    const db = await this.initDb();
    return new Promise((resolve, reject) => {
      const getRequest = db
        .transaction(["embeddings"], "readonly")
        .objectStore("embeddings")
        .get(id);
      getRequest.onsuccess = () => {
        resolve(getRequest.result ? getRequest.result.text : null);
      };
      getRequest.onerror = (event) => reject(event.target.error);
    });
  }
}

/**
 * Cosine-similarity scoring on the GPU via TensorFlow.js, with a cached
 * 2D tensor of all stored embeddings so repeat searches skip the upload.
 */
class CosineSimilarity {
  constructor() {
    // Lazily-built [numVectors, dim] tensor kept on the GPU between searches.
    this.cachedMatrix = undefined;
  }

  /** Drop the cached GPU matrix (call after the underlying DB changes). */
  async deleteGPUVectorCache() {
    if (this.cachedMatrix) {
      this.cachedMatrix.dispose();
      this.cachedMatrix = undefined;
    }
  }

  /**
   * Cosine similarity between two single-vector tensors.
   * @returns {Promise<number>}
   */
  async calculateCosineSimilarity(a, b) {
    return tf.tidy(() => {
      const vecA = a.squeeze();
      const vecB = b.squeeze();
      const dotProduct = tf.dot(vecA, vecB);
      const normA = tf.norm(vecA);
      const normB = tf.norm(vecB);
      return dotProduct.div(normA.mul(normB)).dataSync()[0];
    });
  }

  /**
   * Score a query vector against every cached embedding and return the
   * top-k matches.
   * @param {number[][]} vectors - all stored embeddings (used only to
   *   (re)build the cache when empty).
   * @param {number[]} queryEmbedding
   * @param {number} topK
   * @returns {Promise<{values: Float32Array, indices: Int32Array}>}
   *   Unsorted top-k scores and their row indices (topk sorted=false,
   *   matching the original behavior).
   */
  async cosineSimilarityTFJSGPUMatrix(vectors, queryEmbedding, topK) {
    if (!this.cachedMatrix) {
      console.log("Rebuilding GPU VectorDB Matrix");
      this.cachedMatrix = tf.tensor2d(vectors);
    }
    const scores = tf.tidy(() => {
      const query = tf.tensor1d(queryEmbedding);
      const EPSILON = 1e-9; // avoid division by zero for zero-norm vectors
      const rowNorms = this.cachedMatrix.norm(2, 1, true);
      const normalizedMatrix = this.cachedMatrix.div(rowNorms.add(EPSILON));
      const queryNorm = query.norm(2);
      const normalizedQuery = query.div(queryNorm.add(EPSILON));
      let product;
      try {
        product = tf.matMul(normalizedMatrix, normalizedQuery.reshape([-1, 1]));
      } catch (err) {
        console.error(
          "VectorDB you are trying to use was encoded using embedding model that generated different number of dimensions. Please re-encode DB or use correct Embedding Model",
          err
        );
        // FIX: the original fell through and crashed with a TypeError on an
        // undefined tensor; rethrow the real error instead.
        throw err;
      }
      return product.squeeze();
    });
    const topkResult = tf.topk(scores, topK, false);
    const values = await topkResult.values.data();
    const indices = await topkResult.indices.data();
    // FIX: dispose the score and topk tensors — the original leaked all
    // three on every search.
    scores.dispose();
    topkResult.values.dispose();
    topkResult.indices.dispose();
    return { values, indices };
  }
}

/**
 * Runtime-agnostic embedding model: LiteRT.js (WebGPU compiled model) or
 * Transformers.js (feature-extraction pipeline).
 */
class EmbeddingModel {
  /** @param {"litertjs"|"transformersjs"} runtime */
  constructor(runtime) {
    this.model = undefined;
    this.runtime = runtime;
  }

  /** Load and (for LiteRT) compile the model for WebGPU. */
  async load(modelUrl) {
    if (this.runtime === "litertjs") {
      this.model = await litert.loadAndCompile(modelUrl, { accelerator: "webgpu" });
    } else {
      this.model = await pipeline("feature-extraction", modelUrl);
    }
  }

  /**
   * Embed a pre-tokenized input with the LiteRT model.
   * @param {number[]} tokens - token ids.
   * @param {number} seqLength - fixed model sequence length; input is
   *   zero-padded or truncated to fit.
   * @returns {Promise<{embedding: tf.Tensor, tokens: number[]}|undefined>}
   *   Caller owns (and must dispose) the returned tensor.
   */
  async getEmbeddingLiteRTJS(tokens, seqLength) {
    if (!this.model) throw new Error("Model not loaded. Call load() first.");
    if (this.runtime === "litertjs") {
      let ids = tf.tensor1d(tokens, "int32");
      if (tokens.length < seqLength) {
        const padded = ids.pad([[0, seqLength - tokens.length]]);
        ids.dispose(); // FIX: original leaked the pre-pad tensor
        ids = padded;
      } else if (tokens.length > seqLength) {
        const truncated = ids.slice([0], [seqLength]);
        ids.dispose(); // FIX: original leaked the pre-slice tensor
        ids = truncated;
      }
      const batched = ids.expandDims(0);
      const outputs = litertInterop.runWithTfjsTensors(this.model, batched);
      ids.dispose();
      batched.dispose();
      return { embedding: outputs[0], tokens };
    }
  }

  /**
   * Embed text (a string or an array of strings) with the Transformers.js
   * pipeline using mean pooling + normalization.
   * @returns {Promise<{embedding: number[]}|undefined>} flattened data for
   *   the whole batch.
   */
  async getEmbeddingTransformers(texts) {
    if (this.runtime === "transformersjs") {
      const output = await this.model(texts, { pooling: "mean", normalize: true });
      return { embedding: Array.from(output.data) };
    }
  }
}

/** Thin wrapper around a Hugging Face AutoTokenizer. */
class Tokenizer {
  constructor() {
    this.tokenizer = undefined;
  }

  async load(tokenizerId) {
    this.tokenizer = await AutoTokenizer.from_pretrained(tokenizerId);
  }

  /** @returns {Promise<number[]>} token ids for the given text. */
  async encode(text) {
    if (!this.tokenizer) throw new Error("Tokenizer not loaded. Call load() first.");
    return await this.tokenizer.encode(text);
  }
}

/** Renders token chips plus a capped preview of padding tokens. */
class TokenVisualizer {
  /**
   * @param {Array} tokens - tokens to display.
   * @param {HTMLElement} container - cleared and repopulated.
   * @param {number} seqLength - model sequence length (drives padding count).
   */
  render(tokens, container, seqLength) {
    container.innerHTML = "";
    tokens.forEach((token) => {
      const chip = document.createElement("div");
      chip.className = "token-chip";
      chip.innerText = token;
      container.appendChild(chip);
    });
    // Show at most 20 padding chips; summarize the rest in text.
    const previewCount = Math.min(20, seqLength - tokens.length);
    for (let i = 0; i < previewCount; i++) {
      const chip = document.createElement("div");
      chip.className = "token-chip padded";
      chip.innerText = "0";
      container.appendChild(chip);
    }
    const remaining = seqLength - tokens.length - previewCount;
    // FIX: only show the note when padding actually remains beyond the
    // preview (original printed "... and 0 more padding tokens").
    if (remaining > 0) {
      const note = document.createElement("div");
      note.className = "mini-subtitle";
      note.innerText = `... and ${remaining} more padding tokens`;
      container.appendChild(note);
    }
  }
}

/** Renders an embedding as a colored cell grid plus its full numeric dump. */
class EmbeddingVisualizer {
  /**
   * @param {tf.Tensor|ArrayLike<number>} embedding
   * @param {HTMLElement} gridContainer - cleared and filled with cells.
   * @param {HTMLElement} textContainer - receives the printed vector.
   */
  async render(embedding, gridContainer, textContainer) {
    const values = embedding instanceof tf.Tensor ? await embedding.data() : embedding;
    gridContainer.innerHTML = "";
    for (let i = 0; i < values.length; i++) {
      const value = values[i];
      const cell = document.createElement("div");
      cell.className = "viz-cell";
      // Map the value into a 0..255 channel intensity for a blue-ish ramp.
      const intensity = Math.max(0, Math.min(255, (value + 0.1) * 1000 + 128));
      cell.style.backgroundColor = `rgb(${intensity}, ${intensity / 2}, 255)`;
      cell.title = `Dim ${i}: ${value.toFixed(4)}`;
      gridContainer.appendChild(cell);
    }
    textContainer.innerText =
      "Full Vector: [" + Array.from(values).map((v) => v.toFixed(4)).join(", ") + "]";
  }
}

/**
 * Client-side vector search: embeds text with EmbeddingGemma (LiteRT.js) or
 * a Transformers.js model, stores vectors in IndexedDB, and scores queries
 * on the GPU via TensorFlow.js.
 */
class VectorSearch {
  /**
   * @param {Object} config
   * @param {string} config.url - embedding model URL or id.
   * @param {"litertjs"|"transformersjs"} config.runtime
   * @param {string} [config.litertjsWasmUrl] - LiteRT wasm host directory.
   * @param {string} config.tokenizer - tokenizer id (LiteRT runtime only).
   * @param {number} config.sequenceLength - fixed model sequence length.
   */
  constructor(config) {
    this.modelUrl = config.url;
    this.modelRuntime = config.runtime;
    this.litertHostedWasmUrl = config.litertjsWasmUrl
      ? config.litertjsWasmUrl
      : "https://cdn.jsdelivr.net/npm/@litertjs/core@0.2.1/wasm/";
    this.tokenizerId = config.tokenizer;
    this.seqLength = config.sequenceLength;
    this.vectorStore = new EmbeddingStore();
    this.cosineSimilarity = new CosineSimilarity();
    this.embeddingModel = new EmbeddingModel(this.modelRuntime);
    this.tokenizer = new Tokenizer();
    this.visualizeTokens = new TokenVisualizer();
    this.visualizeEmbedding = new EmbeddingVisualizer();
    this.allStoredData = undefined; // CPU-side cache of {id, embedding} rows
    this.lastDBName = ""; // DB name the caches were built from
  }

  /**
   * Initialize the TFJS WebGPU backend, the model runtime, and (for
   * LiteRT) the tokenizer.
   * @param {HTMLElement} [statusEl] - optional element for progress text.
   */
  async load(statusEl) {
    if (statusEl) statusEl.innerText = "Setting WebGPU Backend for TFJS...";
    await tf.setBackend("webgpu");
    if (statusEl) statusEl.innerText = "Initializing Model Runtime...";
    if (this.modelRuntime === "litertjs") {
      await litert.loadLiteRt(this.litertHostedWasmUrl);
      // Share TFJS's WebGPU device with LiteRT for zero-copy interop.
      const backend = tf.backend();
      litert.setWebGpuDevice(backend.device);
    }
    if (statusEl) statusEl.innerText = "Loading Tokenizer & Embedding Model...";
    // FIX: dropped a stray second argument that load() ignores.
    await this.embeddingModel.load(this.modelUrl);
    if (statusEl) statusEl.innerText = "Loading Tokenizer...";
    if (this.modelRuntime === "litertjs") {
      await this.tokenizer.load(this.tokenizerId);
    }
  }

  /** Point the underlying store at a different IndexedDB database. */
  setDb(dbName) {
    this.vectorStore.setDb(dbName);
  }

  /**
   * Embed one text.
   * @returns {Promise<{embedding: number[], tokens?: number[]}>}
   */
  async getEmbedding(text) {
    if (this.modelRuntime === "litertjs") {
      const tokens = await this.tokenizer.encode(text);
      const { embedding } = await this.embeddingModel.getEmbeddingLiteRTJS(tokens, this.seqLength);
      const data = await embedding.array();
      embedding.dispose();
      return { embedding: data[0], tokens };
    }
    const { embedding } = await this.embeddingModel.getEmbeddingTransformers(text);
    return { embedding };
  }

  /** Render token chips into the given container. */
  renderTokens(tokens, container) {
    this.visualizeTokens.render(tokens, container, this.seqLength);
  }

  /** Render an embedding visualization into the given containers. */
  async renderEmbedding(embedding, gridContainer, textContainer) {
    await this.visualizeEmbedding.render(embedding, gridContainer, textContainer);
  }

  /** Drop the GPU-side matrix cache (forces a rebuild on next search). */
  async deleteGPUVectorCache() {
    await this.cosineSimilarity.deleteGPUVectorCache();
  }

  /**
   * Find the stored texts most similar to a query embedding.
   * @param {number[]} queryEmbedding
   * @param {number} threshold - minimum cosine similarity to accept.
   * @param {string} dbName - database the vectors were stored under.
   * @param {number} [topK=5] - maximum number of results.
   * @returns {Promise<{results: Array, bestScore: number, bestIndex: number}>}
   */
  async search(queryEmbedding, threshold, dbName, topK = 5) {
    // FIX: also (re)load when allStoredData is still undefined — the
    // original crashed if the first search used the initial dbName.
    if (this.lastDBName !== dbName || !this.allStoredData) {
      await this.deleteGPUVectorCache();
      this.lastDBName = dbName;
      this.allStoredData = await this.vectorStore.getAllVectors();
    }
    const vectors = this.allStoredData.map((row) => row.embedding);
    if (vectors.length === 0) {
      console.warn("No data in chosen vector store. Store some data first before searching");
      return { results: [], bestScore: 0, bestIndex: 0 };
    }
    const { values, indices } = await this.cosineSimilarity.cosineSimilarityTFJSGPUMatrix(
      vectors,
      queryEmbedding,
      topK
    );
    const accepted = [];
    let bestIndex = 0;
    let bestScore = 0;
    for (let i = 0; i < values.length; i++) {
      if (values[i] >= threshold && accepted.length < topK) {
        accepted.push({
          id: this.allStoredData[indices[i]].id,
          score: values[i],
          vector: this.allStoredData[indices[i]].embedding,
        });
        if (values[i] > bestScore) {
          bestIndex = accepted.length - 1;
          bestScore = values[i];
        }
      }
    }
    const results = [];
    for (const hit of accepted) {
      const text = await this.vectorStore.getTextByID(hit.id);
      results.push({ ...hit, text });
    }
    return { results, bestScore, bestIndex };
  }

  /** Store pre-computed {text, embedding} records directly. */
  async storeBatch(items) {
    await this.vectorStore.storeBatch(items);
  }

  /**
   * Embed and store a list of texts in mini-batches.
   * @param {string[]} texts
   * @param {string} dbName - target database.
   * @param {HTMLElement} [statusEl] - optional progress element.
   * @param {number} [batchSize=2] - embeddings flushed per batch.
   */
  async storeTexts(texts, dbName, statusEl, batchSize = 2) {
    this.setDb(dbName);
    let pendingTexts = [];
    let pendingEmbeddings = [];
    for (let i = 0; i < texts.length; i++) {
      if (statusEl) {
        statusEl.innerText = `Embedding paragraph ${i + 1} of ${texts.length}...`;
      }
      if (this.modelRuntime === "litertjs") {
        const tokens = await this.tokenizer.encode(texts[i]);
        const { embedding } = await this.embeddingModel.getEmbeddingLiteRTJS(
          tokens,
          this.seqLength
        );
        pendingEmbeddings.push(embedding);
        pendingTexts.push(texts[i]);
        if (pendingEmbeddings.length >= batchSize || i === texts.length - 1) {
          const stacked = tf.stack(pendingEmbeddings);
          const rows = (await stacked.array()).map((row, j) => ({
            embedding: row[0],
            text: pendingTexts[j],
          }));
          await this.vectorStore.storeBatch(rows);
          pendingEmbeddings.forEach((t) => t.dispose());
          stacked.dispose();
          pendingEmbeddings = [];
          pendingTexts = [];
        }
      } else {
        pendingTexts.push(texts[i]);
        if (pendingTexts.length >= batchSize || i === texts.length - 1) {
          const { embedding } = await this.embeddingModel.getEmbeddingTransformers(pendingTexts);
          // FIX: derive the per-text dimension from the actual batch size;
          // the original divided by `batchSize`, which mis-sliced every
          // vector in a final partial batch.
          const dim = embedding.length / pendingTexts.length;
          const rows = pendingTexts.map((text, j) => ({
            embedding: embedding.slice(j * dim, (j + 1) * dim),
            text,
          }));
          await this.vectorStore.storeBatch(rows);
          pendingTexts = [];
        }
      }
    }
    // New data invalidates any GPU matrix built from the old contents.
    await this.deleteGPUVectorCache();
  }
}

export { VectorSearch };