quickgrid committed on
Commit
c2e2021
·
verified ·
1 Parent(s): 8a5dba2

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +23 -2
index.html CHANGED
@@ -451,10 +451,10 @@ async function loadModel(type, modelId) {
451
  device: 'webgpu'
452
  });
453
  } else if (type === 'embedder') {
454
- STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress });
455
  } else if (type === 'reranker') {
456
  try {
457
- STATE.models.reranker = await tf.pipeline('text-classification', modelId, { progress_callback: handleProgress });
458
  } catch {
459
  STATE.models.reranker = 'heuristic';
460
  toast('Reranker model unavailable, using heuristic fallback', 'info');
@@ -474,6 +474,7 @@ async function loadModel(type, modelId) {
474
 
475
  async function loadAllModels() {
476
  await loadModel('embedder', STATE.modelIds.embedder);
 
477
  await loadModel('llm', STATE.modelIds.llm);
478
  await loadModel('reranker', STATE.modelIds.reranker);
479
  document.getElementById('sendBtn').disabled = !STATE.models.llm;
@@ -1173,6 +1174,26 @@ function initEvents() {
1173
  });
1174
  }
1175
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1176
  // =============================================
1177
  // Init
1178
  // =============================================
 
451
  device: 'webgpu'
452
  });
453
  } else if (type === 'embedder') {
454
+ STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress, device: 'webgpu' });
455
  } else if (type === 'reranker') {
456
  try {
457
+ STATE.models.reranker = await tf.pipeline('text-classification', modelId, { progress_callback: handleProgress, device: 'webgpu' });
458
  } catch {
459
  STATE.models.reranker = 'heuristic';
460
  toast('Reranker model unavailable, using heuristic fallback', 'info');
 
474
 
475
  async function loadAllModels() {
476
  await loadModel('embedder', STATE.modelIds.embedder);
477
+ await seedVectorDB(); // ADD THIS LINE
478
  await loadModel('llm', STATE.modelIds.llm);
479
  await loadModel('reranker', STATE.modelIds.reranker);
480
  document.getElementById('sendBtn').disabled = !STATE.models.llm;
 
1174
  });
1175
  }
1176
 
1177
/**
 * Seed the vector store with a small built-in knowledge base.
 *
 * Every seed passage is embedded with `embedText`, given the next id from
 * `STATE.nextId`, appended to `STATE.entries`, and added to `vTable`. The
 * table view is re-rendered once after the loop.
 *
 * The function is safe to call repeatedly: a passage whose text is already
 * present in `STATE.entries` is skipped, so re-running the model loader does
 * not duplicate the seed data.
 *
 * @returns {Promise<void>} Resolves after all missing seeds have been stored
 *   and `renderTable()` has been called.
 * @throws Propagates any error raised by `embedText` or `vTable.add`; entries
 *   stored before the failure are kept.
 */
async function seedVectorDB() {
  const seeds = [
    { text: "Transformers.js is a library that enables running machine learning models directly in the browser using ONNX Runtime. It supports text generation, image classification, audio processing, and more without requiring a server.", metadata: { source: "docs", topic: "transformers.js" } },
    { text: "Vector databases store data as high-dimensional numerical vectors. They enable fast similarity search by comparing vector distances using metrics like cosine similarity, euclidean distance, or dot product.", metadata: { source: "docs", topic: "vector-db" } },
    { text: "RAG (Retrieval-Augmented Generation) combines information retrieval with language model generation. It first searches a knowledge base for relevant passages, then feeds those passages as context to an LLM to produce grounded answers.", metadata: { source: "docs", topic: "rag" } },
    { text: "LanceDB is an open-source vector database designed for fast similarity search. It uses the Lance columnar format for efficient storage and retrieval of vector embeddings, supporting ANN indexes and full-text search.", metadata: { source: "docs", topic: "lancedb" } },
    { text: "Embedding models convert text into fixed-size numerical vectors that capture semantic meaning. Popular models include all-MiniLM-L6-v2 (384 dimensions), BGE embeddings, and E5 models. Higher dimensions generally capture more nuance.", metadata: { source: "docs", topic: "embeddings" } },
    { text: "Reranking improves search quality by re-scoring initially retrieved results with a more expensive cross-encoder model. Cross-encoders jointly process the query and each document, producing more accurate relevance scores than bi-encoder similarity.", metadata: { source: "docs", topic: "reranking" } },
    { text: "Quantization reduces model size by lowering numerical precision. Q4 uses 4-bit integers, Q4F16 uses 4-bit weights with 16-bit activations for WebGPU. Q4 runs on CPU/WASM, Q4F16 requires WebGPU due to specialized GPU-only operators.", metadata: { source: "docs", topic: "quantization" } },
    { text: "WebGPU is a modern browser API for general-purpose GPU computation. It replaces WebGL for compute workloads and is required for running quantized models like Q4F16 in transformers.js. Chrome 113+ supports WebGPU natively.", metadata: { source: "docs", topic: "webgpu" } },
  ];

  // Texts already stored; used to keep repeated calls from re-adding seeds.
  const knownTexts = new Set(STATE.entries.map((entry) => entry.text));

  for (const { text, metadata } of seeds) {
    if (knownTexts.has(text)) {
      continue;
    }

    // Embed first so a failed embedding never consumes an id.
    const vector = await embedText(text);
    const entry = {
      id: STATE.nextId++,
      text,
      metadata,
      vector,
      date: new Date().toISOString(),
    };

    STATE.entries.push(entry);
    // Hand the table a shallow copy so it does not share the STATE object.
    // Awaited in case `add` returns a promise (harmless if it does not).
    await vTable.add([{ ...entry }]);
    knownTexts.add(text);
  }

  renderTable();
}
1196
+
1197
  // =============================================
1198
  // Init
1199
  // =============================================