Spaces:
Running
Running
Update index.html
Browse files - index.html +23 -2
index.html
CHANGED
|
@@ -451,10 +451,10 @@ async function loadModel(type, modelId) {
|
|
| 451 |
device: 'webgpu'
|
| 452 |
});
|
| 453 |
} else if (type === 'embedder') {
|
| 454 |
-
STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress });
|
| 455 |
} else if (type === 'reranker') {
|
| 456 |
try {
|
| 457 |
-
STATE.models.reranker = await tf.pipeline('text-classification', modelId, { progress_callback: handleProgress });
|
| 458 |
} catch {
|
| 459 |
STATE.models.reranker = 'heuristic';
|
| 460 |
toast('Reranker model unavailable, using heuristic fallback', 'info');
|
|
@@ -474,6 +474,7 @@ async function loadModel(type, modelId) {
|
|
| 474 |
|
| 475 |
async function loadAllModels() {
|
| 476 |
await loadModel('embedder', STATE.modelIds.embedder);
|
|
|
|
| 477 |
await loadModel('llm', STATE.modelIds.llm);
|
| 478 |
await loadModel('reranker', STATE.modelIds.reranker);
|
| 479 |
document.getElementById('sendBtn').disabled = !STATE.models.llm;
|
|
@@ -1173,6 +1174,26 @@ function initEvents() {
|
|
| 1173 |
});
|
| 1174 |
}
|
| 1175 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1176 |
// =============================================
|
| 1177 |
// Init
|
| 1178 |
// =============================================
|
|
|
|
| 451 |
device: 'webgpu'
|
| 452 |
});
|
| 453 |
} else if (type === 'embedder') {
|
| 454 |
+
STATE.models.embedder = await tf.pipeline('feature-extraction', modelId, { progress_callback: handleProgress, device: 'webgpu' });
|
| 455 |
} else if (type === 'reranker') {
|
| 456 |
try {
|
| 457 |
+
STATE.models.reranker = await tf.pipeline('text-classification', modelId, { progress_callback: handleProgress, device: 'webgpu' });
|
| 458 |
} catch {
|
| 459 |
STATE.models.reranker = 'heuristic';
|
| 460 |
toast('Reranker model unavailable, using heuristic fallback', 'info');
|
|
|
|
| 474 |
|
| 475 |
async function loadAllModels() {
|
| 476 |
await loadModel('embedder', STATE.modelIds.embedder);
|
| 477 |
+
await seedVectorDB(); // ADD THIS LINE
|
| 478 |
await loadModel('llm', STATE.modelIds.llm);
|
| 479 |
await loadModel('reranker', STATE.modelIds.reranker);
|
| 480 |
document.getElementById('sendBtn').disabled = !STATE.models.llm;
|
|
|
|
| 1174 |
});
|
| 1175 |
}
|
| 1176 |
|
| 1177 |
+
/**
 * Seeds the knowledge base with a fixed set of built-in reference passages.
 *
 * For every seed passage that is not already stored, this embeds the text via
 * `embedText`, appends a new entry to `STATE.entries` (consuming
 * `STATE.nextId`), adds a shallow copy of that entry to `vTable`, and finally
 * calls `renderTable()` once.
 *
 * The function is idempotent with respect to seed text: calling it again
 * (e.g. because models are reloaded) does not create duplicate entries or
 * consume additional ids.
 *
 * @returns {Promise<void>} Resolves once all missing seeds were embedded and stored.
 */
async function seedVectorDB() {
  const seeds = [
    { text: "Transformers.js is a library that enables running machine learning models directly in the browser using ONNX Runtime. It supports text generation, image classification, audio processing, and more without requiring a server.", metadata: { source: "docs", topic: "transformers.js" } },
    { text: "Vector databases store data as high-dimensional numerical vectors. They enable fast similarity search by comparing vector distances using metrics like cosine similarity, euclidean distance, or dot product.", metadata: { source: "docs", topic: "vector-db" } },
    { text: "RAG (Retrieval-Augmented Generation) combines information retrieval with language model generation. It first searches a knowledge base for relevant passages, then feeds those passages as context to an LLM to produce grounded answers.", metadata: { source: "docs", topic: "rag" } },
    { text: "LanceDB is an open-source vector database designed for fast similarity search. It uses the Lance columnar format for efficient storage and retrieval of vector embeddings, supporting ANN indexes and full-text search.", metadata: { source: "docs", topic: "lancedb" } },
    { text: "Embedding models convert text into fixed-size numerical vectors that capture semantic meaning. Popular models include all-MiniLM-L6-v2 (384 dimensions), BGE embeddings, and E5 models. Higher dimensions generally capture more nuance.", metadata: { source: "docs", topic: "embeddings" } },
    { text: "Reranking improves search quality by re-scoring initially retrieved results with a more expensive cross-encoder model. Cross-encoders jointly process the query and each document, producing more accurate relevance scores than bi-encoder similarity.", metadata: { source: "docs", topic: "reranking" } },
    { text: "Quantization reduces model size by lowering numerical precision. Q4 uses 4-bit integers, Q4F16 uses 4-bit weights with 16-bit activations for WebGPU. Q4 runs on CPU/WASM, Q4F16 requires WebGPU due to specialized GPU-only operators.", metadata: { source: "docs", topic: "quantization" } },
    { text: "WebGPU is a modern browser API for general-purpose GPU computation. It replaces WebGL for compute workloads and is required for running quantized models like Q4F16 in transformers.js. Chrome 113+ supports WebGPU natively.", metadata: { source: "docs", topic: "webgpu" } },
  ];

  // Seeding may run more than once per page session; remember which passages
  // are already stored so a repeat call does not append duplicates.
  const knownTexts = new Set(STATE.entries.map((existing) => existing.text));

  for (const { text, metadata } of seeds) {
    if (knownTexts.has(text)) {
      continue;
    }

    const vector = await embedText(text);
    const entry = { id: STATE.nextId++, text, metadata, vector, date: new Date().toISOString() };
    STATE.entries.push(entry);

    // The table receives a shallow copy so it does not share the object held
    // in STATE.entries. Awaiting is harmless if add() is synchronous and
    // surfaces a rejection if it is asynchronous (NOTE(review): confirm which).
    await vTable.add([{ ...entry }]);
    knownTexts.add(text);
  }

  renderTable();
}
|
| 1196 |
+
|
| 1197 |
// =============================================
|
| 1198 |
// Init
|
| 1199 |
// =============================================
|