Add Indexing & Vector DB
Browse files- index.html +127 -0
index.html
CHANGED
|
@@ -376,6 +376,34 @@
|
|
| 376 |
font-size: 0.85em;
|
| 377 |
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 378 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 379 |
</style>
|
| 380 |
</head>
|
| 381 |
<body>
|
|
@@ -385,6 +413,7 @@
|
|
| 385 |
<div class="tabs">
|
| 386 |
<button class="tab active" onclick="switchTab('rag')">RAG</button>
|
| 387 |
<button class="tab" onclick="switchTab('chunking')">Chunking</button>
|
|
|
|
| 388 |
</div>
|
| 389 |
|
| 390 |
<!-- RAG TYPES TAB -->
|
|
@@ -484,7 +513,54 @@
|
|
| 484 |
</div>
|
| 485 |
</div>
|
| 486 |
</div>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 487 |
</div>
|
|
|
|
| 488 |
|
| 489 |
<script>
|
| 490 |
const ragData = [
|
|
@@ -1050,6 +1126,57 @@
|
|
| 1050 |
chunkingYearTimeline.appendChild(item);
|
| 1051 |
});
|
| 1052 |
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1053 |
</script>
|
| 1054 |
</body>
|
| 1055 |
</html>
|
|
|
|
| 376 |
font-size: 0.85em;
|
| 377 |
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
|
| 378 |
}
|
| 379 |
+
body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; min-height: 100vh; }
|
| 380 |
+
.container { max-width: 1400px; margin: 0 auto; }
|
| 381 |
+
h1 { text-align: center; color: white; margin-bottom: 30px; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); }
|
| 382 |
+
.tabs { display: flex; gap: 10px; margin-bottom: 25px; }
|
| 383 |
+
.tab { padding: 15px 30px; background: rgba(255, 255, 255, 0.3); color: white; border: none; border-radius: 10px 10px 0 0; cursor: pointer; font-size: 1.1em; font-weight: bold; transition: all 0.3s ease; }
|
| 384 |
+
.tab:hover { background: rgba(255, 255, 255, 0.4); }
|
| 385 |
+
.tab.active { background: white; color: #667eea; }
|
| 386 |
+
.tab-content { display: none; }
|
| 387 |
+
.tab-content.active { display: block; }
|
| 388 |
+
.grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(450px, 1fr)); gap: 25px; margin-bottom: 25px; }
|
| 389 |
+
.card { background: white; border-radius: 15px; padding: 25px; box-shadow: 0 10px 30px rgba(0,0,0,0.2); transition: transform 0.3s ease; }
|
| 390 |
+
.card h2 { color: #667eea; margin-bottom: 20px; font-size: 1.5em; border-bottom: 3px solid #667eea; padding-bottom: 10px; }
|
| 391 |
+
|
| 392 |
+
/* New Styles for Indexing/DB Tab */
|
| 393 |
+
.hint-box { background: #fff3cd; border-left: 5px solid #ffc107; padding: 20px; border-radius: 10px; margin-bottom: 25px; color: #856404; }
|
| 394 |
+
.hint-box h3 { margin-bottom: 10px; }
|
| 395 |
+
.tech-table { width: 100%; border-collapse: collapse; margin-top: 15px; }
|
| 396 |
+
.tech-table th, .tech-table td { padding: 12px; border: 1px solid #ddd; text-align: left; font-size: 0.9em; }
|
| 397 |
+
.tech-table th { background-color: #f8f9fa; color: #667eea; }
|
| 398 |
+
.tag-algo { background: #e2e3ff; color: #4a51d4; padding: 2px 8px; border-radius: 4px; font-size: 0.8em; font-weight: bold; }
|
| 399 |
+
.tag-sys { background: #d4edda; color: #155724; padding: 2px 8px; border-radius: 4px; font-size: 0.8em; font-weight: bold; }
|
| 400 |
+
|
| 401 |
+
/* Retained utility classes */
|
| 402 |
+
.timeline-item { display: flex; align-items: center; padding: 15px; margin: 10px 0; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
|
| 403 |
+
.timeline-year { font-size: 1.8em; font-weight: bold; color: #667eea; min-width: 80px; padding-right: 20px; border-right: 3px solid #667eea; }
|
| 404 |
+
.timeline-content { padding-left: 20px; flex: 1; }
|
| 405 |
+
.timeline-reference { color: #888; font-size: 0.85em; font-style: italic; margin-top: 5px; }
|
| 406 |
+
.timeline-reference a { color: #667eea; text-decoration: none; }
|
| 407 |
</style>
|
| 408 |
</head>
|
| 409 |
<body>
|
|
|
|
| 413 |
<div class="tabs">
|
| 414 |
<button class="tab active" onclick="switchTab('rag')">RAG</button>
|
| 415 |
<button class="tab" onclick="switchTab('chunking')">Chunking</button>
|
| 416 |
+
<button class="tab" onclick="switchTab('indexing-db')">Indexing & Vector DB</button>
|
| 417 |
</div>
|
| 418 |
|
| 419 |
<!-- RAG TYPES TAB -->
|
|
|
|
| 513 |
</div>
|
| 514 |
</div>
|
| 515 |
</div>
|
| 516 |
+
|
| 517 |
+
<!-- Indexing & Vector DB -->
|
| 518 |
+
<div id="indexing-db-content" class="tab-content">
|
| 519 |
+
<div class="hint-box">
|
| 520 |
+
<h3>💡 Architectural Distinction</h3>
|
| 521 |
+
<p><strong>Indexing Technique <span class="tag-algo">Algorithmic Level</span>:</strong> The mathematical strategy used to organize vector space (e.g., HNSW, IVF). It defines <em>how</em> the search is performed conceptually.</p>
|
| 522 |
+
<p style="margin-top: 10px;"><strong>Vector Database <span class="tag-sys">System Level</span>:</strong> The infrastructure/engine that implements these algorithms, adding database features like persistence, scalability, API layers, and metadata filtering (e.g., Pinecone, Milvus).</p>
|
| 523 |
+
</div>
|
| 524 |
+
|
| 525 |
+
<div class="grid">
|
| 526 |
+
<div class="card">
|
| 527 |
+
<h2>Core Indexing Techniques</h2>
|
| 528 |
+
<table class="tech-table">
|
| 529 |
+
<thead>
|
| 530 |
+
<tr>
|
| 531 |
+
<th>Technique</th>
|
| 532 |
+
<th>Category</th>
|
| 533 |
+
<th>Key Advantage</th>
|
| 534 |
+
</tr>
|
| 535 |
+
</thead>
|
| 536 |
+
<tbody id="indexing-table-body">
|
| 537 |
+
</tbody>
|
| 538 |
+
</table>
|
| 539 |
+
</div>
|
| 540 |
+
|
| 541 |
+
<div class="card">
|
| 542 |
+
<h2>Vector Search Engines (DBs)</h2>
|
| 543 |
+
<table class="tech-table">
|
| 544 |
+
<thead>
|
| 545 |
+
<tr>
|
| 546 |
+
<th>Database</th>
|
| 547 |
+
<th>Primary Index</th>
|
| 548 |
+
<th>Best For</th>
|
| 549 |
+
</tr>
|
| 550 |
+
</thead>
|
| 551 |
+
<tbody id="vectordb-table-body">
|
| 552 |
+
</tbody>
|
| 553 |
+
</table>
|
| 554 |
+
</div>
|
| 555 |
+
</div>
|
| 556 |
+
|
| 557 |
+
<div class="card">
|
| 558 |
+
<h2>📚 Full Citations & Technical References</h2>
|
| 559 |
+
<div id="citation-list"></div>
|
| 560 |
+
</div>
|
| 561 |
+
</div>
|
| 562 |
</div>
|
| 563 |
+
|
| 564 |
|
| 565 |
<script>
|
| 566 |
const ragData = [
|
|
|
|
| 1126 |
chunkingYearTimeline.appendChild(item);
|
| 1127 |
});
|
| 1128 |
}
|
| 1129 |
+
// Indexing & Vector DB
|
| 1130 |
+
// Rows for the "Core Indexing Techniques" table.
// Each entry carries:
//   name, desc, advantage - rendered as the three table cells by initIndexingDB()
//   citation, url         - passed to appendCitation() to build the reference link
const indexingData = [
|
| 1131 |
+
{ name: "Flat Index", desc: "Brute-force exact search", advantage: "100% Accuracy (Recall)", citation: "Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. NeurIPS 2020.", url: "https://proceedings.neurips.cc/paper/2020/hash/6b49327755561ad448233261fb3a129d-Abstract.html" },
|
| 1132 |
+
{ name: "HNSW", desc: "Graph-based proximity search", advantage: "Fastest retrieval & high recall", citation: "Malkov, Y. A., & Yashunin, D. A. (2018). Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs. IEEE TPAMI.", url: "https://ieeexplore.ieee.org/document/8593845" },
|
| 1133 |
+
{ name: "IVF", desc: "Inverted file (Clustering-based)", advantage: "Reduced search space", citation: "Johnson, J., Douze, M., & Jégou, H. (2019). Billion-scale similarity search with GPUs. IEEE Transactions on Big Data.", url: "https://ieeexplore.ieee.org/document/8733051" },
|
| 1134 |
+
{ name: "PQ / OPQ", desc: "Product Quantization (Compression)", advantage: "Massive memory efficiency", citation: "Jégou, H., Douze, M., & Schmid, C. (2011). Product Quantization for Nearest Neighbor Search. IEEE TPAMI.", url: "https://ieeexplore.ieee.org/document/5432202" }
|
| 1135 |
+
];
|
| 1136 |
+
|
| 1137 |
+
// Rows for the "Vector Search Engines (DBs)" table.
// Each entry carries:
//   name, index, best - rendered as the three table cells by initIndexingDB()
//   citation, url     - passed to appendCitation() to build the reference link
// Note: several entries share the same citation/url pair.
const vectorDBData = [
|
| 1138 |
+
{ name: "FAISS", index: "Flat, IVF, HNSW, PQ", best: "Academic Research/Baseline", citation: "Gao, Y., et al. (2024). Retrieval-Augmented Generation for Large Language Models: A Survey. ACM Computing Surveys.", url: "https://dl.acm.org/doi/10.1145/3680493" },
|
| 1139 |
+
{ name: "Milvus", index: "HNSW, IVF-PQ", best: "Scalable Open-source", citation: "Liu, L., et al. (2024). A Survey on Retrieval-Augmented Text Generation. IEEE TKDE.", url: "https://ieeexplore.ieee.org/document/10506183" },
|
| 1140 |
+
{ name: "Pinecone", index: "Proprietary HNSW-like", best: "Production Managed Service", citation: "Gao, Y., et al. (2024). Retrieval-Augmented Generation for Large Language Models: A Survey. ACM Computing Surveys.", url: "https://dl.acm.org/doi/10.1145/3680493" },
|
| 1141 |
+
{ name: "Weaviate", index: "HNSW", best: "Schema/Metadata Support", citation: "Liu, L., et al. (2024). A Survey on Retrieval-Augmented Text Generation. IEEE TKDE.", url: "https://ieeexplore.ieee.org/document/10506183" }
|
| 1142 |
+
];
|
| 1143 |
+
|
| 1144 |
+
/**
 * Activate the tab button and content panel identified by `tabName`.
 *
 * The panel is the element whose id is `<tabName>-content`. The button is the
 * first `.tab` element whose label contains the first dash-separated segment
 * of `tabName` (case-insensitive), e.g. 'indexing-db' matches a label
 * containing "indexing".
 *
 * If no panel exists for `tabName`, the call logs a warning and leaves the
 * currently active tab untouched instead of throwing after everything has
 * already been deactivated.
 *
 * @param {string} tabName - Tab key, e.g. 'rag', 'chunking', 'indexing-db'.
 * @returns {void}
 */
function switchTab(tabName) {
    // Resolve the target first so an unknown name cannot blank the page.
    const panel = document.getElementById(tabName + '-content');
    if (!panel) {
        console.warn(`switchTab: no element with id "${tabName}-content"`);
        return;
    }

    const tabs = Array.from(document.querySelectorAll('.tab'));
    tabs.forEach(t => t.classList.remove('active'));
    document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));

    // Buttons carry no id, so they are matched by label text.
    const labelKey = tabName.split('-')[0];
    const activeTab = tabs.find(t => t.innerText.toLowerCase().includes(labelKey));
    if (activeTab) activeTab.classList.add('active');
    panel.classList.add('active');

    // The indexing tab's tables are rendered when the tab is opened.
    if (tabName === 'indexing-db') initIndexingDB();
}
|
| 1154 |
+
|
| 1155 |
+
/**
 * Render the "Indexing & Vector DB" tab from `indexingData` and `vectorDBData`.
 *
 * Fills the two table bodies (#indexing-table-body, #vectordb-table-body) and
 * rebuilds the reference list (#citation-list) via appendCitation(). All three
 * containers are reset on every call, so re-opening the tab does not
 * accumulate rows.
 *
 * Each distinct citation/url pair is listed once, in first-seen order, even
 * when several rows share the same source.
 *
 * @returns {void}
 */
function initIndexingDB() {
    const indexBody = document.getElementById('indexing-table-body');
    const dbBody = document.getElementById('vectordb-table-body');
    const citeList = document.getElementById('citation-list');

    // Reset the reference list before appendCitation() repopulates it.
    citeList.innerHTML = '';

    // Assign each table body once; `innerHTML +=` in a loop re-parses the
    // whole body on every iteration.
    indexBody.innerHTML = indexingData
        .map(d => `<tr><td><strong>${d.name}</strong></td><td>${d.desc}</td><td>${d.advantage}</td></tr>`)
        .join('');
    dbBody.innerHTML = vectorDBData
        .map(d => `<tr><td><strong>${d.name}</strong></td><td>${d.index}</td><td>${d.best}</td></tr>`)
        .join('');

    // Indexing references first, then database references, skipping repeats.
    const seen = new Set();
    [...indexingData, ...vectorDBData].forEach(d => {
        const key = `${d.url}\n${d.citation}`;
        if (seen.has(key)) return;
        seen.add(key);
        appendCitation(d);
    });
}
|
| 1172 |
+
|
| 1173 |
+
/**
 * Append one reference entry to the #citation-list container.
 *
 * Produces the structure
 *   div.timeline-item > div.timeline-content > div.timeline-reference > "📄 " + a
 * where the link text is `item.citation` and the link target is `item.url`.
 *
 * The nodes are built with DOM APIs rather than an HTML string, so characters
 * such as `&`, `<` or `"` in a citation or URL are shown literally and cannot
 * break the markup.
 *
 * @param {{citation: string, url: string}} item - Entry to list.
 * @returns {void}
 */
function appendCitation(item) {
    const list = document.getElementById('citation-list');

    const link = document.createElement('a');
    link.href = item.url;
    link.target = '_blank';
    // Keep the opened page from reaching back through window.opener.
    link.rel = 'noopener noreferrer';
    link.textContent = item.citation;

    const reference = document.createElement('div');
    reference.className = 'timeline-reference';
    reference.appendChild(document.createTextNode('📄 '));
    reference.appendChild(link);

    const content = document.createElement('div');
    content.className = 'timeline-content';
    content.appendChild(reference);

    const div = document.createElement('div');
    div.className = 'timeline-item';
    div.appendChild(content);

    list.appendChild(div);
}
|
| 1180 |
</script>
|
| 1181 |
</body>
|
| 1182 |
</html>
|