Soha85 committed on
Commit
1bc8ce2
·
verified ·
1 Parent(s): eb8b3fc

Add Indexing & Vector DB

Browse files
Files changed (1) hide show
  1. index.html +127 -0
index.html CHANGED
@@ -376,6 +376,34 @@
376
  font-size: 0.85em;
377
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
378
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
379
  </style>
380
  </head>
381
  <body>
@@ -385,6 +413,7 @@
385
  <div class="tabs">
386
  <button class="tab active" onclick="switchTab('rag')">RAG</button>
387
  <button class="tab" onclick="switchTab('chunking')">Chunking</button>
 
388
  </div>
389
 
390
  <!-- RAG TYPES TAB -->
@@ -484,7 +513,54 @@
484
  </div>
485
  </div>
486
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
487
  </div>
 
488
 
489
  <script>
490
  const ragData = [
@@ -1050,6 +1126,57 @@
1050
  chunkingYearTimeline.appendChild(item);
1051
  });
1052
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1053
  </script>
1054
  </body>
1055
  </html>
 
376
  font-size: 0.85em;
377
  box-shadow: 0 2px 4px rgba(0,0,0,0.1);
378
  }
379
+ body { font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif; background: linear-gradient(135deg, #667eea 0%, #764ba2 100%); padding: 20px; min-height: 100vh; }
380
+ .container { max-width: 1400px; margin: 0 auto; }
381
+ h1 { text-align: center; color: white; margin-bottom: 30px; font-size: 2.5em; text-shadow: 2px 2px 4px rgba(0,0,0,0.3); }
382
+ .tabs { display: flex; gap: 10px; margin-bottom: 25px; }
383
+ .tab { padding: 15px 30px; background: rgba(255, 255, 255, 0.3); color: white; border: none; border-radius: 10px 10px 0 0; cursor: pointer; font-size: 1.1em; font-weight: bold; transition: all 0.3s ease; }
384
+ .tab:hover { background: rgba(255, 255, 255, 0.4); }
385
+ .tab.active { background: white; color: #667eea; }
386
+ .tab-content { display: none; }
387
+ .tab-content.active { display: block; }
388
+ .grid { display: grid; grid-template-columns: repeat(auto-fit, minmax(450px, 1fr)); gap: 25px; margin-bottom: 25px; }
389
+ .card { background: white; border-radius: 15px; padding: 25px; box-shadow: 0 10px 30px rgba(0,0,0,0.2); transition: transform 0.3s ease; }
390
+ .card h2 { color: #667eea; margin-bottom: 20px; font-size: 1.5em; border-bottom: 3px solid #667eea; padding-bottom: 10px; }
391
+
392
+ /* New Styles for Indexing/DB Tab */
393
+ .hint-box { background: #fff3cd; border-left: 5px solid #ffc107; padding: 20px; border-radius: 10px; margin-bottom: 25px; color: #856404; }
394
+ .hint-box h3 { margin-bottom: 10px; }
395
+ .tech-table { width: 100%; border-collapse: collapse; margin-top: 15px; }
396
+ .tech-table th, .tech-table td { padding: 12px; border: 1px solid #ddd; text-align: left; font-size: 0.9em; }
397
+ .tech-table th { background-color: #f8f9fa; color: #667eea; }
398
+ .tag-algo { background: #e2e3ff; color: #4a51d4; padding: 2px 8px; border-radius: 4px; font-size: 0.8em; font-weight: bold; }
399
+ .tag-sys { background: #d4edda; color: #155724; padding: 2px 8px; border-radius: 4px; font-size: 0.8em; font-weight: bold; }
400
+
401
+ /* Retained utility classes */
402
+ .timeline-item { display: flex; align-items: center; padding: 15px; margin: 10px 0; background: white; border-radius: 10px; box-shadow: 0 2px 8px rgba(0,0,0,0.1); }
403
+ .timeline-year { font-size: 1.8em; font-weight: bold; color: #667eea; min-width: 80px; padding-right: 20px; border-right: 3px solid #667eea; }
404
+ .timeline-content { padding-left: 20px; flex: 1; }
405
+ .timeline-reference { color: #888; font-size: 0.85em; font-style: italic; margin-top: 5px; }
406
+ .timeline-reference a { color: #667eea; text-decoration: none; }
407
  </style>
408
  </head>
409
  <body>
 
413
  <div class="tabs">
414
  <button class="tab active" onclick="switchTab('rag')">RAG</button>
415
  <button class="tab" onclick="switchTab('chunking')">Chunking</button>
416
+ <button class="tab" onclick="switchTab('indexing-db')">Indexing & Vector DB</button>
417
  </div>
418
 
419
  <!-- RAG TYPES TAB -->
 
513
  </div>
514
  </div>
515
  </div>
516
+
517
+ <!-- Indexing & Vector DB -->
518
+ <div id="indexing-db-content" class="tab-content">
519
+ <div class="hint-box">
520
+ <h3>💡 Architectural Distinction</h3>
521
+ <p><strong>Indexing Technique <span class="tag-algo">Algorithmic Level</span>:</strong> The mathematical strategy used to organize vector space (e.g., HNSW, IVF). It defines <em>how</em> the search is performed conceptually.</p>
522
+ <p style="margin-top: 10px;"><strong>Vector Database <span class="tag-sys">System Level</span>:</strong> The infrastructure/engine that implements these algorithms, adding database features like persistence, scalability, API layers, and metadata filtering (e.g., Pinecone, Milvus).</p>
523
+ </div>
524
+
525
+ <div class="grid">
526
+ <div class="card">
527
+ <h2>Core Indexing Techniques</h2>
528
+ <table class="tech-table">
529
+ <thead>
530
+ <tr>
531
+ <th>Technique</th>
532
+ <th>Category</th>
533
+ <th>Key Advantage</th>
534
+ </tr>
535
+ </thead>
536
+ <tbody id="indexing-table-body">
537
+ </tbody>
538
+ </table>
539
+ </div>
540
+
541
+ <div class="card">
542
+ <h2>Vector Search Engines (DBs)</h2>
543
+ <table class="tech-table">
544
+ <thead>
545
+ <tr>
546
+ <th>Database</th>
547
+ <th>Primary Index</th>
548
+ <th>Best For</th>
549
+ </tr>
550
+ </thead>
551
+ <tbody id="vectordb-table-body">
552
+ </tbody>
553
+ </table>
554
+ </div>
555
+ </div>
556
+
557
+ <div class="card">
558
+ <h2>📚 Full Citations & Technical References</h2>
559
+ <div id="citation-list"></div>
560
+ </div>
561
+ </div>
562
  </div>
563
+
564
 
565
  <script>
566
  const ragData = [
 
1126
  chunkingYearTimeline.appendChild(item);
1127
  });
1128
  }
1129
+ // Indexing & Vector DB
1130
+ const indexingData = [ // Indexing-technique rows: name/desc/advantage fill #indexing-table-body; citation/url are passed to appendCitation
1131
+ { name: "Flat Index", desc: "Brute-force exact search", advantage: "100% Accuracy (Recall)", citation: "Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. NeurIPS 2020.", url: "https://proceedings.neurips.cc/paper/2020/hash/6b49327755561ad448233261fb3a129d-Abstract.html" },
1132
+ { name: "HNSW", desc: "Graph-based proximity search", advantage: "Fastest retrieval & high recall", citation: "Malkov, Y. A., & Yashunin, D. A. (2018). Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs. IEEE TPAMI.", url: "https://ieeexplore.ieee.org/document/8593845" },
1133
+ { name: "IVF", desc: "Inverted file (Clustering-based)", advantage: "Reduced search space", citation: "Johnson, J., Douze, M., & Jégou, H. (2019). Billion-scale similarity search with GPUs. IEEE Transactions on Big Data.", url: "https://ieeexplore.ieee.org/document/8733051" },
1134
+ { name: "PQ / OPQ", desc: "Product Quantization (Compression)", advantage: "Massive memory efficiency", citation: "Jégou, H., Douze, M., & Schmid, C. (2011). Product Quantization for Nearest Neighbor Search. IEEE TPAMI.", url: "https://ieeexplore.ieee.org/document/5432202" }
1135
+ ]; // NOTE(review): citation/url pairings are taken as given here — confirm each URL resolves to the cited paper
1136
+
1137
+ const vectorDBData = [ // Vector-database rows: name/index/best fill #vectordb-table-body; citation/url are passed to appendCitation
1138
+ { name: "FAISS", index: "Flat, IVF, HNSW, PQ", best: "Academic Research/Baseline", citation: "Gao, Y., et al. (2024). Retrieval-Augmented Generation for Large Language Models: A Survey. ACM Computing Surveys.", url: "https://dl.acm.org/doi/10.1145/3680493" },
1139
+ { name: "Milvus", index: "HNSW, IVF-PQ", best: "Scalable Open-source", citation: "Liu, L., et al. (2024). A Survey on Retrieval-Augmented Text Generation. IEEE TKDE.", url: "https://ieeexplore.ieee.org/document/10506183" },
1140
+ { name: "Pinecone", index: "Proprietary HNSW-like", best: "Production Managed Service", citation: "Gao, Y., et al. (2024). Retrieval-Augmented Generation for Large Language Models: A Survey. ACM Computing Surveys.", url: "https://dl.acm.org/doi/10.1145/3680493" },
1141
+ { name: "Weaviate", index: "HNSW", best: "Schema/Metadata Support", citation: "Liu, L., et al. (2024). A Survey on Retrieval-Augmented Text Generation. IEEE TKDE.", url: "https://ieeexplore.ieee.org/document/10506183" }
1142
+ ]; // FAISS/Pinecone and Milvus/Weaviate carry identical citation+url pairs, so each source appears twice in this array
1143
+
1144
/**
 * Show the content panel for `tabName` and highlight its tab button.
 *
 * The panel is looked up as the element with id `${tabName}-content`.
 * When no such panel exists the call is a no-op (with a console warning),
 * so an unknown name can no longer strip every `active` class and then throw.
 *
 * NOTE(review): the tab buttons already called switchTab() before this
 * function was added, so an earlier declaration presumably exists in the
 * same script; being declared later, this one would replace it — confirm
 * nothing from the earlier version is lost.
 *
 * @param {string} tabName - Tab key as used in the buttons' onclick
 *   handlers (e.g. 'rag', 'chunking', 'indexing-db').
 * @returns {void}
 */
function switchTab(tabName) {
  // Resolve the target first: bail out before mutating any classes.
  const panel = document.getElementById(`${tabName}-content`);
  if (!panel) {
    console.warn(`switchTab: no content panel found for "${tabName}"`);
    return;
  }

  const tabs = Array.from(document.querySelectorAll('.tab'));
  tabs.forEach((tab) => tab.classList.remove('active'));
  document
    .querySelectorAll('.tab-content')
    .forEach((content) => content.classList.remove('active'));

  // Prefer the button whose inline handler names this exact tab; fall back
  // to the original label-substring match for buttons wired up differently.
  const handlerNeedle = `'${tabName}'`;
  const labelNeedle = tabName.split('-')[0];
  const activeTab =
    tabs.find((tab) => (tab.getAttribute('onclick') || '').includes(handlerNeedle)) ||
    tabs.find((tab) => tab.innerText.toLowerCase().includes(labelNeedle));
  if (activeTab) {
    activeTab.classList.add('active');
  }
  panel.classList.add('active');

  // The indexing tab's tables are rendered each time the tab is opened.
  if (tabName === 'indexing-db') {
    initIndexingDB();
  }
}
1154
+
1155
/**
 * Render the "Indexing & Vector DB" tab: both comparison tables and the
 * citation list.
 *
 * Reads the `indexingData` and `vectorDBData` arrays and rewrites the
 * contents of #indexing-table-body, #vectordb-table-body and #citation-list,
 * so calling it repeatedly does not accumulate rows.
 *
 * Each distinct source (citation + url) is listed once, even when several
 * rows share it.
 *
 * @returns {void}
 */
function initIndexingDB() {
  const indexBody = document.getElementById('indexing-table-body');
  const dbBody = document.getElementById('vectordb-table-body');
  const citeList = document.getElementById('citation-list');

  // Build each table body in a single assignment instead of `innerHTML +=`
  // per row, which re-parses the whole body on every iteration.
  indexBody.innerHTML = indexingData
    .map((d) => `<tr><td><strong>${d.name}</strong></td><td>${d.desc}</td><td>${d.advantage}</td></tr>`)
    .join('');
  dbBody.innerHTML = vectorDBData
    .map((d) => `<tr><td><strong>${d.name}</strong></td><td>${d.index}</td><td>${d.best}</td></tr>`)
    .join('');

  // Citations keep their original order (indexing rows first, then DB rows)
  // but repeated sources are emitted only once.
  citeList.innerHTML = '';
  const seenSources = new Set();
  [...indexingData, ...vectorDBData].forEach((d) => {
    const sourceKey = `${d.citation}|${d.url}`;
    if (seenSources.has(sourceKey)) {
      return;
    }
    seenSources.add(sourceKey);
    appendCitation(d);
  });
}
1172
+
1173
/**
 * Append one reference entry to #citation-list.
 *
 * The entry is built with DOM APIs rather than an HTML string, so the
 * citation text and URL are inserted as data (characters such as `&` or `<`
 * are shown literally) and the new-tab link carries
 * rel="noopener noreferrer".
 *
 * @param {{citation: string, url: string}} item - Source to list; `citation`
 *   becomes the link text and `url` the link target.
 * @returns {void}
 */
function appendCitation(item) {
  const list = document.getElementById('citation-list');

  const link = document.createElement('a');
  link.href = item.url;
  link.target = '_blank';
  // Links opened in a new tab should not get a handle on this page.
  link.rel = 'noopener noreferrer';
  link.textContent = item.citation;

  const reference = document.createElement('div');
  reference.className = 'timeline-reference';
  reference.append('📄 ', link);

  const content = document.createElement('div');
  content.className = 'timeline-content';
  content.appendChild(reference);

  const entry = document.createElement('div');
  entry.className = 'timeline-item';
  entry.appendChild(content);

  list.appendChild(entry);
}
1180
  </script>
1181
  </body>
1182
  </html>