Soha85 committed on
Commit
b1fc5d9
·
verified ·
1 Parent(s): 06372d5

Update index.html

Browse files
Files changed (1) hide show
  1. index.html +96 -0
index.html CHANGED
@@ -514,6 +514,52 @@
514
  </div>
515
  </div>
516
  </div>
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
517
  </div>
518
 
519
  <script>
@@ -1080,6 +1126,56 @@
1080
  chunkingYearTimeline.appendChild(item);
1081
  });
1082
  }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1083
  </script>
1084
  </body>
1085
  </html>
 
514
  </div>
515
  </div>
516
  </div>
517
+
518
+ <!--INDEXING & VECTOR DB -->
519
+ <div id="indexing-db-content" class="tab-content">
520
+ <div class="hint-box">
521
+ <h3>💡 Architectural Distinction</h3>
522
+ <p><strong>Indexing Technique <span class="tag-algo">Algorithmic Level</span>:</strong> The mathematical strategy used to organize vector space (e.g., HNSW, IVF). It defines <em>how</em> the search is performed conceptually.</p>
523
+ <p style="margin-top: 10px;"><strong>Vector Database <span class="tag-sys">System Level</span>:</strong> The infrastructure/engine that implements these algorithms, adding database features like persistence, scalability, API layers, and metadata filtering (e.g., Pinecone, Milvus).</p>
524
+ </div>
525
+
526
+ <div class="grid">
527
+ <div class="card">
528
+ <h2>Core Indexing Techniques</h2>
529
+ <table class="tech-table">
530
+ <thead>
531
+ <tr>
532
+ <th>Technique</th>
533
+ <th>Category</th>
534
+ <th>Key Advantage</th>
535
+ </tr>
536
+ </thead>
537
+ <tbody id="indexing-table-body">
538
+ </tbody>
539
+ </table>
540
+ </div>
541
+
542
+ <div class="card">
543
+ <h2>Vector Search Engines (DBs)</h2>
544
+ <table class="tech-table">
545
+ <thead>
546
+ <tr>
547
+ <th>Database</th>
548
+ <th>Primary Index</th>
549
+ <th>Best For</th>
550
+ </tr>
551
+ </thead>
552
+ <tbody id="vectordb-table-body">
553
+ </tbody>
554
+ </table>
555
+ </div>
556
+ </div>
557
+
558
+ <div class="card">
559
+ <h2>📚 Full Citations & Technical References</h2>
560
+ <div id="citation-list"></div>
561
+ </div>
562
+ </div>
563
  </div>
564
 
565
  <script>
 
1126
  chunkingYearTimeline.appendChild(item);
1127
  });
1128
  }
1129
// Indexing techniques shown in the table whose body is #indexing-table-body.
// `name`, `desc` and `advantage` become the three table cells; `citation` and
// `url` are passed to appendCitation() to build the reference list.
const indexingData = [
  {
    name: "Flat Index",
    desc: "Brute-force exact search",
    advantage: "100% Accuracy (Recall)",
    citation: "Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. NeurIPS 2020.",
    url: "https://proceedings.neurips.cc/paper/2020/hash/6b49327755561ad448233261fb3a129d-Abstract.html"
  },
  {
    name: "HNSW",
    desc: "Graph-based proximity search",
    advantage: "Fastest retrieval & high recall",
    citation: "Malkov, Y. A., & Yashunin, D. A. (2018). Efficient and robust approximate nearest neighbor search using Hierarchical Navigable Small World graphs. IEEE TPAMI.",
    url: "https://ieeexplore.ieee.org/document/8593845"
  },
  {
    name: "IVF",
    desc: "Inverted file (Clustering-based)",
    advantage: "Reduced search space",
    citation: "Johnson, J., Douze, M., & Jégou, H. (2019). Billion-scale similarity search with GPUs. IEEE Transactions on Big Data.",
    url: "https://ieeexplore.ieee.org/document/8733051"
  },
  {
    name: "PQ / OPQ",
    desc: "Product Quantization (Compression)",
    advantage: "Massive memory efficiency",
    citation: "Jégou, H., Douze, M., & Schmid, C. (2011). Product Quantization for Nearest Neighbor Search. IEEE TPAMI.",
    url: "https://ieeexplore.ieee.org/document/5432202"
  }
];
1135
+
1136
// Vector databases shown in the table whose body is #vectordb-table-body.
// `name`, `index` and `best` become the three table cells; `citation` and
// `url` are passed to appendCitation() to build the reference list.
const vectorDBData = [
  {
    name: "FAISS",
    index: "Flat, IVF, HNSW, PQ",
    best: "Academic Research/Baseline",
    citation: "Gao, Y., et al. (2024). Retrieval-Augmented Generation for Large Language Models: A Survey. ACM Computing Surveys.",
    url: "https://dl.acm.org/doi/10.1145/3680493"
  },
  {
    name: "Milvus",
    index: "HNSW, IVF-PQ",
    best: "Scalable Open-source",
    citation: "Liu, L., et al. (2024). A Survey on Retrieval-Augmented Text Generation. IEEE TKDE.",
    url: "https://ieeexplore.ieee.org/document/10506183"
  },
  {
    name: "Pinecone",
    index: "Proprietary HNSW-like",
    best: "Production Managed Service",
    citation: "Gao, Y., et al. (2024). Retrieval-Augmented Generation for Large Language Models: A Survey. ACM Computing Surveys.",
    url: "https://dl.acm.org/doi/10.1145/3680493"
  },
  {
    name: "Weaviate",
    index: "HNSW",
    best: "Schema/Metadata Support",
    citation: "Liu, L., et al. (2024). A Survey on Retrieval-Augmented Text Generation. IEEE TKDE.",
    url: "https://ieeexplore.ieee.org/document/10506183"
  }
];
1142
+
1143
/**
 * Show the content panel `<tabName>-content` and highlight its tab button.
 *
 * The tab button is located by text: the first `.tab` element whose lower-cased
 * innerText contains the part of `tabName` before the first '-' is activated.
 * Switching to 'indexing-db' also (re)renders that tab via initIndexingDB().
 *
 * @param {string} tabName - Panel id without the '-content' suffix.
 * @returns {void}
 */
function switchTab(tabName) {
  // Resolve the target first: bailing out before touching any classes keeps
  // the current tab visible when an unknown name is passed, instead of
  // clearing everything and then throwing on a null element.
  const panel = document.getElementById(tabName + '-content');
  if (!panel) {
    console.warn(`switchTab: no element with id "${tabName}-content"`);
    return;
  }

  const tabs = Array.from(document.querySelectorAll('.tab'));
  tabs.forEach(t => t.classList.remove('active'));
  document.querySelectorAll('.tab-content').forEach(c => c.classList.remove('active'));

  // The tab text is lower-cased for comparison, so the keyword must be too.
  const keyword = tabName.split('-')[0].toLowerCase();
  const activeTab = tabs.find(t => t.innerText.toLowerCase().includes(keyword));
  if (activeTab) activeTab.classList.add('active');
  panel.classList.add('active');

  if (tabName === 'indexing-db') initIndexingDB();
}
1153
+
1154
/**
 * Render the "Indexing & Vector DB" tab from `indexingData` and `vectorDBData`.
 *
 * Fills #indexing-table-body and #vectordb-table-body with one row per record
 * and rebuilds #citation-list through appendCitation(). Every call starts from
 * empty containers, so calling it repeatedly does not accumulate rows.
 * Does nothing if any of the three containers is missing.
 *
 * @returns {void}
 */
function initIndexingDB() {
  const indexBody = document.getElementById('indexing-table-body');
  const dbBody = document.getElementById('vectordb-table-body');
  const citeList = document.getElementById('citation-list');
  if (!indexBody || !dbBody || !citeList) return;

  const toRow = (first, second, third) =>
    `<tr><td><strong>${first}</strong></td><td>${second}</td><td>${third}</td></tr>`;

  // Assign each table body once; `innerHTML +=` in a loop makes the browser
  // re-serialise and re-parse the whole body for every added row.
  indexBody.innerHTML = indexingData.map(d => toRow(d.name, d.desc, d.advantage)).join('');
  dbBody.innerHTML = vectorDBData.map(d => toRow(d.name, d.index, d.best)).join('');

  // Several records cite the same source; list each reference only once,
  // keeping first-seen order (indexing techniques, then databases).
  citeList.innerHTML = '';
  const seenUrls = new Set();
  for (const entry of [...indexingData, ...vectorDBData]) {
    if (seenUrls.has(entry.url)) continue;
    seenUrls.add(entry.url);
    appendCitation(entry);
  }
}
1171
+
1172
/**
 * Append one reference entry to #citation-list.
 *
 * Produces the structure
 *   div.timeline-item > div.timeline-content > div.timeline-reference > "📄 " + a
 * where the link opens `item.url` in a new tab and shows `item.citation`.
 * Does nothing if #citation-list is not in the document.
 *
 * @param {{url: string, citation: string}} item - Record supplying the link
 *   target and the citation text.
 * @returns {void}
 */
function appendCitation(item) {
  const list = document.getElementById('citation-list');
  if (!list) return;

  // Build nodes instead of interpolating into innerHTML so characters such as
  // '&', '<' or '"' in the citation or URL are never interpreted as markup.
  const link = document.createElement('a');
  link.href = item.url;
  link.target = '_blank';
  // Prevent the opened page from reaching back through window.opener.
  link.rel = 'noopener noreferrer';
  link.textContent = item.citation;

  const reference = document.createElement('div');
  reference.className = 'timeline-reference';
  reference.append('📄 ', link);

  const content = document.createElement('div');
  content.className = 'timeline-content';
  content.appendChild(reference);

  const div = document.createElement('div');
  div.className = 'timeline-item';
  div.appendChild(content);
  list.appendChild(div);
}
1179
  </script>
1180
  </body>
1181
  </html>