<!--
RAG_Study / index.html
Soha85's picture
Update index.html
1c221a5 verified
-->
<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RAG Visualization</title>
<script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/3.9.1/chart.min.js"></script>
<style>
* {
margin: 0;
padding: 0;
box-sizing: border-box;
}
body {
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
background: linear-gradient(135deg, #667eea 0%, #764ba2 100%);
padding: 20px;
min-height: 100vh;
}
.container {
max-width: 1400px;
margin: 0 auto;
}
h1 {
text-align: center;
color: white;
margin-bottom: 30px;
font-size: 2.5em;
text-shadow: 2px 2px 4px rgba(0,0,0,0.3);
}
.tabs {
display: flex;
gap: 10px;
margin-bottom: 25px;
}
.tab {
padding: 15px 30px;
background: rgba(255, 255, 255, 0.3);
color: white;
border: none;
border-radius: 10px 10px 0 0;
cursor: pointer;
font-size: 1.1em;
font-weight: bold;
transition: all 0.3s ease;
}
.tab:hover {
background: rgba(255, 255, 255, 0.4);
}
.tab.active {
background: white;
color: #667eea;
}
.tab-content {
display: none;
}
.tab-content.active {
display: block;
}
/* Responsive card grid: as many columns as fit, each at least 450px wide.
   min(450px, 100%) lets a column shrink below 450px when the container itself
   is narrower, so the page does not scroll horizontally on small screens. */
.grid {
    display: grid;
    grid-template-columns: repeat(auto-fit, minmax(min(450px, 100%), 1fr));
    gap: 25px;
    margin-bottom: 25px;
}
.card {
background: white;
border-radius: 15px;
padding: 25px;
box-shadow: 0 10px 30px rgba(0,0,0,0.2);
transition: transform 0.3s ease;
}
.card:hover {
transform: translateY(-5px);
}
.card h2 {
color: #667eea;
margin-bottom: 20px;
font-size: 1.5em;
border-bottom: 3px solid #667eea;
padding-bottom: 10px;
}
.chart-container {
position: relative;
height: 400px;
}
.timeline-card {
grid-column: 1 / -1;
}
.workflow-selector {
margin-bottom: 20px;
}
.workflow-selector select {
width: 100%;
padding: 12px;
border: 2px solid #667eea;
border-radius: 8px;
font-size: 16px;
background: white;
cursor: pointer;
transition: all 0.3s ease;
}
.workflow-selector select:hover {
border-color: #764ba2;
}
.flowchart-container {
display: flex;
gap: 30px;
margin-top: 20px;
flex-wrap: wrap;
}
/* One flowchart panel inside .flowchart-container (a wrapping flex row).
   The 400px minimum is capped at the container width so the panel cannot
   overflow on viewports narrower than 400px. */
.flowchart {
    flex: 1;
    min-width: min(400px, 100%);
    background: #f8f9fa;
    padding: 20px;
    border-radius: 10px;
}
.flowchart h3 {
color: #667eea;
margin-bottom: 15px;
font-size: 1.2em;
text-align: center;
}
.flow-step {
background: white;
padding: 12px 15px;
margin: 10px 0;
border-radius: 8px;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
position: relative;
border-left: 4px solid #667eea;
}
.flow-step::after {
content: '↓';
position: absolute;
bottom: -20px;
left: 50%;
transform: translateX(-50%);
font-size: 20px;
color: #667eea;
font-weight: bold;
}
.flow-step:last-child::after {
content: '';
}
.flow-step.highlight {
background: linear-gradient(135deg, #667eea15, #764ba215);
border-left-color: #764ba2;
}
.benefits-challenges {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 15px;
margin-top: 20px;
}
.info-box {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
}
.info-box h4 {
color: #667eea;
margin-bottom: 10px;
display: flex;
align-items: center;
gap: 8px;
}
.info-box p {
color: #555;
line-height: 1.6;
font-size: 14px;
}
.timeline-item {
display: flex;
align-items: center;
padding: 15px;
margin: 10px 0;
background: white;
border-radius: 10px;
box-shadow: 0 2px 8px rgba(0,0,0,0.1);
transition: all 0.3s ease;
}
.timeline-item:hover {
transform: translateX(10px);
box-shadow: 0 4px 12px rgba(0,0,0,0.15);
}
.timeline-year {
font-size: 1.8em;
font-weight: bold;
color: #667eea;
min-width: 80px;
padding-right: 20px;
border-right: 3px solid #667eea;
}
.timeline-content {
padding-left: 20px;
flex: 1;
}
.timeline-type {
font-weight: bold;
font-size: 1.1em;
color: #333;
margin-bottom: 5px;
}
.timeline-category {
color: #666;
font-size: 0.9em;
}
.timeline-reference {
color: #888;
font-size: 0.85em;
font-style: italic;
margin-top: 5px;
line-height: 1.4;
}
.timeline-reference a {
color: #667eea;
text-decoration: none;
transition: color 0.3s ease;
}
.timeline-reference a:hover {
color: #764ba2;
text-decoration: underline;
}
.category-badge {
display: inline-block;
padding: 4px 12px;
border-radius: 15px;
font-size: 0.85em;
margin-top: 5px;
}
.badge-foundational { background: #667eea; color: white; }
.badge-agentic { background: #764ba2; color: white; }
.badge-modular { background: #f093fb; color: white; }
.badge-structural-modular { background: #4facfe; color: white; }
.badge-structural { background: #00d2ff; color: white; }
.category-definition {
background: #f8f9fa;
padding: 12px;
margin: 8px 0;
border-radius: 8px;
border-left: 4px solid;
font-size: 0.9em;
}
.category-definition strong {
display: block;
margin-bottom: 5px;
font-size: 1em;
}
.category-definition p {
color: #555;
line-height: 1.5;
margin: 0;
}
.def-foundational { border-left-color: #667eea; }
.def-agentic { border-left-color: #764ba2; }
.def-modular { border-left-color: #f093fb; }
.def-structural { border-left-color: #00d2ff; }
.chunking-brief {
background: #f8f9fa;
padding: 15px;
border-radius: 8px;
margin-top: 15px;
border-left: 4px solid #667eea;
}
.chunking-brief p {
color: #555;
line-height: 1.6;
margin-bottom: 10px;
}
.chunking-brief .reference {
color: #888;
font-size: 0.9em;
font-style: italic;
}
.chunking-brief .reference a {
color: #667eea;
text-decoration: none;
}
.chunking-brief .reference a:hover {
text-decoration: underline;
}
.category-tree {
background: #f8f9fa;
padding: 20px;
border-radius: 10px;
margin-top: 20px;
}
.category-node {
margin: 15px 0;
padding: 15px;
background: white;
border-radius: 8px;
border-left: 4px solid #667eea;
box-shadow: 0 2px 5px rgba(0,0,0,0.1);
}
.category-node h4 {
color: #667eea;
margin-bottom: 10px;
font-size: 1.1em;
}
.category-node .brief {
color: #666;
font-size: 0.9em;
font-style: italic;
margin-bottom: 10px;
line-height: 1.5;
}
.category-methods {
display: flex;
flex-wrap: wrap;
gap: 8px;
margin-top: 10px;
}
.method-tag {
background: linear-gradient(135deg, #667eea, #764ba2);
color: white;
padding: 6px 12px;
border-radius: 15px;
font-size: 0.85em;
box-shadow: 0 2px 4px rgba(0,0,0,0.1);
}
.rag-table {
width: 100%;
border-collapse: collapse;
margin-bottom: 2rem;
font-size: 0.95rem;
}
.rag-table th,
.rag-table td {
border: 1px solid #ddd;
padding: 5px;
vertical-align: top;
color: #666;
font-size: 0.8em;
font-style: italic;
margin-bottom: 8px;
line-height: 1;
}
.rag-table th {
background-color: #5b6ee1;
color: #fff;
text-align: left;
}
.rag-table tr:nth-child(even) {
background-color: #f5f6fa;
}
.rag-table a {
color: #1a4cff;
text-decoration: none;
font-weight: 500;
}
.rag-table a:hover {
text-decoration: underline;
}
</style>
</head>
<body>
<div class="container">
<h1>🔍 RAG Visualization Dashboard</h1>
<!-- Tab bar. Each button passes its tab key to switchTab().
     Button order matters: switchTab() addresses the buttons by position. -->
<div class="tabs">
    <button class="tab active" type="button" onclick="switchTab('rag')">RAG</button>
    <button class="tab" type="button" onclick="switchTab('chunking')">Chunking</button>
    <button class="tab" type="button" onclick="switchTab('indexing-db')">Indexing &amp; Vector DB</button>
</div>
<!-- RAG TYPES TAB -->
<div id="rag-content" class="tab-content active">
<div class="grid">
<div class="card">
<h2>RAG Categories Distribution</h2>
<div class="chart-container">
<canvas id="categoryChart"></canvas>
</div>
<div id="categoryDefinitions"></div>
</div>
<div class="card">
<h2>RAG Types by Year</h2>
<div class="chart-container">
<canvas id="timelineChart"></canvas>
</div>
</div>
</div>
<div class="card timeline-card">
<h2>📅 Evolution Timeline: Year → RAG Type</h2>
<div id="yearTimeline"></div>
</div>
<div class="card timeline-card">
<h2>RAG Workflow Flowcharts</h2>
<div class="workflow-selector">
<select id="ragTypeSelector">
<option value="">Select a RAG Type to view workflows...</option>
</select>
</div>
<div id="flowchartDetails" style="display: none;">
<div class="flowchart-container">
<div class="flowchart">
<h3>📥 Indexing Workflow</h3>
<div id="indexingFlowchart"></div>
</div>
<div class="flowchart">
<h3>🔄 Inference Workflow</h3>
<div id="inferenceFlowchart"></div>
</div>
</div>
<div class="benefits-challenges">
<div class="info-box">
<h4>✅ Key Benefits</h4>
<p id="benefits"></p>
</div>
<div class="info-box">
<h4>⚠️ Challenges</h4>
<p id="challenges"></p>
</div>
</div>
</div>
</div>
</div>
<!-- CHUNKING TAB -->
<div id="chunking-content" class="tab-content">
<div class="grid">
<div class="card">
<h2>Chunking Methods Distribution</h2>
<div class="chart-container">
<canvas id="chunkingChart"></canvas>
</div>
<div id="chunkingCategoryDefs"></div>
</div>
<div class="card">
<h2>Chunking Categories & Methods</h2>
<div id="categoryHierarchy" class="category-tree"></div>
</div>
</div>
<div class="card timeline-card">
<h2>📅 Chunking Methods Timeline</h2>
<div id="chunkingYearTimeline"></div>
</div>
<div class="card timeline-card">
<h2>Chunking Method Flowcharts</h2>
<div class="workflow-selector">
<select id="chunkingSelector">
<option value="">Select a Chunking Method...</option>
</select>
</div>
<div id="chunkingFlowchartDetails" style="display: none;">
<div class="flowchart">
<h3>🔄 Chunking Process Flow</h3>
<div id="chunkingFlowchart"></div>
</div>
<div class="chunking-brief">
<p id="chunkingDescription"></p>
<div class="reference" id="chunkingReference"></div>
</div>
</div>
</div>
</div>
<!-- INDEXING TAB -->
<div id="indexing-db" class="tab-content">
<div class="grid">
<div class="card">
<h2>🧮 Indexing Techniques (Algorithmic Level)</h2>
<p class="brief">Algorithmic methods for organizing and searching vectors efficiently. These are the core algorithms that determine how vectors are structured and retrieved (e.g., Flat Index, ANN, IVF, HNSW, PQ/OPQ).</p>
</div>
<!-- Intro card for the vector-database table; mirrors the indexing-techniques
     card: plain <h2> heading followed by a <p class="brief"> summary. -->
<div class="card">
    <h2>💾 Vector Databases (System Level)</h2>
    <p class="brief">Complete systems/engines that implement indexing techniques along with additional features like persistence, metadata support, distributed architecture, and APIs. They are the production-grade platforms (e.g., FAISS, Milvus, Pinecone, Weaviate).</p>
</div>
</div>
<!-- ---------- INDEXING TECHNIQUES TABLE ---------- -->
<div class="card">
<table class="rag-table">
<thead>
<tr>
<th style="width:10%">Indexing Technique</th>
<th style="width:15%">Description</th>
<th style="width:15%">Workflow</th>
<th style="width:15%">Pros</th>
<th style="width:15%">Cons</th>
<th style="width:30%">Peer-Reviewed Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td>Flat Index (Brute-Force)</td>
<td>Exact similarity search over all vectors using distance metrics.</td>
<td>
Embed documents → store vectors → compare query with all vectors → return top-k
</td>
<td>Exact results<br>Simple implementation</td>
<td>Not scalable<br>High latency & memory cost</td>
<td class="timeline-reference">
<a href="https://proceedings.neurips.cc/paper_files/paper/2020/file/6b493230205f780e1bc26945df7481e5-Paper.pdf" target="_blank">
📄Lewis, P., Perez, E., Piktus, A., Petroni, F., Karpukhin, V., Goyal, N., Küttler, H., Lewis, M., Yih, W., Rocktäschel, T., Riedel, S., & Kiela, D. (2020). Retrieval‑augmented generation for knowledge‑intensive NLP tasks. In H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, & H. T. Lin (Eds.), Advances in Neural Information Processing Systems, 33, 9459–9474. Curran Associates, Inc.
</a>
</td>
</tr>
<tr>
<td>ANN (Approximate Nearest Neighbor)</td>
<td>Fast similarity search with controlled approximation.</td>
<td>
Embed documents → build ANN structure → retrieve approximate neighbors
</td>
<td>Fast retrieval<br>Scales to millions/billions</td>
<td>Approximate (not exact)</td>
<td class="timeline-reference">
<a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank">
📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
</a>
</td>
</tr>
<tr>
<td>IVF (Inverted File Index)</td>
<td>Partitions vector space into clusters; searches only relevant partitions.</td>
<td>
K-means clustering → assign vectors → query nearest clusters → search inside
</td>
<td>Good speed-accuracy trade-off<br>Scalable</td>
<td>Requires tuning<br>Cluster-quality sensitive</td>
<td class="timeline-reference">
<a href="https://arxiv.org/pdf/1702.08734" target="_blank">
📄Johnson, J., Douze, M., & Jégou, H. (2019). Billion‑scale similarity search with GPUs. IEEE Transactions on Big Data, 7(3), 535–547. https://doi.org/10.1109/TBDATA.2019.2921572
</a>
</td>
</tr>
<tr>
<td>HNSW</td>
<td>Graph-based ANN using hierarchical proximity graphs.</td>
<td>
Build layered graph → incremental insertion → top-down graph traversal
</td>
<td>Very fast<br>High recall<br>Dynamic updates</td>
<td>High memory usage<br>Complex construction</td>
<td class="timeline-reference">
<a href="https://arxiv.org/abs/1603.09320" target="_blank">
📄Malkov, Y. A., & Yashunin, D. A. (2020). Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE Transactions on Pattern Analysis and Machine Intelligence, 42(4), 824–836. https://doi.org/10.1109/TPAMI.2018.2889473
</a>
</td>
</tr>
<tr>
<td>PQ / OPQ</td>
<td>Compresses vectors into short codes for memory-efficient ANN.</td>
<td>
Subspace split → quantization → store codes → approximate distance
</td>
<td>Memory efficient<br>Enables billion-scale RAG</td>
<td>Lossy compression<br>Lower recall if misconfigured</td>
<td class="timeline-reference">
<a href="https://www.irisa.fr/texmex/people/jegou/papers/jegou_searching_with_quantization.pdf" target="_blank">
📄Jégou, H., Douze, M., & Schmid, C. (2011). Product quantization for nearest neighbor search. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(1), 117–128. https://doi.org/10.1109/TPAMI.2010.57
</a>
</td>
</tr>
</tbody>
</table>
</div>
<!-- ---------- VECTOR DATABASES TABLE ---------- -->
<div class="card">
<table class="rag-table">
<thead>
<tr>
<th style="width:10%">Vector DB</th>
<th style="width:20%">Description</th>
<th style="width:10%">Underlying Index</th>
<th style="width:15%">Pros</th>
<th style="width:15%">Cons</th>
<th style="width:30%">Peer-Reviewed Reference</th>
</tr>
</thead>
<tbody>
<tr>
<td>FAISS</td>
<td>Research-oriented vector similarity library widely used in RAG.</td>
<td>Flat, IVF, HNSW, PQ, OPQ</td>
<td>Highly optimized<br>Flexible ANN<br>Academic standard</td>
<td>Not a full DBMS<br>Limited metadata</td>
<td class="timeline-reference">
<a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank">
📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
</a>
</td>
</tr>
<!-- Milvus row. The reference link uses the clean article URL (no utm_* tracking parameter). -->
<tr>
    <td>Milvus</td>
    <td>Distributed open-source vector database for large-scale RAG.</td>
    <td>HNSW, IVF-PQ</td>
    <td>Scalable<br>Distributed<br>Open-source</td>
    <td>Deployment complexity<br>Operational overhead</td>
    <td class="timeline-reference">
        <a href="https://ijaibdcms.org/index.php/ijaibdcms/article/view/257" target="_blank" rel="noopener noreferrer">
            📄Rusum, G. P., &amp; Anasuri, S. (2025). Vector databases in modern applications: Real‑time search, recommendations, and retrieval‑augmented generation (RAG). International Journal of AI, BigData, Computational and Management Studies, 5(4), Article 113. https://doi.org/10.63282/3050‑9416.IJAIBDCMS‑V5I4P113
        </a>
    </td>
</tr>
<tr>
<td>Pinecone</td>
<td>Fully managed vector database for production RAG systems.</td>
<td>Proprietary ANN (HNSW-like)</td>
<td>Fully managed<br>High availability<br>Scalable</td>
<td>Closed-source<br>Opaque indexing</td>
<td class="timeline-reference">
<a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank">
📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
</a>
</td>
</tr>
<!-- Weaviate row. The reference link uses the clean article URL (no utm_* tracking parameter). -->
<tr>
    <td>Weaviate</td>
    <td>Open-source vector DB with schema &amp; metadata-aware retrieval.</td>
    <td>HNSW</td>
    <td>Simple API<br>Schema support</td>
    <td>High memory usage<br>Limited ANN tuning</td>
    <td class="timeline-reference">
        <a href="https://ijaibdcms.org/index.php/ijaibdcms/article/view/257" target="_blank" rel="noopener noreferrer">
            📄Rusum, G. P., &amp; Anasuri, S. (2025). Vector databases in modern applications: Real‑time search, recommendations, and retrieval‑augmented generation (RAG). International Journal of AI, BigData, Computational and Management Studies, 5(4), Article 113. https://doi.org/10.63282/3050‑9416.IJAIBDCMS‑V5I4P113
        </a>
    </td>
</tr>
<tr>
<td>Elasticsearch (Vector)</td>
<td>Hybrid sparse-dense retrieval using BM25 + vectors.</td>
<td>HNSW + BM25</td>
<td>Hybrid retrieval<br>Mature ecosystem</td>
<td>Higher latency<br>Slower pure vector search</td>
<td class="timeline-reference">
<a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank">
📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
</a>
</td>
</tr>
<!-- Chroma row. The reference cell carries class="timeline-reference" like every other row in this table. -->
<tr>
    <td>Chroma</td>
    <td>Lightweight developer-focused vector store for prototyping.</td>
    <td>HNSW (ANN backends)</td>
    <td>Easy integration<br>Fast prototyping</td>
    <td>Limited scalability<br>Not enterprise-grade</td>
    <td class="timeline-reference">
        <a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank" rel="noopener noreferrer">
            📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., &amp; Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
        </a>
    </td>
</tr>
</tbody>
</table>
</div>
</div>
</div>
<script>
// Catalogue of RAG variants, one plain object per variant. Fields:
//   type       - display name of the variant
//   year       - numeric year associated with the variant
//   category   - "Foundational", "Modular", "Structural", "Agentic & Modular" or "Structural & Modular"
//   indexing   - indexing-time pipeline written as steps separated by " → "
//   inference  - query-time pipeline in the same " → "-separated format
//   benefits / challenges - free-text summaries; individual points are separated by ";"
//   references - citation text
//   paperUrl   - link to the cited paper
// NOTE(review): the code that consumes this array is outside this view; presumably the
// " → " separator is split to build the flowchart steps - confirm before changing the delimiter.
const ragData = [
{
type: "Naive RAG",
year: 2020,
category: "Foundational",
indexing: "Preprocessing → Fixed Chunking → Simple Embedding → Vector DB Storage",
inference: "User Query → Embedding → Vector DB Lookup (Top-K) → Concatenation → LLM Generate",
benefits: "Establishes the knowledge retrieval baseline; Simple and cheap to implement.",
challenges: "Context loss due to rigid chunking; High hallucination risk; Poor handling of complex/multi-step queries.",
references: "Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. NeurIPS 2020.",
paperUrl: "https://arxiv.org/abs/2005.11401"
},
{
type: "Self RAG",
year: 2023,
category: "Agentic & Modular",
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
inference: "User Query → LLM Generates a thought → Retrieval → LLM Generates/Evaluates Retrieved Passages → LLM Decides if Answer is Ready → Final LLM Generate",
benefits: "Reduces hallucinations by self-critique/verification; Filters out poor quality retrieved passages.",
challenges: "Increases inference latency (multiple LLM calls per query); Requires careful tuning of reflection/critique prompt.",
references: "Asai, A., et al. (2023). Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. arXiv:2310.11511",
paperUrl: "https://arxiv.org/abs/2310.11511"
},
{
type: "Modular RAG",
year: 2024,
category: "Modular",
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
inference: "User Query → Router/Module Selection → Selected Module Executes → LLM Generate",
benefits: "Improves flexibility and component reusability; Enables optimal module selection for specific tasks.",
challenges: "Requires complex routing/planning logic; Overhead of training/managing multiple specialized components.",
references: "Gao, Y., et al. (2024). Modular RAG: Transforming RAG Systems into LEGO-like Reconfigurable Frameworks. arXiv:2407.21059",
paperUrl: "https://arxiv.org/abs/2407.21059"
},
{
type: "Cache RAG",
year: 2024,
category: "Modular",
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
inference: "User Query → Cache Lookup → If Hit: Return Cached Answer → If Miss: Standard Retrieval → LLM Generate → Cache Store",
benefits: "Dramatically improves latency and reduces LLM cost for repeated or highly similar queries.",
challenges: "Complex cache invalidation logic; Requires robust query similarity and hashing functions.",
references: "Jin, C., et al. (2024). RAGCache: Efficient Knowledge Caching for Retrieval-Augmented Generation. ACM TOCS.",
paperUrl: "https://arxiv.org/abs/2404.12457"
},
{
type: "MultiModal RAG",
year: 2024,
category: "Structural",
indexing: "Preprocessing → Multi-Modal Embedding (e.g., CLIP) → Stores representations of all modalities in Vector DB",
inference: "User Query (Text or Image) → Multi-Modal Embedding → Vector DB Lookup (Retrieves related text, image, metadata) → LLM Generate",
benefits: "Unlocks knowledge stored in non-text data (images, charts, tables); Provides a richer context.",
challenges: "Requires specialized multimodal embeddings/models; Indexing is computationally expensive; Difficult to combine disparate modalities coherently.",
references: "Gao, Y., et al. (2024). Retrieval-Augmented Multimodal Language Modeling. CVPR 2024.",
paperUrl: "https://arxiv.org/abs/2211.12561"
},
{
type: "Graph RAG",
// NOTE(review): year is 2024 but the citation below is dated 2025 - confirm which is intended.
year: 2024,
category: "Structural & Modular",
indexing: "Preprocessing → Entity/Relation Extraction → Store in Knowledge Graph (KG) & Vector DB",
inference: "User Query → Embedding/KG Query → Simultaneous Retrieval (Vector + KG Path) → Concatenation → LLM Generate",
benefits: "Resolves complex, multi-hop queries by leveraging factual relationships; Improves interpretability and fact consistency.",
challenges: "High indexing complexity (KG construction); Expensive maintenance for rapidly changing data; Retrieval latency can be high.",
references: "Barry, M., et al. (2025). GraphRAG: Leveraging Graph-Based Efficiency to Minimize Hallucinations in LLM-Driven RAG. GenAIK Workshop.",
paperUrl: "https://aclanthology.org/2025.genaik-1.6/"
},
{
type: "Recursive RAG",
year: 2024,
category: "Structural & Modular",
indexing: "Preprocessing → Chunking & Summarization → Embeddings of both chunks & summaries → Vector DB Storage",
inference: "User Query → Retrieval → LLM evaluates initial result → Recursive Query → Retrieve Specific Chunks → LLM Generate",
benefits: "Summarizes context or decomposes queries recursively; Handles high-level questions that require abstract understanding.",
challenges: "Risk of information loss during aggressive summarization; Chain of thought adds significant latency.",
references: "Liu, Y., et al. (2024). RAG-GPT: Retrieval-Augmented Generation for Open-Domain Question Answering. IJCNN 2024.",
paperUrl: "https://arxiv.org/abs/2405.10627"
},
{
type: "Multi-Hop RAG",
year: 2024,
category: "Structural & Modular",
indexing: "Preprocessing → Chunking/Entity Extraction → Embedding → Structured Storage (Vector DB + Optional KG)",
inference: "User Query → Query Decomposition → Hop 1 Retrieval → Iterative Reasoning → Hop 2 Retrieval → Final Evidence Aggregation → LLM Generate",
benefits: "Solves questions requiring reasoning across multiple independent documents or retrieval steps.",
challenges: "Prone to error propagation (if one hop fails); Significantly higher latency; Requires generation of accurate intermediate queries.",
references: "Tang, B., & Yang, Y. (2024). MultiHop-RAG: Benchmarking Retrieval-Augmented Generation for Multi-Hop Queries. COLM 2024.",
paperUrl: "https://arxiv.org/abs/2401.15391"
},
{
type: "Corrective RAG",
year: 2024,
category: "Agentic & Modular",
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
inference: "User Query → Standard Retrieval → Retrieved Docs Evaluated → Corrective Action → LLM Generate",
benefits: "Detects and corrects poor quality retrieval/generation post-hoc; Increases overall trustworthiness.",
challenges: "High latency due to iterative correction loops; Requires training a dedicated evaluation model.",
references: "Yan, S.-Q., et al. (2024). Corrective Retrieval Augmented Generation. arXiv:2401.15884",
paperUrl: "https://arxiv.org/abs/2401.15884"
},
{
type: "Agentic RAG",
// NOTE(review): year is 2024 but the citation below is dated 2025 - confirm which is intended.
year: 2024,
category: "Agentic & Modular",
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
inference: "User Query → Agent Planning/Tool Selection → Agent Executes RAG Retrieval → Agent Reflects/Synthesizes → Final LLM Generate",
benefits: "Handles complex, goal-oriented tasks via dynamic planning, tool use, and state tracking.",
challenges: "Highest development/orchestration complexity; Slowest inference due to planning/execution loops; Failure in planning leads to catastrophic task failure.",
references: "Singh, A., et al. (2025). Agentic Retrieval-Augmented Generation: A Survey on Agentic RAG.",
paperUrl: "https://arxiv.org/abs/2412.09550"
},
{
type: "Adaptive RAG",
year: 2024,
category: "Agentic & Modular",
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage → Train Query Complexity Classifier",
inference: "User Query → Query Classification (Router) → Adaptive Decision → Retrieval Execution → LLM Generate",
benefits: "Optimizes pipeline complexity and cost based on query assessment.",
challenges: "Requires training a robust query classifier/router; Misclassification can lead to poor quality results.",
references: "Jeong, S., et al. (2024). Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity. NAACL 2024.",
paperUrl: "https://arxiv.org/abs/2403.14403"
},
{
type: "Hierarchical RAG",
year: 2025,
category: "Structural & Modular",
indexing: "Preprocessing → Hierarchical Chunking (Multiple levels) → Multiple Embeddings (for each level) → Vector DB Storage",
inference: "User Query → Embedding → Multi-Level Retrieval → Concatenation → LLM Generate",
benefits: "Solves the 'needle-in-a-haystack' problem for very long documents; Efficiently prunes non-relevant sections.",
challenges: "Complex, multi-level chunking and indexing structure; Requires multiple retrieval passes.",
references: "Huang, H., et al. (2025). Retrieval-Augmented Generation with Hierarchical Knowledge (HiRAG). EMNLP 2025.",
paperUrl: "https://aclanthology.org/2025.emnlp-main.1/"
},
{
type: "Speculative RAG",
year: 2025,
category: "Modular",
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
inference: "User Query → Standard Retrieval → Drafting LLM Generates Tokens → Verifier LLM Checks Drafted Tokens Against Context → LLM Generate",
benefits: "Significantly reduces token generation latency and LLM inference cost.",
challenges: "Does not inherently improve semantic quality or hallucination rate; Requires careful balance between drafting and verifier models.",
references: "Wang, Z., et al. (2025). Speculative RAG: Enhancing Retrieval Augmented Generation through Drafting. ICLR 2025.",
paperUrl: "https://arxiv.org/abs/2407.08223"
}
];
// Catalogue of text-chunking methods, one plain object per method, listed in ascending year order. Fields:
//   type        - display name of the chunking method
//   year        - numeric year associated with the method
//   category    - one of the keys of chunkingCategoryBriefs
//                 ("Size-based", "Semantic-based", "Structure-based", "Hierarchical", "Domain-specific")
//   workflow    - processing pipeline written as steps separated by " → "
//   description - short free-text explanation of the method
//   references  - citation text; may be an empty string
//   paperUrl    - link to the cited source; may be an empty string
// NOTE(review): the rendering code is outside this view; it presumably has to tolerate the
// empty references/paperUrl values below - confirm.
const chunkingData = [
{
type: "Sliding Window Chunking",
year: 1998,
category: "Size-based",
workflow: "Input Document → Define Chunk Size & Overlap → Slide Window (e.g., size 300, overlap 100) → Create Overlapping Chunks → Output Chunks",
description: "Uses a fixed chunk size plus overlap (e.g., chunk size 300, overlap 100). Maintains context continuity across chunks.",
references: "Carbonell, J. & Goldstein, J. (1998). The use of MMR, diversity-based reranking for reordering documents. SIGIR 1998.",
paperUrl: "https://dl.acm.org/doi/10.1145/290941.291025"
},
{
type: "Fixed-size Chunking",
// NOTE(review): year is 2001 but the citation below is dated 2011 - possible typo, confirm.
year: 2001,
category: "Size-based",
workflow: "Input Document → Split by Fixed Token/Word Count (200-500 tokens) → Create Chunks → Output Chunks",
description: "Splits text into chunks based on a fixed number of tokens/words (e.g., 200-500 tokens). Simple but may break semantic boundaries.",
references: "Collobert, R., et al. (2011). Natural language processing (almost) from scratch. Journal of Machine Learning Research 12:2493-2537.",
paperUrl: "https://www.jmlr.org/papers/volume12/collobert11a/collobert11a.pdf"
},
{
type: "Discourse-aware Chunking",
year: 2005,
category: "Semantic-based",
workflow: "Input Document → Identify Discourse Markers (however, in contrast, therefore) → Detect Conceptual Shifts → Split at Discourse Boundaries → Output Discourse Chunks",
description: "Uses discourse markers (e.g., 'however', 'in contrast', 'therefore') to detect conceptual shifts and chunk accordingly.",
references: "Sporleder, C. & Lapata, M. (2005). Discourse Chunking and its Application to Sentence Compression. HLT 2005.",
paperUrl: "https://aclanthology.org/H05-1033/"
},
{
type: "Paragraph-based Chunking",
year: 2014,
category: "Structure-based",
workflow: "Input Document → Identify Paragraph Boundaries → Split at Paragraphs → Create Paragraph Chunks → Output Chunks",
description: "Uses natural paragraph boundaries for chunking. Preserves document structure and thematic grouping.",
references: "Dudhabaware, R. S., et al. (2014). Review on natural language processing tasks for text documents. IEEE ICCIC 2014.",
paperUrl: "https://ieeexplore.ieee.org/document/7238334"
},
{
type: "Recursive/Hierarchical Chunking",
// NOTE(review): year is 2023 but the citation below is dated 2025 - confirm which is intended.
year: 2023,
category: "Hierarchical",
workflow: "Input Document → Break into Sections → Split Sections into Paragraphs → Split Paragraphs into Sentences → Split Sentences into Tokens → Output Multi-level Chunks",
description: "Breaks document into progressively smaller units (section → paragraph → sentence → tokens). Enables multi-level retrieval.",
references: "Latif, S., et al. (2025). The Chunking Paradigm: Recursive Semantic for RAG. ICNLSP 2025.",
paperUrl: "https://aclanthology.org/2025.icnlsp-1.16/"
},
{
type: "Semantic Chunking",
year: 2024,
category: "Semantic-based",
workflow: "Input Document → Analyze Topic Shifts/Semantic Boundaries → Split at Semantic Breaks → Create Semantically Coherent Chunks → Output Chunks",
description: "Chunks based on topic shifts or semantic boundaries rather than size. Preserves meaning and context.",
references: "LangChain (2024). Semantic chunker. Accessed 2024-09-14.",
paperUrl: "https://python.langchain.com/docs/how_to/semantic-chunker/"
},
{
type: "Sentence-based Chunking",
year: 2024,
category: "Structure-based",
workflow: "Input Document → Split by Sentences → Group Sentences (Optional) → Create Sentence Chunks → Output Chunks",
description: "Splits text by sentences. Maintains grammatical integrity and natural language boundaries.",
references: "Dong, K., et al. (2024). Multi-view content-aware indexing for long document retrieval. arXiv:2404.15103",
paperUrl: "https://arxiv.org/abs/2404.15103"
},
{
type: "Hybrid Chunking",
year: 2024,
category: "Semantic-based",
workflow: "Input Document → Apply Semantic Analysis → Apply Fixed-size Constraint → Apply Overlap Strategy → Create Hybrid Chunks → Output Chunks",
description: "Combines semantic + fixed-size + overlap strategies. Balances context preservation with practical constraints.",
references: "Kamradt, G. (2024). 5 levels of text splitting.",
// No link recorded for this citation.
paperUrl: ""
},
{
type: "Embedding-similarity-based Chunking",
year: 2024,
category: "Semantic-based",
workflow: "Input Document → Generate Sentence Embeddings → Calculate Similarity Scores → Detect Similarity Drops Below Threshold → Split at Low Similarity Points → Output Chunks",
description: "Sequentially processes text and opens a new chunk when embedding similarity drops below a threshold. Data-driven approach.",
references: "Kamradt, G. (2024). 5 levels of text splitting.",
// No link recorded for this citation.
paperUrl: ""
},
{
type: "Page-based Chunking",
year: 2024,
category: "Structure-based",
workflow: "Input PDF/Scanned Document → Identify Page Boundaries → Split by Page → Create Page Chunks → Output Chunks",
description: "Splits by PDF page or scanned document page. Useful for document-level retrieval and citation.",
// This entry has neither a citation nor a link.
references: "",
paperUrl: ""
},
{
type: "Domain-specific Chunking",
year: 2024,
category: "Domain-specific",
workflow: "Input Domain Document → Apply Domain Rules (legal sections, medical records, code functions) → Split by Domain Logic → Create Domain Chunks → Output Chunks",
description: "Tailored to content type (e.g., legal documents by sections, medical records by encounters, code by functions).",
references: "Allamraju, A., et al. (2024). Breaking It Down: Domain-Aware Semantic Segmentation for Retrieval Augmented Generation. arXiv:2512.00367",
paperUrl: "https://arxiv.org/abs/2512.00367"
},
{
type: "Metadata-based Chunking",
year: 2025,
category: "Structure-based",
workflow: "Input Document → Extract Metadata (headers, bullets, sections) → Split Using Metadata Structure → Create Metadata-aware Chunks → Output Chunks",
description: "Splits text using document metadata (e.g., headers, bullets). Preserves document structure and hierarchy.",
references: "Zhao, J., et al. (2025). MoC: Mixtures of Text Chunking Learners for Retrieval-Augmented Generation. ACL 2025.",
paperUrl: "https://aclanthology.org/2025.acl-long.1/"
}
];
// One-sentence summary of each chunking category, keyed by category name.
// initChunkingVisualizations() walks these keys in declaration order to render
// the definition cards and the category hierarchy, and compares each key with
// the `category` field of chunkingData entries to list the methods under it.
const chunkingCategoryBriefs = {
'Size-based': 'Chunks text by fixed sizes or overlapping windows. Simple, predictable, but may break context.',
'Semantic-based': 'Identifies meaning, topics, or discourse patterns to create contextually coherent chunks.',
'Structure-based': 'Uses natural document structure (sentences, paragraphs, metadata) for boundaries.',
'Hierarchical': 'Creates multi-level chunks (sections → paragraphs → sentences) for flexible retrieval.',
'Domain-specific': 'Applies domain knowledge and rules tailored to specific content types (legal, medical, code).'
};
// Definitions of the four main RAG categories, keyed by category name.
// The keys are rendered in declaration order into the #categoryDefinitions
// container, and each key is passed to getCategoryDefClass() to pick its CSS class.
const mainCategoryDefinitions = {
'Foundational': 'The baseline RAG approach establishing core retrieval-augmented generation principles with simple, fixed pipelines.',
'Modular': 'Focuses on pipeline flexibility through specialized, interchangeable components and intermediate operations between query and generation.',
'Agentic': 'Uses dynamic decision-makers (agents or classifiers) to guide the pipeline, perform self-correction, and enable adaptive behavior.',
'Structural': 'Innovations in how source documents are preprocessed, stored, and indexed for more effective retrieval (e.g., knowledge graphs, hierarchies, multi-modal embeddings).'
};
/**
 * Activate one tab button together with its content panel.
 *
 * Every `.tab` and `.tab-content` element first loses the `active` class;
 * the pair matching `tabName` then gets it back. An unrecognised name
 * leaves everything deactivated.
 *
 * @param {string} tabName - 'rag', 'chunking' or 'indexing-db'.
 */
function switchTab(tabName) {
    const tabButtons = document.querySelectorAll('.tab');
    const panels = document.querySelectorAll('.tab-content');

    tabButtons.forEach(button => button.classList.remove('active'));
    panels.forEach(panel => panel.classList.remove('active'));

    // Tab buttons are addressed by position, panels by element id.
    const activate = (tabIndex, panelId) => {
        tabButtons[tabIndex].classList.add('active');
        document.getElementById(panelId).classList.add('active');
    };

    switch (tabName) {
        case 'rag':
            activate(0, 'rag-content');
            break;
        case 'chunking':
            activate(1, 'chunking-content');
            // The chunking tab is built on demand; the callee ignores repeat calls.
            initChunkingVisualizations();
            break;
        case 'indexing-db':
            activate(2, 'indexing-db');
            break;
        default:
            break;
    }
}
/**
 * Map a RAG category label to the CSS class used for its timeline badge.
 *
 * @param {string} category - Category label taken from a ragData entry.
 * @returns {string} The badge class, or 'badge-modular' for any unlisted label.
 */
function getCategoryBadgeClass(category) {
    const map = {
        'Foundational': 'badge-foundational',
        'Agentic & Modular': 'badge-agentic',
        'Modular': 'badge-modular',
        'Structural & Modular': 'badge-structural-modular',
        'Structural': 'badge-structural'
    };
    // Check own keys only: a bare `map[category]` also resolves inherited names
    // such as 'constructor' or 'toString' and would return a function, not a class.
    return Object.prototype.hasOwnProperty.call(map, category)
        ? map[category]
        : 'badge-modular';
}
/**
 * Map a main RAG category name to the CSS class of its definition card.
 *
 * @param {string} category - One of the main category names.
 * @returns {string} The definition class, or 'def-modular' for any unlisted name.
 */
function getCategoryDefClass(category) {
    const map = {
        'Foundational': 'def-foundational',
        'Agentic': 'def-agentic',
        'Modular': 'def-modular',
        'Structural': 'def-structural'
    };
    // Check own keys only: a bare `map[category]` also resolves inherited names
    // such as 'constructor' or 'toString' and would return a function, not a class.
    return Object.prototype.hasOwnProperty.call(map, category)
        ? map[category]
        : 'def-modular';
}
// Render one definition card per main category, in declaration order
const categoryDefContainer = document.getElementById('categoryDefinitions');
for (const [category, definition] of Object.entries(mainCategoryDefinitions)) {
    const card = document.createElement('div');
    card.className = `category-definition ${getCategoryDefClass(category)}`;
    // Joined with newlines so the markup matches a multi-line template exactly.
    card.innerHTML = [
        '',
        `<strong>${category}</strong>`,
        `<p>${definition}</p>`,
        ''
    ].join('\n');
    categoryDefContainer.appendChild(card);
}
// Create Year Timeline
// One item per ragData entry: year, type name, category badge and, when
// available, the paper reference.
const yearTimeline = document.getElementById('yearTimeline');
ragData.forEach(rag => {
    // Link the citation only when a paper URL exists; an empty href would yield
    // a link that merely reopens this page in a new tab. Entries without any
    // reference text get no reference line at all.
    let referenceHTML = '';
    if (rag.references && rag.paperUrl) {
        // rel="noopener noreferrer" keeps the opened page from reaching window.opener.
        referenceHTML = `<div class="timeline-reference">📄 <a href="${rag.paperUrl}" target="_blank" rel="noopener noreferrer">${rag.references}</a></div>`;
    } else if (rag.references) {
        referenceHTML = `<div class="timeline-reference">📄 ${rag.references}</div>`;
    }

    const item = document.createElement('div');
    item.className = 'timeline-item';
    item.innerHTML = [
        '',
        `<div class="timeline-year">${rag.year}</div>`,
        '<div class="timeline-content">',
        `<div class="timeline-type">${rag.type}</div>`,
        `<span class="category-badge ${getCategoryBadgeClass(rag.category)}">${rag.category}</span>`,
        referenceHTML,
        '</div>',
        ''
    ].join('\n');
    yearTimeline.appendChild(item);
});
// Fill the RAG type dropdown with one "<type> (<year>)" option per ragData entry
const selector = document.getElementById('ragTypeSelector');
for (const entry of ragData) {
    const choice = document.createElement('option');
    choice.value = entry.type;
    choice.textContent = `${entry.type} (${entry.year})`;
    selector.appendChild(choice);
}
/**
 * Render a workflow string as a sequence of step boxes.
 *
 * The container's previous content is discarded. The workflow is split on
 * '→'; every non-blank step becomes a `.flow-step` div whose text is the
 * trimmed step. The first and last steps additionally get the `highlight` class.
 *
 * @param {string} workflow - Steps separated by '→'. A missing or blank
 *     value renders no steps.
 * @param {string} containerId - id of the element that receives the boxes.
 */
function createFlowchart(workflow, containerId) {
    const container = document.getElementById(containerId);
    container.innerHTML = '';

    // Tolerate entries without a workflow and drop blank segments (for example
    // after a trailing arrow) so no empty box is drawn or highlighted.
    const steps = String(workflow || '')
        .split('→')
        .map(step => step.trim())
        .filter(step => step.length > 0);

    const lastIndex = steps.length - 1;
    steps.forEach((step, index) => {
        const box = document.createElement('div');
        box.className = 'flow-step';
        if (index === 0 || index === lastIndex) {
            box.classList.add('highlight');
        }
        // textContent keeps step text literal rather than parsing it as markup.
        box.textContent = step;
        container.appendChild(box);
    });
}
// When a RAG type is picked, draw its two flowcharts and show its benefits and
// challenges; when the selection matches no entry, hide the details panel.
selector.addEventListener('change', (event) => {
    const chosen = ragData.find(entry => entry.type === event.target.value);
    const detailsPanel = document.getElementById('flowchartDetails');

    if (!chosen) {
        detailsPanel.style.display = 'none';
        return;
    }

    createFlowchart(chosen.indexing, 'indexingFlowchart');
    createFlowchart(chosen.inference, 'inferenceFlowchart');
    document.getElementById('benefits').textContent = chosen.benefits;
    document.getElementById('challenges').textContent = chosen.challenges;
    detailsPanel.style.display = 'block';
});
// Category Chart: doughnut of how many ragData entries carry each category label
const categoryCount = ragData.reduce((counts, rag) => {
    counts[rag.category] = (counts[rag.category] || 0) + 1;
    return counts;
}, {});
new Chart(document.getElementById('categoryChart'), {
    type: 'doughnut',
    data: {
        labels: Object.keys(categoryCount),
        datasets: [{
            data: Object.values(categoryCount),
            backgroundColor: ['#667eea', '#764ba2', '#f093fb', '#4facfe', '#00d2ff']
        }]
    },
    options: {
        responsive: true,
        maintainAspectRatio: false,
        plugins: { legend: { position: 'bottom' } }
    }
});
// Timeline Chart: bar chart of how many ragData entries fall in each year
const yearCount = {};
for (const rag of ragData) {
    yearCount[rag.year] = (yearCount[rag.year] || 0) + 1;
}
{
    // Block scope keeps the helper name out of the script's top level.
    const sortedYears = Object.keys(yearCount).sort();
    new Chart(document.getElementById('timelineChart'), {
        type: 'bar',
        data: {
            labels: sortedYears,
            datasets: [{
                label: 'Number of RAG Types',
                data: sortedYears.map(year => yearCount[year]),
                backgroundColor: '#667eea'
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            scales: {
                y: { beginAtZero: true, ticks: { stepSize: 1 } }
            },
            plugins: { legend: { display: false } }
        }
    });
}
// Chunking Visualizations
// Set on the first call to initChunkingVisualizations() so later calls do nothing.
let chunkingChartsInitialized = false;

/**
 * Build everything shown on the chunking tab: the method dropdown with its
 * change handler, the category pie chart, the category definition cards,
 * the category → methods hierarchy and the year timeline.
 *
 * Only the first call does any work; later calls return immediately.
 */
function initChunkingVisualizations() {
    if (chunkingChartsInitialized) {
        return;
    }
    chunkingChartsInitialized = true;

    // --- Method dropdown ---------------------------------------------------
    const chunkingSelector = document.getElementById('chunkingSelector');
    for (const method of chunkingData) {
        const choice = document.createElement('option');
        choice.value = method.type;
        choice.textContent = `${method.type} (${method.year})`;
        chunkingSelector.appendChild(choice);
    }

    chunkingSelector.addEventListener('change', (event) => {
        const chosen = chunkingData.find(method => method.type === event.target.value);
        const detailsPanel = document.getElementById('chunkingFlowchartDetails');
        if (!chosen) {
            detailsPanel.style.display = 'none';
            return;
        }

        createFlowchart(chosen.workflow, 'chunkingFlowchart');
        document.getElementById('chunkingDescription').textContent = chosen.description;

        // Reference line: linked when a URL exists, plain text when only the
        // citation exists, empty otherwise.
        const refElement = document.getElementById('chunkingReference');
        if (!chosen.references) {
            refElement.textContent = '';
        } else if (chosen.paperUrl) {
            refElement.innerHTML = `📄 <a href="${chosen.paperUrl}" target="_blank">${chosen.references}</a>`;
        } else {
            refElement.textContent = `📄 ${chosen.references}`;
        }
        detailsPanel.style.display = 'block';
    });

    // --- Pie chart: number of methods per category ---------------------------
    const chunkingTypes = chunkingData.reduce((counts, method) => {
        counts[method.category] = (counts[method.category] || 0) + 1;
        return counts;
    }, {});
    new Chart(document.getElementById('chunkingChart'), {
        type: 'pie',
        data: {
            labels: Object.keys(chunkingTypes),
            datasets: [{
                data: Object.values(chunkingTypes),
                backgroundColor: ['#667eea', '#764ba2', '#f093fb', '#4facfe', '#00d2ff']
            }]
        },
        options: {
            responsive: true,
            maintainAspectRatio: false,
            plugins: { legend: { position: 'bottom' } }
        }
    });

    // --- Category definition cards -------------------------------------------
    const chunkingCategoryDefsContainer = document.getElementById('chunkingCategoryDefs');
    for (const [category, brief] of Object.entries(chunkingCategoryBriefs)) {
        const card = document.createElement('div');
        card.className = 'category-definition def-modular';
        card.innerHTML = [
            '',
            `<strong>${category}</strong>`,
            `<p>${brief}</p>`,
            ''
        ].join('\n');
        chunkingCategoryDefsContainer.appendChild(card);
    }

    // --- Category → methods hierarchy ----------------------------------------
    const categoryHierarchy = document.getElementById('categoryHierarchy');
    for (const [category, brief] of Object.entries(chunkingCategoryBriefs)) {
        const methodTags = chunkingData
            .filter(method => method.category === category)
            .map(method => `<span class="method-tag">${method.type}</span>`)
            .join('');
        const node = document.createElement('div');
        node.className = 'category-node';
        node.innerHTML = [
            '',
            `<h4>${category}</h4>`,
            `<div class="brief">${brief}</div>`,
            `<div class="category-methods">${methodTags}</div>`,
            ''
        ].join('\n');
        categoryHierarchy.appendChild(node);
    }

    // --- Year timeline -------------------------------------------------------
    const chunkingYearTimeline = document.getElementById('chunkingYearTimeline');
    for (const chunk of chunkingData) {
        // Same three-way choice as the reference line in the details panel.
        const referenceHTML = !chunk.references
            ? ''
            : chunk.paperUrl
                ? `<div class="timeline-reference">📄 <a href="${chunk.paperUrl}" target="_blank">${chunk.references}</a></div>`
                : `<div class="timeline-reference">📄 ${chunk.references}</div>`;

        const item = document.createElement('div');
        item.className = 'timeline-item';
        item.innerHTML = [
            '',
            `<div class="timeline-year">${chunk.year}</div>`,
            '<div class="timeline-content">',
            `<div class="timeline-type">${chunk.type}</div>`,
            referenceHTML,
            '</div>',
            ''
        ].join('\n');
        chunkingYearTimeline.appendChild(item);
    }
}
</script>
</body>
</html>