<!-- Hosting page header (not part of the document): Spaces: Soha85 / RAG_Study · Running · File size: 60,413 Bytes -->

<!DOCTYPE html>
<html lang="en">
<head>
    <!-- Document metadata: UTF-8 encoding, responsive viewport, page title. -->
    <meta charset="UTF-8">
    <meta name="viewport" content="width=device-width, initial-scale=1.0">
    <title>RAG Visualization</title>
    <!-- Chart.js 3.9.1 loaded from the cdnjs CDN; presumably used to draw the canvas charts in the body (verify against the page script).
         NOTE(review): this third-party script has no integrity (SRI) attribute; consider adding one with the hash published by the CDN. -->
    <script src="https://cdnjs.cloudflare.com/ajax/libs/Chart.js/3.9.1/chart.min.js"></script>
    <style>
        /* Reset: remove default spacing on every element and size boxes by their border edge. */
        * {
            box-sizing: border-box;
            margin: 0;
            padding: 0;
        }

        /* Page shell: diagonal purple gradient that fills at least the viewport height. */
        body {
            min-height: 100vh;
            padding: 20px;
            background: linear-gradient(135deg, #667eea, #764ba2);
            font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
        }

        /* Centered content column, capped at 1400px wide. */
        .container {
            margin: 0 auto;
            max-width: 1400px;
        }

        /* Dashboard title. */
        h1 {
            margin-bottom: 30px;
            color: #fff;
            font-size: 2.5em;
            text-align: center;
            text-shadow: 2px 2px 4px rgba(0, 0, 0, 0.3);
        }
        
        /* Tab strip: buttons in a row, 10px apart. */
        .tabs {
            display: flex;
            gap: 10px;
            margin-bottom: 25px;
        }

        /* Inactive tab: translucent white with rounded top corners. */
        .tab {
            padding: 15px 30px;
            border: none;
            border-radius: 10px 10px 0 0;
            background: rgba(255, 255, 255, 0.3);
            color: #fff;
            font-size: 1.1em;
            font-weight: 700;
            cursor: pointer;
            transition: all 0.3s ease;
        }

        .tab:hover {
            background: rgba(255, 255, 255, 0.4);
        }

        /* Must stay after .tab:hover: both selectors have equal specificity,
           so source order keeps the active tab solid white while hovered. */
        .tab.active {
            background: #fff;
            color: #667eea;
        }

        /* Panels are hidden unless they also carry the "active" class. */
        .tab-content {
            display: none;
        }

        .tab-content.active {
            display: block;
        }
        
        /* Responsive card grid. min(450px, 100%) keeps the preferred 450px track
           minimum but lets a track shrink to the container width, so the grid
           no longer overflows on viewports narrower than 450px. */
        .grid {
            display: grid;
            grid-template-columns: repeat(auto-fit, minmax(min(450px, 100%), 1fr));
            gap: 25px;
            margin-bottom: 25px;
        }

        /* White panel used for every dashboard section. */
        .card {
            background: white;
            border-radius: 15px;
            padding: 25px;
            box-shadow: 0 10px 30px rgba(0,0,0,0.2);
            transition: transform 0.3s ease;
        }

        /* Cards stacked directly inside a tab panel (outside .grid) had no vertical
           spacing; give consecutive ones the same 25px rhythm the grid uses.
           Cards inside .grid are not direct children of .tab-content, so the grid
           gap continues to space them. */
        .tab-content > .card + .card {
            margin-top: 25px;
        }

        /* Lift the card slightly on hover. */
        .card:hover {
            transform: translateY(-5px);
        }

        /* Section heading inside a card, underlined in the accent color. */
        .card h2 {
            color: #667eea;
            margin-bottom: 20px;
            font-size: 1.5em;
            border-bottom: 3px solid #667eea;
            padding-bottom: 10px;
        }

        /* Fixed-height, positioned box wrapping each chart canvas. */
        .chart-container {
            position: relative;
            height: 400px;
        }

        /* Spans every column when the card is a grid item; has no effect on
           cards placed outside .grid. */
        .timeline-card {
            grid-column: 1 / -1;
        }
        
        /* Wrapper around a flowchart dropdown. */
        .workflow-selector {
            margin-bottom: 20px;
        }

        /* Full-width dropdown with an accent-colored border. */
        .workflow-selector select {
            width: 100%;
            padding: 12px;
            border: 2px solid #667eea;
            border-radius: 8px;
            background: #fff;
            font-size: 16px;
            cursor: pointer;
            transition: all 0.3s ease;
        }

        /* Border shifts to the darker purple on hover. */
        .workflow-selector select:hover {
            border-color: #764ba2;
        }
        
        /* Row of flowchart panels that wraps when space runs out. */
        .flowchart-container {
            display: flex;
            gap: 30px;
            margin-top: 20px;
            flex-wrap: wrap;
        }

        /* One flowchart panel. The minimum width is clamped to the container
           (min(400px, 100%)) so the panel cannot overflow its card on screens
           narrower than 400px, whether or not it sits in the flex row. */
        .flowchart {
            flex: 1;
            min-width: min(400px, 100%);
            background: #f8f9fa;
            padding: 20px;
            border-radius: 10px;
        }

        .flowchart h3 {
            color: #667eea;
            margin-bottom: 15px;
            font-size: 1.2em;
            text-align: center;
        }

        /* A single step box; positioned so the arrow pseudo-element can hang below it. */
        .flow-step {
            background: white;
            padding: 12px 15px;
            margin: 10px 0;
            border-radius: 8px;
            box-shadow: 0 2px 5px rgba(0,0,0,0.1);
            position: relative;
            border-left: 4px solid #667eea;
        }

        /* Down arrow centered beneath each step, pointing at the next one. */
        .flow-step::after {
            content: '↓';
            position: absolute;
            bottom: -20px;
            left: 50%;
            transform: translateX(-50%);
            font-size: 20px;
            color: #667eea;
            font-weight: bold;
        }

        /* The final step has nothing after it, so its arrow is blanked. */
        .flow-step:last-child::after {
            content: '';
        }

        /* Emphasized step: faint gradient tint and darker left border. */
        .flow-step.highlight {
            background: linear-gradient(135deg, #667eea15, #764ba215);
            border-left-color: #764ba2;
        }
        
        /* Two equal columns: benefits on one side, challenges on the other. */
        .benefits-challenges {
            display: grid;
            grid-template-columns: repeat(2, 1fr);
            gap: 15px;
            margin-top: 20px;
        }

        /* Light grey panel holding one heading and one paragraph. */
        .info-box {
            padding: 15px;
            border-radius: 8px;
            background: #f8f9fa;
        }

        /* Heading row: flex layout with an 8px gap between its items. */
        .info-box h4 {
            display: flex;
            align-items: center;
            gap: 8px;
            margin-bottom: 10px;
            color: #667eea;
        }

        .info-box p {
            color: #555;
            font-size: 14px;
            line-height: 1.6;
        }
        
        /* One timeline row: year on the left, details on the right. */
        .timeline-item {
            display: flex;
            align-items: center;
            margin: 10px 0;
            padding: 15px;
            border-radius: 10px;
            background: #fff;
            box-shadow: 0 2px 8px rgba(0, 0, 0, 0.1);
            transition: all 0.3s ease;
        }

        /* Nudge the row to the right and deepen its shadow on hover. */
        .timeline-item:hover {
            box-shadow: 0 4px 12px rgba(0, 0, 0, 0.15);
            transform: translateX(10px);
        }

        /* Large year label, separated from the content by a vertical rule. */
        .timeline-year {
            min-width: 80px;
            padding-right: 20px;
            border-right: 3px solid #667eea;
            color: #667eea;
            font-size: 1.8em;
            font-weight: 700;
        }

        /* Detail column takes the remaining width. */
        .timeline-content {
            flex: 1;
            padding-left: 20px;
        }

        .timeline-type {
            margin-bottom: 5px;
            color: #333;
            font-size: 1.1em;
            font-weight: 700;
        }

        .timeline-category {
            color: #666;
            font-size: 0.9em;
        }

        /* Citation text: small, muted, italic. */
        .timeline-reference {
            margin-top: 5px;
            color: #888;
            font-size: 0.85em;
            font-style: italic;
            line-height: 1.4;
        }

        /* Citation links: accent color, underline only on hover. */
        .timeline-reference a {
            color: #667eea;
            text-decoration: none;
            transition: color 0.3s ease;
        }

        .timeline-reference a:hover {
            color: #764ba2;
            text-decoration: underline;
        }
        
        /* Pill-shaped label; the fill color comes from a .badge-* modifier. */
        .category-badge {
            display: inline-block;
            margin-top: 5px;
            padding: 4px 12px;
            border-radius: 15px;
            font-size: 0.85em;
        }

        /* Badge fill colors, one modifier per category. */
        .badge-foundational {
            background: #667eea;
            color: #fff;
        }

        .badge-agentic {
            background: #764ba2;
            color: #fff;
        }

        .badge-modular {
            background: #f093fb;
            color: #fff;
        }

        .badge-structural-modular {
            background: #4facfe;
            color: #fff;
        }

        .badge-structural {
            background: #00d2ff;
            color: #fff;
        }

        /* Definition box. The left border sets width and style only; its color
           comes from a .def-* modifier, otherwise it falls back to the text color. */
        .category-definition {
            margin: 8px 0;
            padding: 12px;
            border-left: 4px solid;
            border-radius: 8px;
            background: #f8f9fa;
            font-size: 0.9em;
        }

        /* Category name on its own line above the description. */
        .category-definition strong {
            display: block;
            margin-bottom: 5px;
            font-size: 1em;
        }

        .category-definition p {
            margin: 0;
            color: #555;
            line-height: 1.5;
        }

        /* Left-border colors for definition boxes; hues match the badge colors above. */
        .def-foundational {
            border-left-color: #667eea;
        }

        .def-agentic {
            border-left-color: #764ba2;
        }

        .def-modular {
            border-left-color: #f093fb;
        }

        .def-structural {
            border-left-color: #00d2ff;
        }
        
        /* Description panel shown under a chunking flowchart. */
        .chunking-brief {
            margin-top: 15px;
            padding: 15px;
            border-left: 4px solid #667eea;
            border-radius: 8px;
            background: #f8f9fa;
        }

        .chunking-brief p {
            margin-bottom: 10px;
            color: #555;
            line-height: 1.6;
        }

        /* Citation line: muted italic text. */
        .chunking-brief .reference {
            color: #888;
            font-size: 0.9em;
            font-style: italic;
        }

        /* Citation link: accent color, underlined only while hovered. */
        .chunking-brief .reference a {
            color: #667eea;
            text-decoration: none;
        }

        .chunking-brief .reference a:hover {
            text-decoration: underline;
        }
        
        /* Grey backdrop for the list of category nodes. */
        .category-tree {
            margin-top: 20px;
            padding: 20px;
            border-radius: 10px;
            background: #f8f9fa;
        }

        /* One category: white card with an accent left border. */
        .category-node {
            margin: 15px 0;
            padding: 15px;
            border-left: 4px solid #667eea;
            border-radius: 8px;
            background: #fff;
            box-shadow: 0 2px 5px rgba(0, 0, 0, 0.1);
        }

        .category-node h4 {
            margin-bottom: 10px;
            color: #667eea;
            font-size: 1.1em;
        }

        /* Short italic summary; this rule only applies inside a .category-node. */
        .category-node .brief {
            margin-bottom: 10px;
            color: #666;
            font-size: 0.9em;
            font-style: italic;
            line-height: 1.5;
        }

        /* Wrapping row of method tags. */
        .category-methods {
            display: flex;
            flex-wrap: wrap;
            gap: 8px;
            margin-top: 10px;
        }

        /* Gradient pill naming a single method. */
        .method-tag {
            padding: 6px 12px;
            border-radius: 15px;
            background: linear-gradient(135deg, #667eea, #764ba2);
            box-shadow: 0 2px 4px rgba(0, 0, 0, 0.1);
            color: #fff;
            font-size: 0.85em;
        }
        /* Comparison tables on the indexing tab. */
        .rag-table {
            width: 100%;
            margin-bottom: 2rem;
            border-collapse: collapse;
            font-size: 0.95rem;
        }

        /* Shared cell styling. ".rag-table td" is more specific than
           ".timeline-reference", so reference cells take their color, font size
           and line height from this rule rather than from the timeline rule.
           NOTE(review): margin does not apply to table cells, so margin-bottom
           has no visible effect here. */
        .rag-table th,
        .rag-table td {
            padding: 5px;
            border: 1px solid #ddd;
            margin-bottom: 8px;
            color: #666;
            font-size: 0.8em;
            font-style: italic;
            line-height: 1;
            vertical-align: top;
        }

        /* Header cells: declared after the shared rule so the white text wins. */
        .rag-table th {
            background-color: #5b6ee1;
            color: #fff;
            text-align: left;
        }

        /* Zebra striping on even rows. */
        .rag-table tr:nth-child(even) {
            background-color: #f5f6fa;
        }

        /* Same specificity as ".timeline-reference a" but declared later in the
           stylesheet, so links inside the tables use this blue. */
        .rag-table a {
            color: #1a4cff;
            font-weight: 500;
            text-decoration: none;
        }

        .rag-table a:hover {
            text-decoration: underline;
        }
    </style>
</head>
<body>
    <div class="container">
        <h1>🔍 RAG Visualization Dashboard</h1>

        <!-- Tab strip. Each button calls switchTab() with the key of the panel to show.
             type="button" is explicit so a button can never act as a submit control
             if this markup is later placed inside a form. -->
        <div class="tabs">
            <button class="tab active" type="button" onclick="switchTab('rag')">RAG</button>
            <button class="tab" type="button" onclick="switchTab('chunking')">Chunking</button>
            <button class="tab" type="button" onclick="switchTab('indexing-db')">Indexing & Vector DB</button>
        </div>
        
        <!-- RAG TYPES TAB -->
        <div id="rag-content" class="tab-content active">
            <!-- Two chart cards. A canvas exposes nothing to assistive technology on its
                 own, so each one gets role="img" and an accessible name. -->
            <div class="grid">
                <div class="card">
                    <h2>RAG Categories Distribution</h2>
                    <div class="chart-container">
                        <canvas id="categoryChart" role="img" aria-label="Chart: RAG categories distribution"></canvas>
                    </div>
                    <div id="categoryDefinitions"></div>
                </div>

                <div class="card">
                    <h2>RAG Types by Year</h2>
                    <div class="chart-container">
                        <canvas id="timelineChart" role="img" aria-label="Chart: RAG types by year"></canvas>
                    </div>
                </div>
            </div>
            
            <!-- Evolution timeline card. #yearTimeline is empty in the markup; presumably the page script fills it in (verify). -->
            <div class="card timeline-card">
                <h2>📅 Evolution Timeline: Year → RAG Type</h2>
                <div id="yearTimeline"></div>
            </div>
            
            <!-- Workflow card: a dropdown plus a details area that starts hidden
                 (inline display: none). The dropdown has no visible label, so
                 aria-label gives it an accessible name. -->
            <div class="card timeline-card">
                <h2>RAG Workflow Flowcharts</h2>
                <div class="workflow-selector">
                    <select id="ragTypeSelector" aria-label="RAG type">
                        <option value="">Select a RAG Type to view workflows...</option>
                    </select>
                </div>
                <div id="flowchartDetails" style="display: none;">
                    <div class="flowchart-container">
                        <div class="flowchart">
                            <h3>📥 Indexing Workflow</h3>
                            <div id="indexingFlowchart"></div>
                        </div>
                        <div class="flowchart">
                            <h3>🔄 Inference Workflow</h3>
                            <div id="inferenceFlowchart"></div>
                        </div>
                    </div>
                    <div class="benefits-challenges">
                        <div class="info-box">
                            <h4>✅ Key Benefits</h4>
                            <p id="benefits"></p>
                        </div>
                        <div class="info-box">
                            <h4>⚠️ Challenges</h4>
                            <p id="challenges"></p>
                        </div>
                    </div>
                </div>
            </div>
        </div>
        
        <!-- CHUNKING TAB -->
        <div id="chunking-content" class="tab-content">
            <!-- Chunking overview: a chart card and a category hierarchy card. The
                 canvas gets role="img" and an accessible name because canvas content
                 is otherwise invisible to assistive technology. -->
            <div class="grid">
                <div class="card">
                    <h2>Chunking Methods Distribution</h2>
                    <div class="chart-container">
                        <canvas id="chunkingChart" role="img" aria-label="Chart: chunking methods distribution"></canvas>
                    </div>
                    <div id="chunkingCategoryDefs"></div>
                </div>

                <div class="card">
                    <h2>Chunking Categories & Methods</h2>
                    <div id="categoryHierarchy" class="category-tree"></div>
                </div>
            </div>
            
            <!-- Chunking timeline card. #chunkingYearTimeline is empty in the markup; presumably the page script fills it in (verify). -->
            <div class="card timeline-card">
                <h2>📅 Chunking Methods Timeline</h2>
                <div id="chunkingYearTimeline"></div>
            </div>
            
            <!-- Chunking flowchart card: a dropdown plus a details area that starts
                 hidden (inline display: none). The dropdown has no visible label, so
                 aria-label gives it an accessible name. -->
            <div class="card timeline-card">
                <h2>Chunking Method Flowcharts</h2>
                <div class="workflow-selector">
                    <select id="chunkingSelector" aria-label="Chunking method">
                        <option value="">Select a Chunking Method...</option>
                    </select>
                </div>
                <div id="chunkingFlowchartDetails" style="display: none;">
                    <div class="flowchart">
                        <h3>🔄 Chunking Process Flow</h3>
                        <div id="chunkingFlowchart"></div>
                    </div>
                    <div class="chunking-brief">
                        <p id="chunkingDescription"></p>
                        <div class="reference" id="chunkingReference"></div>
                    </div>
                </div>
            </div>
        </div>

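        <!-- INDEXING TAB. NOTE(review): the other two panels use ids of the form "{key}-content" (rag-content, chunking-content), but this panel's id is "indexing-db"; confirm that switchTab('indexing-db') actually resolves this element. -->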
        <div id="indexing-db" class="tab-content">
        <!-- Intro cards contrasting algorithm-level indexing with system-level vector
             databases. Both cards now share one structure: a plain heading followed
             by a paragraph carrying the "brief" class (it was on the heading in the
             second card). -->
        <div class="grid">
            <div class="card">
                <h2>🧮 Indexing Techniques (Algorithmic Level)</h2>
                <p class="brief">Algorithmic methods for organizing and searching vectors efficiently. These are the core algorithms that determine how vectors are structured and retrieved (e.g., Flat Index, ANN, IVF, HNSW, PQ/OPQ).</p>
            </div>
            <div class="card">
                <h2>💾 Vector Databases (System Level)</h2>
                <p class="brief">Complete systems/engines that implement indexing techniques along with additional features like persistence, metadata support, distributed architecture, and APIs. They are the production-grade platforms (e.g., FAISS, Milvus, Pinecone, Weaviate).</p>
            </div>
        </div>
        <!-- ---------- INDEXING TECHNIQUES TABLE ---------- -->
        <!-- Indexing techniques comparison. The table has an accessible name, header
             cells declare scope="col", and every link that opens a new tab carries
             rel="noopener noreferrer" so the opened page gets no handle on this one. -->
        <div class="card">
            <table class="rag-table" aria-label="Indexing techniques comparison">
                <thead>
                    <tr>
                        <th scope="col" style="width:10%">Indexing Technique</th>
                        <th scope="col" style="width:15%">Description</th>
                        <th scope="col" style="width:15%">Workflow</th>
                        <th scope="col" style="width:15%">Pros</th>
                        <th scope="col" style="width:15%">Cons</th>
                        <th scope="col" style="width:30%">Peer-Reviewed Reference</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>Flat Index (Brute-Force)</td>
                        <td>Exact similarity search over all vectors using distance metrics.</td>
                        <td>
                            Embed documents → store vectors → compare query with all vectors → return top-k
                        </td>
                        <td>Exact results<br>Simple implementation</td>
                        <td>Not scalable<br>High latency & memory cost</td>
                        <td class="timeline-reference">
                            <a href="https://proceedings.neurips.cc/paper_files/paper/2020/file/6b493230205f780e1bc26945df7481e5-Paper.pdf" target="_blank" rel="noopener noreferrer">
                                📄Lewis, P., Perez, E., Piktus, A., Petroni, F., Karpukhin, V., Goyal, N., Küttler, H., Lewis, M., Yih, W., Rocktäschel, T., Riedel, S., & Kiela, D. (2020). Retrieval‑augmented generation for knowledge‑intensive NLP tasks. In H. Larochelle, M. Ranzato, R. Hadsell, M. F. Balcan, & H. T. Lin (Eds.), Advances in Neural Information Processing Systems, 33, 9459–9474. Curran Associates, Inc.
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>ANN (Approximate Nearest Neighbor)</td>
                        <td>Fast similarity search with controlled approximation.</td>
                        <td>
                            Embed documents → build ANN structure → retrieve approximate neighbors
                        </td>
                        <td>Fast retrieval<br>Scales to millions/billions</td>
                        <td>Approximate (not exact)</td>
                        <td class="timeline-reference">
                            <a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank" rel="noopener noreferrer">
                                📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>IVF (Inverted File Index)</td>
                        <td>Partitions vector space into clusters; searches only relevant partitions.</td>
                        <td>
                            K-means clustering → assign vectors → query nearest clusters → search inside
                        </td>
                        <td>Good speed-accuracy trade-off<br>Scalable</td>
                        <td>Requires tuning<br>Cluster-quality sensitive</td>
                        <td class="timeline-reference">
                            <a href="https://arxiv.org/pdf/1702.08734" target="_blank" rel="noopener noreferrer">
                                📄Johnson, J., Douze, M., & Jégou, H. (2019). Billion‑scale similarity search with GPUs. IEEE Transactions on Big Data, 7(3), 535–547. https://doi.org/10.1109/TBDATA.2019.2921572
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>HNSW</td>
                        <td>Graph-based ANN using hierarchical proximity graphs.</td>
                        <td>
                            Build layered graph → incremental insertion → top-down graph traversal
                        </td>
                        <td>Very fast<br>High recall<br>Dynamic updates</td>
                        <td>High memory usage<br>Complex construction</td>
                        <td class="timeline-reference">
                            <a href="https://arxiv.org/abs/1603.09320" target="_blank" rel="noopener noreferrer">
                                📄Malkov, Y. A., & Yashunin, D. A. (2020). Efficient and robust approximate nearest neighbor search using hierarchical navigable small world graphs. IEEE Transactions on Pattern Analysis and Machine Intelligence, 42(4), 824–836. https://doi.org/10.1109/TPAMI.2018.2889473
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>PQ / OPQ</td>
                        <td>Compresses vectors into short codes for memory-efficient ANN.</td>
                        <td>
                            Subspace split → quantization → store codes → approximate distance
                        </td>
                        <td>Memory efficient<br>Enables billion-scale RAG</td>
                        <td>Lossy compression<br>Lower recall if misconfigured</td>
                        <td class="timeline-reference">
                            <a href="https://www.irisa.fr/texmex/people/jegou/papers/jegou_searching_with_quantization.pdf" target="_blank" rel="noopener noreferrer">
                                📄Jégou, H., Douze, M., & Schmid, C. (2011). Product quantization for nearest neighbor search. IEEE Transactions on Pattern Analysis and Machine Intelligence, 33(1), 117–128. https://doi.org/10.1109/TPAMI.2010.57
                            </a>
                        </td>
                    </tr>
                </tbody>
            </table>
        </div>
        <!-- ---------- VECTOR DATABASES TABLE ---------- -->
        <!-- Vector database comparison. The table has an accessible name, header cells
             declare scope="col", and every new-tab link carries rel="noopener noreferrer".
             The Chroma reference cell now has the same "timeline-reference" class as
             the other rows, and the tracking query string is dropped from the two
             journal article links. -->
        <div class="card">
            <table class="rag-table" aria-label="Vector databases comparison">
                <thead>
                    <tr>
                        <th scope="col" style="width:10%">Vector DB</th>
                        <th scope="col" style="width:20%">Description</th>
                        <th scope="col" style="width:10%">Underlying Index</th>
                        <th scope="col" style="width:15%">Pros</th>
                        <th scope="col" style="width:15%">Cons</th>
                        <th scope="col" style="width:30%">Peer-Reviewed Reference</th>
                    </tr>
                </thead>
                <tbody>
                    <tr>
                        <td>FAISS</td>
                        <td>Research-oriented vector similarity library widely used in RAG.</td>
                        <td>Flat, IVF, HNSW, PQ, OPQ</td>
                        <td>Highly optimized<br>Flexible ANN<br>Academic standard</td>
                        <td>Not a full DBMS<br>Limited metadata</td>
                        <td class="timeline-reference">
                            <a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank" rel="noopener noreferrer">
                                📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>Milvus</td>
                        <td>Distributed open-source vector database for large-scale RAG.</td>
                        <td>HNSW, IVF-PQ</td>
                        <td>Scalable<br>Distributed<br>Open-source</td>
                        <td>Deployment complexity<br>Operational overhead</td>
                        <td class="timeline-reference">
                            <a href="https://ijaibdcms.org/index.php/ijaibdcms/article/view/257" target="_blank" rel="noopener noreferrer">
                                📄Rusum, G. P., & Anasuri, S. (2025). Vector databases in modern applications: Real‑time search, recommendations, and retrieval‑augmented generation (RAG). International Journal of AI, BigData, Computational and Management Studies, 5(4), Article 113. https://doi.org/10.63282/3050‑9416.IJAIBDCMS‑V5I4P113
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>Pinecone</td>
                        <td>Fully managed vector database for production RAG systems.</td>
                        <td>Proprietary ANN (HNSW-like)</td>
                        <td>Fully managed<br>High availability<br>Scalable</td>
                        <td>Closed-source<br>Opaque indexing</td>
                        <td class="timeline-reference">
                            <a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank" rel="noopener noreferrer">
                                📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>Weaviate</td>
                        <td>Open-source vector DB with schema & metadata-aware retrieval.</td>
                        <td>HNSW</td>
                        <td>Simple API<br>Schema support</td>
                        <td>High memory usage<br>Limited ANN tuning</td>
                        <td class="timeline-reference">
                            <a href="https://ijaibdcms.org/index.php/ijaibdcms/article/view/257" target="_blank" rel="noopener noreferrer">
                                📄Rusum, G. P., & Anasuri, S. (2025). Vector databases in modern applications: Real‑time search, recommendations, and retrieval‑augmented generation (RAG). International Journal of AI, BigData, Computational and Management Studies, 5(4), Article 113. https://doi.org/10.63282/3050‑9416.IJAIBDCMS‑V5I4P113
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>Elasticsearch (Vector)</td>
                        <td>Hybrid sparse-dense retrieval using BM25 + vectors.</td>
                        <td>HNSW + BM25</td>
                        <td>Hybrid retrieval<br>Mature ecosystem</td>
                        <td>Higher latency<br>Slower pure vector search</td>
                        <td class="timeline-reference">
                            <a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank" rel="noopener noreferrer">
                                📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
                            </a>
                        </td>
                    </tr>

                    <tr>
                        <td>Chroma</td>
                        <td>Lightweight developer-focused vector store for prototyping.</td>
                        <td>HNSW (ANN backends)</td>
                        <td>Easy integration<br>Fast prototyping</td>
                        <td>Limited scalability<br>Not enterprise-grade</td>
                        <td class="timeline-reference">
                            <a href="https://simg.baai.ac.cn/paperfile/25a43194-c74c-4cd3-b60f-0a1f27f8b8af.pdf" target="_blank" rel="noopener noreferrer">
                                📄Gao, Y., Xiong, Y., Gao, X., Jia, K., Pan, J., Bi, Y., Dai, Y., Sun, J., Wang, M., & Wang, H. (2024). Retrieval‑Augmented Generation for Large Language Models: A Survey (arXiv:2312.10997). arXiv Preprint. https://doi.org/10.48550/arXiv.2312.10997
                            </a>
                        </td>
                    </tr>
                </tbody>
            </table>
        </div>
        </div>
    </div>

    <script>
        const ragData = [
            {
                type: "Naive RAG",
                year: 2020,
                category: "Foundational",
                indexing: "Preprocessing → Fixed Chunking → Simple Embedding → Vector DB Storage",
                inference: "User Query → Embedding → Vector DB Lookup (Top-K) → Concatenation → LLM Generate",
                benefits: "Establishes the knowledge retrieval baseline; Simple and cheap to implement.",
                challenges: "Context loss due to rigid chunking; High hallucination risk; Poor handling of complex/multi-step queries.",
                references: "Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. NeurIPS 2020.",
                paperUrl: "https://arxiv.org/abs/2005.11401"
            },
            {
                type: "Self RAG",
                year: 2023,
                category: "Agentic & Modular",
                indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
                inference: "User Query → LLM Generates a thought → Retrieval → LLM Generates/Evaluates Retrieved Passages → LLM Decides if Answer is Ready → Final LLM Generate",
                benefits: "Reduces hallucinations by self-critique/verification; Filters out poor quality retrieved passages.",
                challenges: "Increases inference latency (multiple LLM calls per query); Requires careful tuning of reflection/critique prompt.",
                references: "Asai, A., et al. (2023). Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. arXiv:2310.11511",
                paperUrl: "https://arxiv.org/abs/2310.11511"
            },
            {
                type: "Modular RAG",
                year: 2024,
                category: "Modular",
                indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
                inference: "User Query → Router/Module Selection → Selected Module Executes → LLM Generate",
                benefits: "Improves flexibility and component reusability; Enables optimal module selection for specific tasks.",
                challenges: "Requires complex routing/planning logic; Overhead of training/managing multiple specialized components.",
                references: "Gao, Y., et al. (2024). Modular RAG: Transforming RAG Systems into LEGO-like Reconfigurable Frameworks. arXiv:2407.21059",
                paperUrl: "https://arxiv.org/abs/2407.21059"
            },
            {
                type: "Cache RAG",
                year: 2024,
                category: "Modular",
                indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
                inference: "User Query → Cache Lookup → If Hit: Return Cached Answer → If Miss: Standard Retrieval → LLM Generate → Cache Store",
                benefits: "Dramatically improves latency and reduces LLM cost for repeated or highly similar queries.",
                challenges: "Complex cache invalidation logic; Requires robust query similarity and hashing functions.",
                references: "Jin, C., et al. (2024). RAGCache: Efficient Knowledge Caching for Retrieval-Augmented Generation. ACM TOCS.",
                paperUrl: "https://arxiv.org/abs/2404.12457"
            },
            {
                type: "MultiModal RAG",
                year: 2024,
                category: "Structural",
                indexing: "Preprocessing → Multi-Modal Embedding (e.g., CLIP) → Stores representations of all modalities in Vector DB",
                inference: "User Query (Text or Image) → Multi-Modal Embedding → Vector DB Lookup (Retrieves related text, image, metadata) → LLM Generate",
                benefits: "Unlocks knowledge stored in non-text data (images, charts, tables); Provides a richer context.",
                challenges: "Requires specialized multimodal embeddings/models; Indexing is computationally expensive; Difficult to combine disparate modalities coherently.",
                references: "Gao, Y., et al. (2024). Retrieval-Augmented Multimodal Language Modeling. CVPR 2024.",
                paperUrl: "https://arxiv.org/abs/2211.12561"
            },
            {
                type: "Graph RAG",
                year: 2024,
                category: "Structural & Modular",
                indexing: "Preprocessing → Entity/Relation Extraction → Store in Knowledge Graph (KG) & Vector DB",
                inference: "User Query → Embedding/KG Query → Simultaneous Retrieval (Vector + KG Path) → Concatenation → LLM Generate",
                benefits: "Resolves complex, multi-hop queries by leveraging factual relationships; Improves interpretability and fact consistency.",
                challenges: "High indexing complexity (KG construction); Expensive maintenance for rapidly changing data; Retrieval latency can be high.",
                references: "Barry, M., et al. (2025). GraphRAG: Leveraging Graph-Based Efficiency to Minimize Hallucinations in LLM-Driven RAG. GenAIK Workshop.",
                paperUrl: "https://aclanthology.org/2025.genaik-1.6/"
            },
            {
                type: "Recursive RAG",
                year: 2024,
                category: "Structural & Modular",
                indexing: "Preprocessing → Chunking & Summarization → Embeddings of both chunks & summaries → Vector DB Storage",
                inference: "User Query → Retrieval → LLM evaluates initial result → Recursive Query → Retrieve Specific Chunks → LLM Generate",
                benefits: "Summarizes context or decomposes queries recursively; Handles high-level questions that require abstract understanding.",
                challenges: "Risk of information loss during aggressive summarization; Chain of thought adds significant latency.",
                references: "Liu, Y., et al. (2024). RAG-GPT: Retrieval-Augmented Generation for Open-Domain Question Answering. IJCNN 2024.",
                paperUrl: "https://arxiv.org/abs/2405.10627"
            },
            {
                type: "Multi-Hop RAG",
                year: 2024,
                category: "Structural & Modular",
                indexing: "Preprocessing → Chunking/Entity Extraction → Embedding → Structured Storage (Vector DB + Optional KG)",
                inference: "User Query → Query Decomposition → Hop 1 Retrieval → Iterative Reasoning → Hop 2 Retrieval → Final Evidence Aggregation → LLM Generate",
                benefits: "Solves questions requiring reasoning across multiple independent documents or retrieval steps.",
                challenges: "Prone to error propagation (if one hop fails); Significantly higher latency; Requires generation of accurate intermediate queries.",
                references: "Tang, B., & Yang, Y. (2024). MultiHop-RAG: Benchmarking Retrieval-Augmented Generation for Multi-Hop Queries. COLM 2024.",
                paperUrl: "https://arxiv.org/abs/2401.15391"
            },
            {
                type: "Corrective RAG",
                year: 2024,
                category: "Agentic & Modular",
                indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
                inference: "User Query → Standard Retrieval → Retrieved Docs Evaluated → Corrective Action → LLM Generate",
                benefits: "Detects and corrects poor quality retrieval/generation post-hoc; Increases overall trustworthiness.",
                challenges: "High latency due to iterative correction loops; Requires training a dedicated evaluation model.",
                references: "Yan, S.-Q., et al. (2024). Corrective Retrieval Augmented Generation. arXiv:2401.15884",
                paperUrl: "https://arxiv.org/abs/2401.15884"
            },
            {
                type: "Agentic RAG",
                year: 2024,
                category: "Agentic & Modular",
                indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
                inference: "User Query → Agent Planning/Tool Selection → Agent Executes RAG Retrieval → Agent Reflects/Synthesizes → Final LLM Generate",
                benefits: "Handles complex, goal-oriented tasks via dynamic planning, tool use, and state tracking.",
                challenges: "Highest development/orchestration complexity; Slowest inference due to planning/execution loops; Failure in planning leads to catastrophic task failure.",
                references: "Singh, A., et al. (2025). Agentic Retrieval-Augmented Generation: A Survey on Agentic RAG.",
                paperUrl: "https://arxiv.org/abs/2412.09550"
            },
            {
                type: "Adaptive RAG",
                year: 2024,
                category: "Agentic & Modular",
                indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage → Train Query Complexity Classifier",
                inference: "User Query → Query Classification (Router) → Adaptive Decision → Retrieval Execution → LLM Generate",
                benefits: "Optimizes pipeline complexity and cost based on query assessment.",
                challenges: "Requires training a robust query classifier/router; Misclassification can lead to poor quality results.",
                references: "Jeong, S., et al. (2024). Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity. NAACL 2024.",
                paperUrl: "https://arxiv.org/abs/2403.14403"
            },
            {
                type: "Hierarchical RAG",
                year: 2025,
                category: "Structural & Modular",
                indexing: "Preprocessing → Hierarchical Chunking (Multiple levels) → Multiple Embeddings (for each level) → Vector DB Storage",
                inference: "User Query → Embedding → Multi-Level Retrieval → Concatenation → LLM Generate",
                benefits: "Solves the 'needle-in-a-haystack' problem for very long documents; Efficiently prunes non-relevant sections.",
                challenges: "Complex, multi-level chunking and indexing structure; Requires multiple retrieval passes.",
                references: "Huang, H., et al. (2025). Retrieval-Augmented Generation with Hierarchical Knowledge (HiRAG). EMNLP 2025.",
                paperUrl: "https://aclanthology.org/2025.emnlp-main.1/"
            },
            {
                type: "Speculative RAG",
                year: 2025,
                category: "Modular",
                indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
                inference: "User Query → Standard Retrieval → Drafting LLM Generates Tokens → Verifier LLM Checks Drafted Tokens Against Context → LLM Generate",
                benefits: "Significantly reduces token generation latency and LLM inference cost.",
                challenges: "Does not inherently improve semantic quality or hallucination rate; Requires careful balance between drafting and verifier models.",
                references: "Wang, Z., et al. (2025). Speculative RAG: Enhancing Retrieval Augmented Generation through Drafting. ICLR 2025.",
                paperUrl: "https://arxiv.org/abs/2407.08223"
            }
        ];

        // Catalogue of chunking strategies rendered on the Chunking tab.
        // Fields per entry, as consumed by initChunkingVisualizations():
        //   type        - display name; also used as the <option> value in the chunking selector
        //   year        - shown in the selector label and in the year timeline
        //   category    - counted for the pie chart; must equal a key of chunkingCategoryBriefs
        //                 for the method to be listed in the category hierarchy view
        //   workflow    - '→'-separated steps, split and drawn by createFlowchart()
        //   description - plain text shown under the flowchart
        //   references  - citation text; may be '' (then no reference is rendered)
        //   paperUrl    - link target; may be '' (then the citation is rendered as plain text)
        const chunkingData = [
            {
                type: "Sliding Window Chunking",
                year: 1998,
                category: "Size-based",
                workflow: "Input Document → Define Chunk Size & Overlap → Slide Window (e.g., size 300, overlap 100) → Create Overlapping Chunks → Output Chunks",
                description: "Uses a fixed chunk size plus overlap (e.g., chunk size 300, overlap 100). Maintains context continuity across chunks.",
                references: "Carbonell, J. & Goldstein, J. (1998). The use of MMR, diversity-based reranking for reordering documents. SIGIR 1998.",
                paperUrl: "https://dl.acm.org/doi/10.1145/290941.291025"
            },   
            {
                type: "Fixed-size Chunking",
                // NOTE(review): year is 2001 but the citation below is dated 2011 — confirm which is intended.
                year: 2001,
                category: "Size-based",
                workflow: "Input Document → Split by Fixed Token/Word Count (200-500 tokens) → Create Chunks → Output Chunks",
                description: "Splits text into chunks based on a fixed number of tokens/words (e.g., 200-500 tokens). Simple but may break semantic boundaries.",
                references: "Collobert, R., et al. (2011). Natural language processing (almost) from scratch. Journal of Machine Learning Research 12:2493-2537.",
                paperUrl: "https://www.jmlr.org/papers/volume12/collobert11a/collobert11a.pdf"
            },
            {
                type: "Discourse-aware Chunking",
                year: 2005,
                category: "Semantic-based",
                workflow: "Input Document → Identify Discourse Markers (however, in contrast, therefore) → Detect Conceptual Shifts → Split at Discourse Boundaries → Output Discourse Chunks",
                description: "Uses discourse markers (e.g., 'however', 'in contrast', 'therefore') to detect conceptual shifts and chunk accordingly.",
                references: "Sporleder, C. & Lapata, M. (2005). Discourse Chunking and its Application to Sentence Compression. HLT 2005.",
                paperUrl: "https://aclanthology.org/H05-1033/"
            },
            {
                type: "Paragraph-based Chunking",
                year: 2014,
                category: "Structure-based",
                workflow: "Input Document → Identify Paragraph Boundaries → Split at Paragraphs → Create Paragraph Chunks → Output Chunks",
                description: "Uses natural paragraph boundaries for chunking. Preserves document structure and thematic grouping.",
                references: "Dudhabaware, R. S., et al. (2014). Review on natural language processing tasks for text documents. IEEE ICCIC 2014.",
                paperUrl: "https://ieeexplore.ieee.org/document/7238334"
            },
            {
                type: "Recursive/Hierarchical Chunking",
                // NOTE(review): year is 2023 while the citation below is dated 2025 — confirm.
                year: 2023,
                category: "Hierarchical",
                workflow: "Input Document → Break into Sections → Split Sections into Paragraphs → Split Paragraphs into Sentences → Split Sentences into Tokens → Output Multi-level Chunks",
                description: "Breaks document into progressively smaller units (section → paragraph → sentence → tokens). Enables multi-level retrieval.",
                references: "Latif, S., et al. (2025). The Chunking Paradigm: Recursive Semantic for RAG. ICNLSP 2025.",
                paperUrl: "https://aclanthology.org/2025.icnlsp-1.16/"
            },
            {
                type: "Semantic Chunking",
                year: 2024,
                category: "Semantic-based",
                workflow: "Input Document → Analyze Topic Shifts/Semantic Boundaries → Split at Semantic Breaks → Create Semantically Coherent Chunks → Output Chunks",
                description: "Chunks based on topic shifts or semantic boundaries rather than size. Preserves meaning and context.",
                references: "LangChain (2024). Semantic chunker. Accessed 2024-09-14.",
                paperUrl: "https://python.langchain.com/docs/how_to/semantic-chunker/"
            },
            {
                type: "Sentence-based Chunking",
                year: 2024,
                category: "Structure-based",
                workflow: "Input Document → Split by Sentences → Group Sentences (Optional) → Create Sentence Chunks → Output Chunks",
                description: "Splits text by sentences. Maintains grammatical integrity and natural language boundaries.",
                references: "Dong, K., et al. (2024). Multi-view content-aware indexing for long document retrieval. arXiv:2404.15103",
                paperUrl: "https://arxiv.org/abs/2404.15103"
            },
            {
                type: "Hybrid Chunking",
                year: 2024,
                category: "Semantic-based",
                workflow: "Input Document → Apply Semantic Analysis → Apply Fixed-size Constraint → Apply Overlap Strategy → Create Hybrid Chunks → Output Chunks",
                description: "Combines semantic + fixed-size + overlap strategies. Balances context preservation with practical constraints.",
                references: "Kamradt, G. (2024). 5 levels of text splitting.",
                // No URL: the citation is rendered as plain text instead of a link.
                paperUrl: ""
            },
            {
                type: "Embedding-similarity-based Chunking",
                year: 2024,
                category: "Semantic-based",
                workflow: "Input Document → Generate Sentence Embeddings → Calculate Similarity Scores → Detect Similarity Drops Below Threshold → Split at Low Similarity Points → Output Chunks",
                description: "Sequentially processes text and opens a new chunk when embedding similarity drops below a threshold. Data-driven approach.",
                references: "Kamradt, G. (2024). 5 levels of text splitting.",
                // No URL: the citation is rendered as plain text instead of a link.
                paperUrl: ""
            },
            {
                type: "Page-based Chunking",
                year: 2024,
                category: "Structure-based",
                workflow: "Input PDF/Scanned Document → Identify Page Boundaries → Split by Page → Create Page Chunks → Output Chunks",
                description: "Splits by PDF page or scanned document page. Useful for document-level retrieval and citation.",
                // No citation and no URL: the reference line is omitted entirely for this entry.
                references: "",
                paperUrl: ""
            },
            {
                type: "Domain-specific Chunking",
                year: 2024,
                category: "Domain-specific",
                workflow: "Input Domain Document → Apply Domain Rules (legal sections, medical records, code functions) → Split by Domain Logic → Create Domain Chunks → Output Chunks",
                description: "Tailored to content type (e.g., legal documents by sections, medical records by encounters, code by functions).",
                // NOTE(review): citation says 2024, but arXiv ids are YYMM-prefixed, so 2512.* would
                // not be a 2024 submission — verify the year and/or the identifier.
                references: "Allamraju, A., et al. (2024). Breaking It Down: Domain-Aware Semantic Segmentation for Retrieval Augmented Generation. arXiv:2512.00367",
                paperUrl: "https://arxiv.org/abs/2512.00367"
            },
            {
                type: "Metadata-based Chunking",
                year: 2025,
                category: "Structure-based",
                workflow: "Input Document → Extract Metadata (headers, bullets, sections) → Split Using Metadata Structure → Create Metadata-aware Chunks → Output Chunks",
                description: "Splits text using document metadata (e.g., headers, bullets). Preserves document structure and hierarchy.",
                references: "Zhao, J., et al. (2025). MoC: Mixtures of Text Chunking Learners for Retrieval-Augmented Generation. ACL 2025.",
                paperUrl: "https://aclanthology.org/2025.acl-long.1/"
            }
        ];

        // One-line summary per chunking category, keyed by the exact `category` string
        // used on chunkingData entries. initChunkingVisualizations() iterates these keys
        // (in this order) to build both the definition cards and the category hierarchy,
        // so a chunkingData category missing from this map would not appear in either view.
        const chunkingCategoryBriefs = {
            'Size-based': 'Chunks text by fixed sizes or overlapping windows. Simple, predictable, but may break context.',
            'Semantic-based': 'Identifies meaning, topics, or discourse patterns to create contextually coherent chunks.',
            'Structure-based': 'Uses natural document structure (sentences, paragraphs, metadata) for boundaries.',
            'Hierarchical': 'Creates multi-level chunks (sections → paragraphs → sentences) for flexible retrieval.',
            'Domain-specific': 'Applies domain knowledge and rules tailored to specific content types (legal, medical, code).'
        };

        // Definitions of the four base RAG categories, rendered as cards into
        // #categoryDefinitions. Keys must match the map in getCategoryDefClass() to get
        // their own card styling. Note these are the base names only; ragData entries may
        // carry combined labels (e.g. 'Agentic & Modular'), which are styled separately
        // by getCategoryBadgeClass().
        const mainCategoryDefinitions = {
            'Foundational': 'The baseline RAG approach establishing core retrieval-augmented generation principles with simple, fixed pipelines.',
            'Modular': 'Focuses on pipeline flexibility through specialized, interchangeable components and intermediate operations between query and generation.',
            'Agentic': 'Uses dynamic decision-makers (agents or classifiers) to guide the pipeline, perform self-correction, and enable adaptive behavior.',
            'Structural': 'Innovations in how source documents are preprocessed, stored, and indexed for more effective retrieval (e.g., knowledge graphs, hierarchies, multi-modal embeddings).'
        };

        /**
         * Show one top-level tab and its panel, hiding all the others.
         *
         * Tab buttons are addressed by their position among the `.tab` elements
         * (0 = RAG, 1 = chunking, 2 = indexing/DB); panels are addressed by id.
         * An unrecognised name leaves every tab and panel inactive.
         *
         * @param {string} tabName - 'rag', 'chunking' or 'indexing-db'.
         */
        function switchTab(tabName) {
            const tabButtons = document.querySelectorAll('.tab');
            const panels = document.querySelectorAll('.tab-content');

            // Start from a clean slate; the matching case below re-activates one pair.
            for (const button of tabButtons) {
                button.classList.remove('active');
            }
            for (const panel of panels) {
                panel.classList.remove('active');
            }

            switch (tabName) {
                case 'rag':
                    tabButtons[0].classList.add('active');
                    document.getElementById('rag-content').classList.add('active');
                    break;
                case 'chunking':
                    tabButtons[1].classList.add('active');
                    document.getElementById('chunking-content').classList.add('active');
                    // Chunking widgets are built on demand, after the panel is visible.
                    initChunkingVisualizations();
                    break;
                case 'indexing-db':
                    tabButtons[2].classList.add('active');
                    document.getElementById('indexing-db').classList.add('active');
                    break;
                default:
                    break;
            }
        }

        /**
         * Map a RAG category label to the CSS class used for its badge.
         *
         * @param {string} category - Category label as stored on a ragData entry
         *     (e.g. 'Modular', 'Agentic & Modular').
         * @returns {string} The badge class; 'badge-modular' for any label not listed.
         */
        function getCategoryBadgeClass(category) {
            const map = {
                'Foundational': 'badge-foundational',
                'Agentic & Modular': 'badge-agentic',
                'Modular': 'badge-modular',
                'Structural & Modular': 'badge-structural-modular',
                'Structural': 'badge-structural'
            };
            // Own-property check: a plain `map[category]` lookup would also resolve inherited
            // names such as 'constructor' or 'toString' and return a function, not a class name.
            return Object.prototype.hasOwnProperty.call(map, category)
                ? map[category]
                : 'badge-modular';
        }

        /**
         * Map a base RAG category name to the CSS class used for its definition card.
         *
         * @param {string} category - One of the base category names
         *     ('Foundational', 'Agentic', 'Modular', 'Structural').
         * @returns {string} The card class; 'def-modular' for any name not listed.
         */
        function getCategoryDefClass(category) {
            const map = {
                'Foundational': 'def-foundational',
                'Agentic': 'def-agentic',
                'Modular': 'def-modular',
                'Structural': 'def-structural'
            };
            // Own-property check so inherited keys ('constructor', 'toString', ...) fall back
            // to the default instead of leaking a function into the class attribute.
            return Object.prototype.hasOwnProperty.call(map, category)
                ? map[category]
                : 'def-modular';
        }

        // Render one definition card per base RAG category into #categoryDefinitions,
        // in the key order of mainCategoryDefinitions.
        const categoryDefContainer = document.getElementById('categoryDefinitions');
        for (const [label, definition] of Object.entries(mainCategoryDefinitions)) {
            const card = document.createElement('div');
            card.className = `category-definition ${getCategoryDefClass(label)}`;
            card.innerHTML = `
                <strong>${label}</strong>
                <p>${definition}</p>
            `;
            categoryDefContainer.appendChild(card);
        }

        // Create Year Timeline: one row per ragData entry (in array order) showing the
        // year, the RAG type, its category badge and, when available, its reference.
        const yearTimeline = document.getElementById('yearTimeline');
        ragData.forEach(rag => {
            const item = document.createElement('div');
            item.className = 'timeline-item';

            // Mirror the chunking timeline: link the citation only when a URL exists,
            // fall back to plain text otherwise, and omit the line when there is no citation.
            // rel="noopener noreferrer" keeps the new tab from reaching back via window.opener.
            let referenceHTML = '';
            if (rag.references && rag.paperUrl) {
                referenceHTML = `<div class="timeline-reference">📄 <a href="${rag.paperUrl}" target="_blank" rel="noopener noreferrer">${rag.references}</a></div>`;
            } else if (rag.references) {
                referenceHTML = `<div class="timeline-reference">📄 ${rag.references}</div>`;
            }

            item.innerHTML = `
                <div class="timeline-year">${rag.year}</div>
                <div class="timeline-content">
                    <div class="timeline-type">${rag.type}</div>
                    <span class="category-badge ${getCategoryBadgeClass(rag.category)}">${rag.category}</span>
                    ${referenceHTML}
                </div>
            `;
            yearTimeline.appendChild(item);
        });

        // Fill the RAG type dropdown with one "<type> (<year>)" option per ragData entry;
        // the option value is the bare type name, which the change handler looks up.
        const selector = document.getElementById('ragTypeSelector');
        for (const { type, year } of ragData) {
            const entry = document.createElement('option');
            entry.value = type;
            entry.textContent = `${type} (${year})`;
            selector.appendChild(entry);
        }

        /**
         * Draw a workflow as a sequence of step boxes inside the given container.
         *
         * The container is emptied first. The workflow string is split on '→', each
         * piece is trimmed, and one `.flow-step` div is appended per piece. The first
         * and last steps additionally get the `highlight` class.
         *
         * @param {string} workflow - Steps joined by '→'.
         * @param {string} containerId - id of the element that receives the step boxes.
         */
        function createFlowchart(workflow, containerId) {
            const target = document.getElementById(containerId);
            target.innerHTML = '';

            const labels = workflow.split('→').map(piece => piece.trim());
            const lastIndex = labels.length - 1;

            for (let i = 0; i <= lastIndex; i++) {
                const box = document.createElement('div');
                const isEndpoint = i === 0 || i === lastIndex;
                box.className = isEndpoint ? 'flow-step highlight' : 'flow-step';
                box.textContent = labels[i];
                target.appendChild(box);
            }
        }

        // When a RAG type is chosen, draw its indexing and inference pipelines and fill in
        // the benefits/challenges text; when the value matches no entry, hide the details panel.
        selector.addEventListener('change', (event) => {
            const match = ragData.find(entry => entry.type === event.target.value);
            const detailsPanel = document.getElementById('flowchartDetails');

            if (!match) {
                detailsPanel.style.display = 'none';
                return;
            }

            createFlowchart(match.indexing, 'indexingFlowchart');
            createFlowchart(match.inference, 'inferenceFlowchart');
            document.getElementById('benefits').textContent = match.benefits;
            document.getElementById('challenges').textContent = match.challenges;
            detailsPanel.style.display = 'block';
        });

        // Category Chart: doughnut showing how many RAG types carry each category label.
        const categoryCount = ragData.reduce((tally, { category }) => {
            tally[category] = (tally[category] || 0) + 1;
            return tally;
        }, {});

        {
            // Block scope keeps these helpers out of the script's top-level namespace.
            const sliceColors = ['#667eea', '#764ba2', '#f093fb', '#4facfe', '#00d2ff'];
            const legendAtBottom = { legend: { position: 'bottom' } };

            new Chart(document.getElementById('categoryChart'), {
                type: 'doughnut',
                data: {
                    labels: Object.keys(categoryCount),
                    datasets: [{
                        data: Object.values(categoryCount),
                        backgroundColor: sliceColors
                    }]
                },
                options: {
                    responsive: true,
                    maintainAspectRatio: false,
                    plugins: legendAtBottom
                }
            });
        }

        // Timeline Chart: bar chart of how many RAG types were introduced in each year.
        const yearCount = ragData.reduce((tally, { year }) => {
            tally[year] = (tally[year] || 0) + 1;
            return tally;
        }, {});

        {
            // Year keys are strings; the default sort orders them for the x-axis, and the
            // counts are read back in that same order so bars line up with their labels.
            const years = Object.keys(yearCount).sort();
            const counts = years.map(year => yearCount[year]);

            new Chart(document.getElementById('timelineChart'), {
                type: 'bar',
                data: {
                    labels: years,
                    datasets: [{
                        label: 'Number of RAG Types',
                        data: counts,
                        backgroundColor: '#667eea'
                    }]
                },
                options: {
                    responsive: true,
                    maintainAspectRatio: false,
                    scales: {
                        // Counts are whole numbers, so tick in steps of 1 starting at zero.
                        y: { beginAtZero: true, ticks: { stepSize: 1 } }
                    },
                    plugins: {
                        legend: { display: false }
                    }
                }
            });
        }

        // Chunking Visualizations
        // Guard flag: the Chunking tab is built once, on the first call made by switchTab('chunking').
        let chunkingChartsInitialized = false;

        /**
         * Build every widget on the Chunking tab from chunkingData / chunkingCategoryBriefs:
         * the method selector and its change handler, the per-category pie chart, the
         * category definition cards, the category hierarchy view and the year timeline.
         *
         * Subsequent calls are no-ops. The flag is set before any work is done, so a
         * failure part-way through is not retried on the next call.
         */
        function initChunkingVisualizations() {
            if (chunkingChartsInitialized) return;
            chunkingChartsInitialized = true;

            // Populate chunking selector: option value is the method name, label adds the year.
            const chunkingSelector = document.getElementById('chunkingSelector');
            chunkingData.forEach(chunk => {
                const option = document.createElement('option');
                option.value = chunk.type;
                option.textContent = `${chunk.type} (${chunk.year})`;
                chunkingSelector.appendChild(option);
            });

            // On selection: draw the workflow, show description and reference; hide the
            // details panel when the selected value matches no method.
            chunkingSelector.addEventListener('change', (e) => {
                const selected = chunkingData.find(c => c.type === e.target.value);
                const details = document.getElementById('chunkingFlowchartDetails');
                
                if (selected) {
                    createFlowchart(selected.workflow, 'chunkingFlowchart');
                    document.getElementById('chunkingDescription').textContent = selected.description;
                    
                    const refElement = document.getElementById('chunkingReference');
                    if (selected.references && selected.paperUrl) {
                        // rel="noopener noreferrer": the new tab must not get a window.opener handle.
                        refElement.innerHTML = `📄 <a href="${selected.paperUrl}" target="_blank" rel="noopener noreferrer">${selected.references}</a>`;
                    } else if (selected.references) {
                        // Citation without a URL: plain text, no link.
                        refElement.textContent = `📄 ${selected.references}`;
                    } else {
                        // Clear whatever the previously selected method left behind.
                        refElement.textContent = '';
                    }
                    
                    details.style.display = 'block';
                } else {
                    details.style.display = 'none';
                }
            });

            // Chunking methods chart: number of methods per category.
            const chunkingTypes = {};
            chunkingData.forEach(chunk => {
                chunkingTypes[chunk.category] = (chunkingTypes[chunk.category] || 0) + 1;
            });

            new Chart(document.getElementById('chunkingChart'), {
                type: 'pie',
                data: {
                    labels: Object.keys(chunkingTypes),
                    datasets: [{
                        data: Object.values(chunkingTypes),
                        backgroundColor: ['#667eea', '#764ba2', '#f093fb', '#4facfe', '#00d2ff']
                    }]
                },
                options: {
                    responsive: true,
                    maintainAspectRatio: false,
                    plugins: {
                        legend: {
                            position: 'bottom'
                        }
                    }
                }
            });

            // Add category definitions: one card per key of chunkingCategoryBriefs.
            // All cards share the 'def-modular' styling.
            const chunkingCategoryDefsContainer = document.getElementById('chunkingCategoryDefs');
            Object.keys(chunkingCategoryBriefs).forEach(category => {
                const div = document.createElement('div');
                div.className = 'category-definition def-modular';
                div.innerHTML = `
                    <strong>${category}</strong>
                    <p>${chunkingCategoryBriefs[category]}</p>
                `;
                chunkingCategoryDefsContainer.appendChild(div);
            });

            // Create hierarchical category view: each category node lists, as tags, the
            // methods whose `category` equals that key.
            const categoryHierarchy = document.getElementById('categoryHierarchy');
            Object.keys(chunkingCategoryBriefs).forEach(category => {
                const methods = chunkingData.filter(c => c.category === category);
                const node = document.createElement('div');
                node.className = 'category-node';
                
                const methodTags = methods.map(m => 
                    `<span class="method-tag">${m.type}</span>`
                ).join('');
                
                node.innerHTML = `
                    <h4>${category}</h4>
                    <div class="brief">${chunkingCategoryBriefs[category]}</div>
                    <div class="category-methods">${methodTags}</div>
                `;
                categoryHierarchy.appendChild(node);
            });

            // Chunking year timeline: one row per method, in chunkingData order.
            const chunkingYearTimeline = document.getElementById('chunkingYearTimeline');
            chunkingData.forEach(chunk => {
                const item = document.createElement('div');
                item.className = 'timeline-item';
                
                // Linked citation when a URL exists, plain citation otherwise, nothing when
                // the entry has no citation at all.
                let referenceHTML = '';
                if (chunk.references && chunk.paperUrl) {
                    referenceHTML = `<div class="timeline-reference">📄 <a href="${chunk.paperUrl}" target="_blank" rel="noopener noreferrer">${chunk.references}</a></div>`;
                } else if (chunk.references) {
                    referenceHTML = `<div class="timeline-reference">📄 ${chunk.references}</div>`;
                }
                
                item.innerHTML = `
                    <div class="timeline-year">${chunk.year}</div>
                    <div class="timeline-content">
                        <div class="timeline-type">${chunk.type}</div>
                        ${referenceHTML}
                    </div>
                `;
                chunkingYearTimeline.appendChild(item);
            });
        }
    </script>
</body>
</html>