Update index.html
Browse files- index.html +59 -13
index.html
CHANGED
|
@@ -208,6 +208,25 @@
|
|
| 208 |
font-size: 0.9em;
|
| 209 |
}
|
| 210 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 211 |
.category-badge {
|
| 212 |
display: inline-block;
|
| 213 |
padding: 4px 12px;
|
|
@@ -321,7 +340,9 @@
|
|
| 321 |
indexing: "Preprocessing → Fixed Chunking → Simple Embedding → Vector DB Storage",
|
| 322 |
inference: "User Query → Embedding → Vector DB Lookup (Top-K) → Concatenation → LLM Generate",
|
| 323 |
benefits: "Establishes the knowledge retrieval baseline; Simple and cheap to implement.",
|
| 324 |
-
challenges: "Context loss due to rigid chunking; High hallucination risk; Poor handling of complex/multi-step queries."
|
|
|
|
|
|
|
| 325 |
},
|
| 326 |
{
|
| 327 |
type: "Self RAG",
|
|
@@ -330,7 +351,9 @@
|
|
| 330 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 331 |
inference: "User Query → LLM Generates a thought → Retrieval → LLM Generates/Evaluates Retrieved Passages → LLM Decides if Answer is Ready → Final LLM Generate",
|
| 332 |
benefits: "Reduces hallucinations by self-critique/verification; Filters out poor quality retrieved passages.",
|
| 333 |
-
challenges: "Increases inference latency (multiple LLM calls per query); Requires careful tuning of reflection/critique prompt."
|
|
|
|
|
|
|
| 334 |
},
|
| 335 |
{
|
| 336 |
type: "Modular RAG",
|
|
@@ -339,7 +362,9 @@
|
|
| 339 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 340 |
inference: "User Query → Router/Module Selection → Selected Module Executes → LLM Generate",
|
| 341 |
benefits: "Improves flexibility and component reusability; Enables optimal module selection for specific tasks.",
|
| 342 |
-
challenges: "Requires complex routing/planning logic; Overhead of training/managing multiple specialized components."
|
|
|
|
|
|
|
| 343 |
},
|
| 344 |
{
|
| 345 |
type: "Graph RAG",
|
|
@@ -348,7 +373,9 @@
|
|
| 348 |
indexing: "Preprocessing → Entity/Relation Extraction → Store in Knowledge Graph (KG) & Vector DB",
|
| 349 |
inference: "User Query → Embedding/KG Query → Simultaneous Retrieval (Vector + KG Path) → Concatenation → LLM Generate",
|
| 350 |
benefits: "Resolves complex, multi-hop queries by leveraging factual relationships; Improves interpretability and fact consistency.",
|
| 351 |
-
challenges: "High indexing complexity (KG construction); Expensive maintenance for rapidly changing data; Retrieval latency can be high."
|
|
|
|
|
|
|
| 352 |
},
|
| 353 |
{
|
| 354 |
type: "MultiModal RAG",
|
|
@@ -357,7 +384,9 @@
|
|
| 357 |
indexing: "Preprocessing → Multi-Modal Embedding (e.g., CLIP) → Stores representations of all modalities in Vector DB",
|
| 358 |
inference: "User Query (Text or Image) → Multi-Modal Embedding → Vector DB Lookup (Retrieves related text, image, metadata) → LLM Generate",
|
| 359 |
benefits: "Unlocks knowledge stored in non-text data (images, charts, tables); Provides a richer context.",
|
| 360 |
-
challenges: "Requires specialized multimodal embeddings/models; Indexing is computationally expensive; Difficult to combine disparate modalities coherently."
|
|
|
|
|
|
|
| 361 |
},
|
| 362 |
{
|
| 363 |
type: "Recursive RAG",
|
|
@@ -366,7 +395,9 @@
|
|
| 366 |
indexing: "Preprocessing → Chunking & Summarization → Embeddings of both chunks & summaries → Vector DB Storage",
|
| 367 |
inference: "User Query → Retrieval → LLM evaluates initial result → Recursive Query → Retrieve Specific Chunks → LLM Generate",
|
| 368 |
benefits: "Summarizes context or decomposes queries recursively; Handles high-level questions that require abstract understanding.",
|
| 369 |
-
challenges: "Risk of information loss during aggressive summarization; Chain of thought adds significant latency."
|
|
|
|
|
|
|
| 370 |
},
|
| 371 |
{
|
| 372 |
type: "Cache RAG",
|
|
@@ -375,7 +406,9 @@
|
|
| 375 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 376 |
inference: "User Query → Cache Lookup → If Hit: Return Cached Answer → If Miss: Standard Retrieval → LLM Generate → Cache Store",
|
| 377 |
benefits: "Dramatically improves latency and reduces LLM cost for repeated or highly similar queries.",
|
| 378 |
-
challenges: "Complex cache invalidation logic; Requires robust query similarity and hashing functions."
|
|
|
|
|
|
|
| 379 |
},
|
| 380 |
{
|
| 381 |
type: "Corrective RAG",
|
|
@@ -384,7 +417,9 @@
|
|
| 384 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 385 |
inference: "User Query → Standard Retrieval → Retrieved Docs Evaluated → Corrective Action → LLM Generate",
|
| 386 |
benefits: "Detects and corrects poor quality retrieval/generation post-hoc; Increases overall trustworthiness.",
|
| 387 |
-
challenges: "High latency due to iterative correction loops; Requires training a dedicated evaluation model."
|
|
|
|
|
|
|
| 388 |
},
|
| 389 |
{
|
| 390 |
type: "Multi-Hop RAG",
|
|
@@ -393,7 +428,9 @@
|
|
| 393 |
indexing: "Preprocessing → Chunking/Entity Extraction → Embedding → Structured Storage (Vector DB + Optional KG)",
|
| 394 |
inference: "User Query → Query Decomposition → Hop 1 Retrieval → Iterative Reasoning → Hop 2 Retrieval → Final Evidence Aggregation → LLM Generate",
|
| 395 |
benefits: "Solves questions requiring reasoning across multiple independent documents or retrieval steps.",
|
| 396 |
-
challenges: "Prone to error propagation (if one hop fails); Significantly higher latency; Requires generation of accurate intermediate queries."
|
|
|
|
|
|
|
| 397 |
},
|
| 398 |
{
|
| 399 |
type: "Agentic RAG",
|
|
@@ -402,7 +439,9 @@
|
|
| 402 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 403 |
inference: "User Query → Agent Planning/Tool Selection → Agent Executes RAG Retrieval → Agent Reflects/Synthesizes → Final LLM Generate",
|
| 404 |
benefits: "Handles complex, goal-oriented tasks via dynamic planning, tool use, and state tracking.",
|
| 405 |
-
challenges: "Highest development/orchestration complexity; Slowest inference due to planning/execution loops; Failure in planning leads to catastrophic task failure."
|
|
|
|
|
|
|
| 406 |
},
|
| 407 |
{
|
| 408 |
type: "Adaptive RAG",
|
|
@@ -411,7 +450,9 @@
|
|
| 411 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage → Train Query Complexity Classifier",
|
| 412 |
inference: "User Query → Query Classification (Router) → Adaptive Decision → Retrieval Execution → LLM Generate",
|
| 413 |
benefits: "Optimizes pipeline complexity and cost based on query assessment.",
|
| 414 |
-
challenges: "Requires training a robust query classifier/router; Misclassification can lead to poor quality results."
|
|
|
|
|
|
|
| 415 |
},
|
| 416 |
{
|
| 417 |
type: "Hierarchical RAG",
|
|
@@ -420,7 +461,9 @@
|
|
| 420 |
indexing: "Preprocessing → Hierarchical Chunking (Multiple levels) → Multiple Embeddings (for each level) → Vector DB Storage",
|
| 421 |
inference: "User Query → Embedding → Multi-Level Retrieval → Concatenation → LLM Generate",
|
| 422 |
benefits: "Solves the 'needle-in-a-haystack' problem for very long documents; Efficiently prunes non-relevant sections.",
|
| 423 |
-
challenges: "Complex, multi-level chunking and indexing structure; Requires multiple retrieval passes."
|
|
|
|
|
|
|
| 424 |
},
|
| 425 |
{
|
| 426 |
type: "Speculative RAG",
|
|
@@ -429,7 +472,9 @@
|
|
| 429 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 430 |
inference: "User Query → Standard Retrieval → Drafting LLM Generates Tokens → Verifier LLM Checks Drafted Tokens Against Context → LLM Generate",
|
| 431 |
benefits: "Significantly reduces token generation latency and LLM inference cost.",
|
| 432 |
-
challenges: "Does not inherently improve semantic quality or hallucination rate; Requires careful balance between drafting and verifier models."
|
|
|
|
|
|
|
| 433 |
}
|
| 434 |
];
|
| 435 |
|
|
@@ -483,6 +528,7 @@
|
|
| 483 |
<div class="timeline-content">
|
| 484 |
<div class="timeline-type">${rag.type}</div>
|
| 485 |
<span class="category-badge ${getCategoryBadgeClass(rag.category)}">${rag.category}</span>
|
|
|
|
| 486 |
</div>
|
| 487 |
`;
|
| 488 |
yearTimeline.appendChild(item);
|
|
|
|
| 208 |
font-size: 0.9em;
|
| 209 |
}
|
| 210 |
|
| 211 |
+
.timeline-reference {
|
| 212 |
+
color: #888;
|
| 213 |
+
font-size: 0.85em;
|
| 214 |
+
font-style: italic;
|
| 215 |
+
margin-top: 5px;
|
| 216 |
+
line-height: 1.4;
|
| 217 |
+
}
|
| 218 |
+
|
| 219 |
+
.timeline-reference a {
|
| 220 |
+
color: #667eea;
|
| 221 |
+
text-decoration: none;
|
| 222 |
+
transition: color 0.3s ease;
|
| 223 |
+
}
|
| 224 |
+
|
| 225 |
+
.timeline-reference a:hover {
|
| 226 |
+
color: #764ba2;
|
| 227 |
+
text-decoration: underline;
|
| 228 |
+
}
|
| 229 |
+
|
| 230 |
.category-badge {
|
| 231 |
display: inline-block;
|
| 232 |
padding: 4px 12px;
|
|
|
|
| 340 |
indexing: "Preprocessing → Fixed Chunking → Simple Embedding → Vector DB Storage",
|
| 341 |
inference: "User Query → Embedding → Vector DB Lookup (Top-K) → Concatenation → LLM Generate",
|
| 342 |
benefits: "Establishes the knowledge retrieval baseline; Simple and cheap to implement.",
|
| 343 |
+
challenges: "Context loss due to rigid chunking; High hallucination risk; Poor handling of complex/multi-step queries.",
|
| 344 |
+
references: "Lewis, P., et al. (2020). Retrieval-Augmented Generation for Knowledge-Intensive NLP Tasks. NeurIPS 2020.",
|
| 345 |
+
paperUrl: "https://arxiv.org/abs/2005.11401"
|
| 346 |
},
|
| 347 |
{
|
| 348 |
type: "Self RAG",
|
|
|
|
| 351 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 352 |
inference: "User Query → LLM Generates a thought → Retrieval → LLM Generates/Evaluates Retrieved Passages → LLM Decides if Answer is Ready → Final LLM Generate",
|
| 353 |
benefits: "Reduces hallucinations by self-critique/verification; Filters out poor quality retrieved passages.",
|
| 354 |
+
challenges: "Increases inference latency (multiple LLM calls per query); Requires careful tuning of reflection/critique prompt.",
|
| 355 |
+
references: "Asai, A., et al. (2023). Self-RAG: Learning to Retrieve, Generate, and Critique through Self-Reflection. arXiv:2310.11511",
|
| 356 |
+
paperUrl: "https://arxiv.org/abs/2310.11511"
|
| 357 |
},
|
| 358 |
{
|
| 359 |
type: "Modular RAG",
|
|
|
|
| 362 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 363 |
inference: "User Query → Router/Module Selection → Selected Module Executes → LLM Generate",
|
| 364 |
benefits: "Improves flexibility and component reusability; Enables optimal module selection for specific tasks.",
|
| 365 |
+
challenges: "Requires complex routing/planning logic; Overhead of training/managing multiple specialized components.",
|
| 366 |
+
references: "Gao, Y., et al. (2024). Modular RAG: Transforming RAG Systems into LEGO-like Reconfigurable Frameworks. arXiv:2407.21059",
|
| 367 |
+
paperUrl: "https://arxiv.org/abs/2407.21059"
|
| 368 |
},
|
| 369 |
{
|
| 370 |
type: "Graph RAG",
|
|
|
|
| 373 |
indexing: "Preprocessing → Entity/Relation Extraction → Store in Knowledge Graph (KG) & Vector DB",
|
| 374 |
inference: "User Query → Embedding/KG Query → Simultaneous Retrieval (Vector + KG Path) → Concatenation → LLM Generate",
|
| 375 |
benefits: "Resolves complex, multi-hop queries by leveraging factual relationships; Improves interpretability and fact consistency.",
|
| 376 |
+
challenges: "High indexing complexity (KG construction); Expensive maintenance for rapidly changing data; Retrieval latency can be high.",
|
| 377 |
+
references: "Barry, M., et al. (2025). GraphRAG: Leveraging Graph-Based Efficiency to Minimize Hallucinations in LLM-Driven RAG. GenAIK Workshop.",
|
| 378 |
+
paperUrl: "https://aclanthology.org/2025.genaik-1.6/"
|
| 379 |
},
|
| 380 |
{
|
| 381 |
type: "MultiModal RAG",
|
|
|
|
| 384 |
indexing: "Preprocessing → Multi-Modal Embedding (e.g., CLIP) → Stores representations of all modalities in Vector DB",
|
| 385 |
inference: "User Query (Text or Image) → Multi-Modal Embedding → Vector DB Lookup (Retrieves related text, image, metadata) → LLM Generate",
|
| 386 |
benefits: "Unlocks knowledge stored in non-text data (images, charts, tables); Provides a richer context.",
|
| 387 |
+
challenges: "Requires specialized multimodal embeddings/models; Indexing is computationally expensive; Difficult to combine disparate modalities coherently.",
|
| 388 |
+
references: "Gao, Y., et al. (2024). Retrieval-Augmented Multimodal Language Modeling. CVPR 2024.",
|
| 389 |
+
paperUrl: "https://arxiv.org/abs/2211.12561"
|
| 390 |
},
|
| 391 |
{
|
| 392 |
type: "Recursive RAG",
|
|
|
|
| 395 |
indexing: "Preprocessing → Chunking & Summarization → Embeddings of both chunks & summaries → Vector DB Storage",
|
| 396 |
inference: "User Query → Retrieval → LLM evaluates initial result → Recursive Query → Retrieve Specific Chunks → LLM Generate",
|
| 397 |
benefits: "Summarizes context or decomposes queries recursively; Handles high-level questions that require abstract understanding.",
|
| 398 |
+
challenges: "Risk of information loss during aggressive summarization; Chain of thought adds significant latency.",
|
| 399 |
+
references: "Liu, Y., et al. (2024). RAG-GPT: Retrieval-Augmented Generation for Open-Domain Question Answering. IJCNN 2024.",
|
| 400 |
+
paperUrl: "https://arxiv.org/abs/2405.10627"
|
| 401 |
},
|
| 402 |
{
|
| 403 |
type: "Cache RAG",
|
|
|
|
| 406 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 407 |
inference: "User Query → Cache Lookup → If Hit: Return Cached Answer → If Miss: Standard Retrieval → LLM Generate → Cache Store",
|
| 408 |
benefits: "Dramatically improves latency and reduces LLM cost for repeated or highly similar queries.",
|
| 409 |
+
challenges: "Complex cache invalidation logic; Requires robust query similarity and hashing functions.",
|
| 410 |
+
references: "Jin, C., et al. (2024). RAGCache: Efficient Knowledge Caching for Retrieval-Augmented Generation. ACM TOCS.",
|
| 411 |
+
paperUrl: "https://arxiv.org/abs/2404.12457"
|
| 412 |
},
|
| 413 |
{
|
| 414 |
type: "Corrective RAG",
|
|
|
|
| 417 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 418 |
inference: "User Query → Standard Retrieval → Retrieved Docs Evaluated → Corrective Action → LLM Generate",
|
| 419 |
benefits: "Detects and corrects poor quality retrieval/generation post-hoc; Increases overall trustworthiness.",
|
| 420 |
+
challenges: "High latency due to iterative correction loops; Requires training a dedicated evaluation model.",
|
| 421 |
+
references: "Yan, S.-Q., et al. (2024). Corrective Retrieval Augmented Generation. arXiv:2401.15884",
|
| 422 |
+
paperUrl: "https://arxiv.org/abs/2401.15884"
|
| 423 |
},
|
| 424 |
{
|
| 425 |
type: "Multi-Hop RAG",
|
|
|
|
| 428 |
indexing: "Preprocessing → Chunking/Entity Extraction → Embedding → Structured Storage (Vector DB + Optional KG)",
|
| 429 |
inference: "User Query → Query Decomposition → Hop 1 Retrieval → Iterative Reasoning → Hop 2 Retrieval → Final Evidence Aggregation → LLM Generate",
|
| 430 |
benefits: "Solves questions requiring reasoning across multiple independent documents or retrieval steps.",
|
| 431 |
+
challenges: "Prone to error propagation (if one hop fails); Significantly higher latency; Requires generation of accurate intermediate queries.",
|
| 432 |
+
references: "Tang, B., & Yang, Y. (2024). MultiHop-RAG: Benchmarking Retrieval-Augmented Generation for Multi-Hop Queries. COLM 2024.",
|
| 433 |
+
paperUrl: "https://arxiv.org/abs/2401.15391"
|
| 434 |
},
|
| 435 |
{
|
| 436 |
type: "Agentic RAG",
|
|
|
|
| 439 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 440 |
inference: "User Query → Agent Planning/Tool Selection → Agent Executes RAG Retrieval → Agent Reflects/Synthesizes → Final LLM Generate",
|
| 441 |
benefits: "Handles complex, goal-oriented tasks via dynamic planning, tool use, and state tracking.",
|
| 442 |
+
challenges: "Highest development/orchestration complexity; Slowest inference due to planning/execution loops; Failure in planning leads to catastrophic task failure.",
|
| 443 |
+
references: "Singh, A., et al. (2025). Agentic Retrieval-Augmented Generation: A Survey on Agentic RAG.",
|
| 444 |
+
paperUrl: "https://arxiv.org/abs/2412.09550"
|
| 445 |
},
|
| 446 |
{
|
| 447 |
type: "Adaptive RAG",
|
|
|
|
| 450 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage → Train Query Complexity Classifier",
|
| 451 |
inference: "User Query → Query Classification (Router) → Adaptive Decision → Retrieval Execution → LLM Generate",
|
| 452 |
benefits: "Optimizes pipeline complexity and cost based on query assessment.",
|
| 453 |
+
challenges: "Requires training a robust query classifier/router; Misclassification can lead to poor quality results.",
|
| 454 |
+
references: "Jeong, S., et al. (2024). Adaptive-RAG: Learning to Adapt Retrieval-Augmented Large Language Models through Question Complexity. NAACL 2024.",
|
| 455 |
+
paperUrl: "https://arxiv.org/abs/2403.14403"
|
| 456 |
},
|
| 457 |
{
|
| 458 |
type: "Hierarchical RAG",
|
|
|
|
| 461 |
indexing: "Preprocessing → Hierarchical Chunking (Multiple levels) → Multiple Embeddings (for each level) → Vector DB Storage",
|
| 462 |
inference: "User Query → Embedding → Multi-Level Retrieval → Concatenation → LLM Generate",
|
| 463 |
benefits: "Solves the 'needle-in-a-haystack' problem for very long documents; Efficiently prunes non-relevant sections.",
|
| 464 |
+
challenges: "Complex, multi-level chunking and indexing structure; Requires multiple retrieval passes.",
|
| 465 |
+
references: "Huang, H., et al. (2025). Retrieval-Augmented Generation with Hierarchical Knowledge (HiRAG). EMNLP 2025.",
|
| 466 |
+
paperUrl: "https://aclanthology.org/2025.emnlp-main.1/"
|
| 467 |
},
|
| 468 |
{
|
| 469 |
type: "Speculative RAG",
|
|
|
|
| 472 |
indexing: "Preprocessing → Standard Chunking → Embedding → Vector DB Storage",
|
| 473 |
inference: "User Query → Standard Retrieval → Drafting LLM Generates Tokens → Verifier LLM Checks Drafted Tokens Against Context → LLM Generate",
|
| 474 |
benefits: "Significantly reduces token generation latency and LLM inference cost.",
|
| 475 |
+
challenges: "Does not inherently improve semantic quality or hallucination rate; Requires careful balance between drafting and verifier models.",
|
| 476 |
+
references: "Wang, Z., et al. (2025). Speculative RAG: Enhancing Retrieval Augmented Generation through Drafting. ICLR 2025.",
|
| 477 |
+
paperUrl: "https://arxiv.org/abs/2407.08223"
|
| 478 |
}
|
| 479 |
];
|
| 480 |
|
|
|
|
| 528 |
<div class="timeline-content">
|
| 529 |
<div class="timeline-type">${rag.type}</div>
|
| 530 |
<span class="category-badge ${getCategoryBadgeClass(rag.category)}">${rag.category}</span>
|
| 531 |
+
<div class="timeline-reference">📄 <a href="${rag.paperUrl}" target="_blank">${rag.references}</a></div>
|
| 532 |
</div>
|
| 533 |
`;
|
| 534 |
yearTimeline.appendChild(item);
|