# Archived script: CapStoneRAG10/archived_scripts/create_ppt_presentation.py
# Provenance (from repository page): author "Developer", commit 1d10b0a,
# "Initial commit for HuggingFace Spaces - RAG Capstone Project with Qdrant Cloud".
"""Create a comprehensive PowerPoint presentation for RAG Capstone Project."""
import os
from datetime import datetime

from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt
def create_presentation():
    """Build the RAG Capstone Project deck and save it to disk.

    Creates a 20-slide, 10 x 7.5 inch presentation using three reusable
    slide builders (title slide, single-column bullet slide, two-column
    bullet slide), then writes the result to
    ``RAG_Capstone_Project_Presentation.pptx`` in the current directory
    and prints a short summary (slide count, file size).

    Returns:
        None. Side effect: writes the .pptx file and prints to stdout.
    """
    prs = Presentation()
    prs.slide_width = Inches(10)
    prs.slide_height = Inches(7.5)

    # Define color scheme
    DARK_BLUE = RGBColor(25, 55, 109)
    ACCENT_BLUE = RGBColor(0, 120, 215)
    LIGHT_GRAY = RGBColor(240, 240, 240)  # NOTE(review): currently unused; kept as part of the palette
    TEXT_DARK = RGBColor(33, 33, 33)

    def add_title_slide(title, subtitle=""):
        """Add a dark-blue slide with a large centered title and optional subtitle."""
        slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank layout
        background = slide.background
        fill = background.fill
        fill.solid()
        fill.fore_color.rgb = DARK_BLUE

        # Title
        title_box = slide.shapes.add_textbox(Inches(0.5), Inches(2.5), Inches(9), Inches(1.5))
        title_frame = title_box.text_frame
        title_frame.word_wrap = True
        p = title_frame.paragraphs[0]
        p.text = title
        p.font.size = Pt(54)
        p.font.bold = True
        p.font.color.rgb = RGBColor(255, 255, 255)
        p.alignment = PP_ALIGN.CENTER

        # Subtitle (only rendered when non-empty)
        if subtitle:
            subtitle_box = slide.shapes.add_textbox(Inches(0.5), Inches(4.2), Inches(9), Inches(1))
            subtitle_frame = subtitle_box.text_frame
            p = subtitle_frame.paragraphs[0]
            p.text = subtitle
            p.font.size = Pt(28)
            p.font.color.rgb = ACCENT_BLUE
            p.alignment = PP_ALIGN.CENTER
        return slide

    def add_content_slide(title, content_items):
        """Add a white slide with a dark title bar and one bullet list.

        Args:
            title: Text for the title bar.
            content_items: List of strings; each becomes one paragraph
                (empty strings act as spacers).
        """
        slide = prs.slides.add_slide(prs.slide_layouts[6])
        background = slide.background
        fill = background.fill
        fill.solid()
        fill.fore_color.rgb = RGBColor(255, 255, 255)

        # Title bar (shape type 1 == MSO_SHAPE.RECTANGLE)
        title_shape = slide.shapes.add_shape(1, Inches(0), Inches(0), Inches(10), Inches(0.8))
        title_shape.fill.solid()
        title_shape.fill.fore_color.rgb = DARK_BLUE
        title_shape.line.color.rgb = DARK_BLUE

        # Title text
        title_frame = title_shape.text_frame
        p = title_frame.paragraphs[0]
        p.text = title
        p.font.size = Pt(40)
        p.font.bold = True
        p.font.color.rgb = RGBColor(255, 255, 255)
        p.space_before = Pt(8)
        p.space_after = Pt(8)

        # Content: first item reuses the implicit empty paragraph,
        # subsequent items append new paragraphs.
        text_box = slide.shapes.add_textbox(Inches(0.7), Inches(1.2), Inches(8.6), Inches(6))
        text_frame = text_box.text_frame
        text_frame.word_wrap = True
        for i, item in enumerate(content_items):
            if i > 0:
                p = text_frame.add_paragraph()
            else:
                p = text_frame.paragraphs[0]
            p.text = item
            p.level = 0
            p.font.size = Pt(18)
            p.font.color.rgb = TEXT_DARK
            p.space_before = Pt(6)
            p.space_after = Pt(6)
        return slide

    def add_two_column_slide(title, left_title, left_items, right_title, right_items):
        """Add a white slide with a dark title bar and two side-by-side bullet columns."""
        slide = prs.slides.add_slide(prs.slide_layouts[6])
        background = slide.background
        fill = background.fill
        fill.solid()
        fill.fore_color.rgb = RGBColor(255, 255, 255)

        # Title bar (shape type 1 == MSO_SHAPE.RECTANGLE)
        title_shape = slide.shapes.add_shape(1, Inches(0), Inches(0), Inches(10), Inches(0.8))
        title_shape.fill.solid()
        title_shape.fill.fore_color.rgb = DARK_BLUE
        title_shape.line.color.rgb = DARK_BLUE
        title_frame = title_shape.text_frame
        p = title_frame.paragraphs[0]
        p.text = title
        p.font.size = Pt(40)
        p.font.bold = True
        p.font.color.rgb = RGBColor(255, 255, 255)
        p.space_before = Pt(8)
        p.space_after = Pt(8)

        # Left column: bold accent heading followed by bullet items
        left_box = slide.shapes.add_textbox(Inches(0.4), Inches(1.2), Inches(4.6), Inches(6))
        left_frame = left_box.text_frame
        left_frame.word_wrap = True
        p = left_frame.paragraphs[0]
        p.text = left_title
        p.font.size = Pt(20)
        p.font.bold = True
        p.font.color.rgb = ACCENT_BLUE
        p.space_after = Pt(8)
        for item in left_items:
            p = left_frame.add_paragraph()
            p.text = item
            p.level = 0
            p.font.size = Pt(15)
            p.font.color.rgb = TEXT_DARK
            p.space_after = Pt(6)

        # Right column: same layout, shifted to the right half
        right_box = slide.shapes.add_textbox(Inches(5.0), Inches(1.2), Inches(4.6), Inches(6))
        right_frame = right_box.text_frame
        right_frame.word_wrap = True
        p = right_frame.paragraphs[0]
        p.text = right_title
        p.font.size = Pt(20)
        p.font.bold = True
        p.font.color.rgb = ACCENT_BLUE
        p.space_after = Pt(8)
        for item in right_items:
            p = right_frame.add_paragraph()
            p.text = item
            p.level = 0
            p.font.size = Pt(15)
            p.font.color.rgb = TEXT_DARK
            p.space_after = Pt(6)
        return slide

    # Slide 1: Title Slide
    add_title_slide(
        "RAG Capstone Project",
        "Retrieval-Augmented Generation Pipeline with Advanced Evaluation"
    )

    # Slide 2: Project Overview
    add_content_slide(
        "Project Overview",
        [
            "🎯 Goal: Build a production-ready RAG system with comprehensive evaluation",
            "",
            "📊 Key Components:",
            " • Document ingestion from RAGBench datasets (15+ datasets)",
            " • Flexible chunking strategies (6 different approaches)",
            " • Multiple embedding models (8 different embeddings)",
            " • Advanced LLM-based evaluation framework",
            " • Real-time monitoring and audit trails",
            "",
            "🔧 Tech Stack: Python, Streamlit, ChromaDB, Groq LLM API, Sentence Transformers"
        ]
    )

    # Slide 3: RAG Pipeline Architecture
    add_content_slide(
        "RAG Pipeline Architecture",
        [
            "1️⃣ DATA INGESTION",
            " Load documents from 15+ RAGBench datasets (CovidQA, CUAD, FinQA, etc.)",
            "",
            "2️⃣ DOCUMENT CHUNKING",
            " Apply 6 chunking strategies to split documents into manageable pieces",
            "",
            "3️⃣ EMBEDDING & VECTORIZATION",
            " Convert chunks to dense vectors using multiple embedding models",
            "",
            "4️⃣ VECTOR STORAGE",
            " Store in ChromaDB with semantic search capabilities",
            "",
            "5️⃣ RETRIEVAL & RANKING",
            " Retrieve relevant documents based on query similarity",
            "",
            "6️⃣ RESPONSE GENERATION",
            " Use Groq LLM to generate answers grounded in retrieved documents"
        ]
    )

    # Slide 4: Chunking Strategies
    add_two_column_slide(
        "Document Chunking Strategies",
        "Chunking Methods",
        [
            "1. Dense Chunking",
            " Fixed-size chunks (512 tokens) with overlap",
            " Best for: Uniform content",
            "",
            "2. Sparse Chunking",
            " Semantic boundaries (paragraphs)",
            " Best for: Structured documents",
            "",
            "3. Hybrid Chunking",
            " Combines dense + semantic splitting",
            " Best for: Mixed content types",
        ],
        "Advanced Methods",
        [
            "4. Re-ranking Chunking",
            " Chunks with relevance re-ranking",
            " Best for: High precision retrieval",
            "",
            "5. Row-based Chunking",
            " Preserves data structure for tables",
            " Best for: Tabular data",
            "",
            "6. Entity-based Chunking",
            " Groups by semantic entities",
            " Best for: Knowledge extraction",
        ]
    )

    # Slide 5: Embedding Models
    add_content_slide(
        "Embedding Models Used",
        [
            "🔹 General Purpose Models:",
            " • sentence-transformers/all-mpnet-base-v2 (High quality, 768-dim)",
            " • sentence-transformers/all-MiniLM-L6-v2 (Fast, lightweight, 384-dim)",
            "",
            "🔹 Domain-Specific Models:",
            " • emilyalsentzer/Bio_ClinicalBERT (Clinical text, 768-dim)",
            " • microsoft/BiomedNLP-PubMedBERT (Medical abstracts, 768-dim)",
            " • allenai/specter (Academic papers, 768-dim)",
            "",
            "🔹 Multilingual Models:",
            " • sentence-transformers/multilingual-MiniLM-L12-v2 (110 languages)",
            " • sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            "",
            "🔹 API-Based Model:",
            " • gemini-embedding-001 (Google Gemini API embeddings)"
        ]
    )

    # Slide 6: RAG Evaluation Challenge
    add_content_slide(
        "The RAG Evaluation Challenge",
        [
            "❌ Why Traditional Metrics Fail?",
            " • BLEU/ROUGE only measure surface-level similarity",
            " • Don't evaluate grounding in retrieved documents",
            " • Can't detect hallucinations or factual errors",
            "",
            "✅ What We Need?",
            " • Metrics that measure document relevance to query",
            " • Metrics that measure document usage in response",
            " • Metrics that evaluate response grounding (no hallucinations)",
            " • Metrics that assess completeness of coverage",
            "",
            "🎓 Our Solution: LLM-based Evaluation Framework",
            " Inspired by RAGBench paper (arXiv:2407.11005)"
        ]
    )

    # Slide 7: TRACE Framework
    add_content_slide(
        "TRACE Framework - 4 Core Metrics",
        [
            "🔴 RELEVANCE (R)",
            " Fraction of retrieved context relevant to the query",
            " Formula: Σ Len(Relevant spans) / Σ Len(All retrieved docs)",
            "",
            "🔵 UTILIZATION (T)",
            " Fraction of retrieved context used in the response",
            " Formula: Σ Len(Used spans) / Σ Len(All retrieved docs)",
            "",
            "🟢 ADHERENCE (A)",
            " Boolean: Is the response fully grounded in documents?",
            " Detects hallucinations and unsupported claims",
            "",
            "🟡 COMPLETENESS (C)",
            " Fraction of relevant information covered by response",
            " Formula: Len(Relevant ∩ Used) / Len(Relevant)"
        ]
    )

    # Slide 8: LLM-Based Evaluation
    add_content_slide(
        "Advanced LLM-Based Evaluation",
        [
            "🤖 GPT Labeling Approach:",
            " • Use LLM (GPT-4/Groq) to annotate response sentences",
            " • Match each response sentence to supporting document spans",
            " • Detect fully supported, partially supported, and unsupported sentences",
            "",
            "📋 Evaluation Process:",
            " 1. Extract all sentences from both response and documents",
            " 2. Prompt LLM to identify relevant document sentences for query",
            " 3. Prompt LLM to map response sentences to document spans",
            " 4. Calculate support metrics at sentence and document level",
            "",
            "✨ Advantages:",
            " ✓ Semantic understanding (not just keyword matching)",
            " ✓ Detects hallucinations and contradictions",
            " ✓ Provides explainable audit trails",
            " ✓ Works across different domains and languages"
        ]
    )

    # Slide 9: Evaluation Output Metrics
    add_two_column_slide(
        "Evaluation Output & Metrics",
        "Per-Response Metrics",
        [
            "✓ Context Relevance (0-1)",
            " How much retrieved content is relevant?",
            "",
            "✓ Context Utilization (0-1)",
            " How much retrieved content was used?",
            "",
            "✓ Adherence (0-1)",
            " Is response grounded in documents?",
            "",
            "✓ Completeness (0-1)",
            " Does response cover relevant information?",
        ],
        "Aggregate Metrics",
        [
            "📊 RMSE Metrics",
            " Root Mean Squared Error for each metric",
            "",
            "📈 AUC-ROC Metrics",
            " Area Under ROC Curve for binary classification",
            "",
            "🎯 Average Score",
            " Mean of all 4 TRACE metrics",
            "",
            "📋 Detailed Audit Trail",
            " Sentence-level support information",
        ]
    )

    # Slide 10: Audit Trail & Explainability
    add_content_slide(
        "Explainability & Audit Trails",
        [
            "🔍 Detailed Audit Information Captured:",
            "",
            "✓ Original Query",
            " User's question or request",
            "",
            "✓ LLM Prompt",
            " Exact instructions sent to LLM for evaluation",
            "",
            "✓ LLM Response",
            " Complete evaluation reasoning from LLM",
            "",
            "✓ Retrieved Documents",
            " Context provided to the RAG system",
            "",
            "✓ Sentence-Level Support Map",
            " Which document spans support each response sentence",
            "",
            "🎯 Enables: Root cause analysis, model improvements, and trust building"
        ]
    )

    # Slide 11: System Architecture
    add_content_slide(
        "System Architecture Overview",
        [
            "📱 Frontend: Streamlit Web Interface",
            " • Interactive configuration panel",
            " • Real-time collection management",
            " • Chat interface with context display",
            " • Evaluation results visualization",
            "",
            "⚙️ Backend: Python Services",
            " • Vector store management (ChromaDB with SQLite indexing)",
            " • Embedding pipeline with 8 models",
            " • LLM integration (Groq API with rate limiting)",
            " • Advanced evaluation engine",
            "",
            "📚 Data Layer: ChromaDB",
            " • Persistent vector storage",
            " • SQLite metadata indexing",
            " • Multi-collection support",
            " • 4 active collections from RAGBench"
        ]
    )

    # Slide 12: Key Features
    add_two_column_slide(
        "Key System Features",
        "Data Management",
        [
            "✓ 15+ RAGBench datasets",
            "✓ Flexible chunking strategies",
            "✓ Multiple embedding models",
            "✓ Real-time collection loading",
            "✓ Batch processing capability",
            "✓ Persistent storage (ChromaDB)",
            "✓ SQLite metadata indexing",
        ],
        "Evaluation & Monitoring",
        [
            "✓ LLM-based evaluation",
            "✓ 4 TRACE metrics",
            "✓ RMSE & AUC metrics",
            "✓ Sentence-level analysis",
            "✓ Hallucination detection",
            "✓ Detailed audit trails",
            "✓ JSON export & visualization",
        ]
    )

    # Slide 13: LLM Configuration
    add_content_slide(
        "LLM Configuration & Settings",
        [
            "🔧 Groq LLM Models Supported:",
            " • meta-llama/llama-4-maverick-17b-128e-instruct",
            " • llama-3.1-8b-instant",
            " • openai/gpt-oss-120b",
            "",
            "⚙️ Configurable Parameters:",
            " • Temperature: 0.0 (deterministic for evaluation)",
            " • Max Tokens: 2048 (sufficient for detailed analysis)",
            " • Rate Limit: 30 RPM (Groq API limit)",
            " • Rate Limit Delay: 2.0 seconds (throttling)",
            "",
            "🎯 System Prompt:",
            " Specialized fact-checking and citation verification prompt",
            " Enables LLM to evaluate without additional fine-tuning"
        ]
    )

    # Slide 14: Data Flow Example
    add_content_slide(
        "Data Flow Example: A Question in RAG",
        [
            "1️⃣ USER QUERY",
            ' "What are the COVID-19 vaccine side effects?"',
            "",
            "2️⃣ RETRIEVAL",
            " ChromaDB retrieves top 5 similar chunks from CovidQA dataset",
            "",
            "3️⃣ CONTEXT PREPARATION",
            " Relevant medical documents selected and formatted",
            "",
            "4️⃣ RESPONSE GENERATION",
            " Groq LLM generates answer: 'Common side effects include...'",
            "",
            "5️⃣ EVALUATION",
            " • LLM verifies: Are claims supported by documents?",
            " • Calculates: Relevance=0.92, Utilization=0.87, Adherence=1.0, Completeness=0.95",
            "",
            "6️⃣ OUTPUT",
            " JSON with metrics, audit trail, and source documents"
        ]
    )

    # Slide 15: Use Cases
    add_content_slide(
        "Real-World Use Cases",
        [
            "📋 Document Q&A Systems",
            " Help desk, knowledge base search, document retrieval",
            "",
            "🏥 Medical Information Retrieval",
            " Clinical decision support, patient education",
            "",
            "⚖️ Legal Document Analysis",
            " Contract review, case law research, compliance checking",
            "",
            "💰 Financial Analysis",
            " SEC filing analysis, market research, investment insights",
            "",
            "🎓 Academic Research",
            " Paper indexing, literature review, citation analysis",
            "",
            "🏢 Enterprise Knowledge Management",
            " Internal document search, policy retrieval, FAQs"
        ]
    )

    # Slide 16: Performance & Results
    add_content_slide(
        "System Performance & Achievements",
        [
            "✅ Successfully Processed:",
            " • 4 collections from RAGBench datasets",
            " • Recovered and re-indexed 4M+ vector embeddings in ChromaDB",
            " • 8 different embedding models tested",
            " • 6 chunking strategies implemented and evaluated",
            "",
            "📊 Evaluation Coverage:",
            " • Batch evaluation of 100+ test cases",
            " • Per-sentence analysis with GPT labeling",
            " • Comprehensive audit trails with LLM reasoning",
            "",
            "⚡ Performance Metrics:",
            " • Sub-second retrieval latency",
            " • Batch evaluation: ~2-3 seconds per query (with GPT labeling)",
            " • Rate limiting: Controlled via Groq API settings"
        ]
    )

    # Slide 17: Technical Innovations
    add_content_slide(
        "Technical Innovations",
        [
            "🔹 Advanced ChromaDB Recovery",
            " Smart SQLite index rebuilding preserving all vector data",
            "",
            "🔹 Smart Collection Naming",
            " Automatic metadata extraction with interactive fallback UI",
            "",
            "🔹 Sentence-Level Evaluation",
            " Maps individual response sentences to document spans",
            "",
            "🔹 Multi-Metric Evaluation",
            " RMSE and AUC-ROC metrics alongside TRACE framework",
            "",
            "🔹 Explainable AI",
            " Complete audit trails showing LLM reasoning for each decision",
            "",
            "🔹 Flexible Pipeline",
            " Modular design allows easy swapping of chunking, embedding, and LLM components"
        ]
    )

    # Slide 18: Challenges & Solutions
    add_two_column_slide(
        "Challenges & Solutions",
        "Challenges Faced",
        [
            "🔴 ChromaDB Index Corruption",
            " Collection folders orphaned from SQLite",
            "",
            "🔴 Evaluation Consistency",
            " Different chunking strategies vary in effectiveness",
            "",
            "🔴 Rate Limiting",
            " Groq API has strict RPM limits",
            "",
            "🔴 Hallucination Detection",
            " Hard to detect factual errors without reference",
            "",
            "🔴 Scalability",
            " Large batch evaluations take time",
        ],
        "Solutions Implemented",
        [
            "✅ Data-Preserving Recovery",
            " Direct SQLite rebuild scripts",
            "",
            "✅ Comprehensive Testing",
            " Baseline metrics for different strategies",
            "",
            "✅ Intelligent Queuing",
            " Configurable rate limit delays",
            "",
            "✅ LLM Verification",
            " Adherence metric detects unsupported claims",
            "",
            "✅ Batch Processing",
            " Parallel processing where possible",
        ]
    )

    # Slide 19: Future Roadmap
    add_content_slide(
        "Future Development Roadmap",
        [
            "🚀 Phase 2: Production Enhancements",
            " • Distributed processing for large-scale evaluation",
            " • Caching layer for frequently accessed documents",
            " • Real-time monitoring dashboard",
            "",
            "🚀 Phase 3: Advanced Features",
            " • Multimodal RAG (images, tables, PDFs)",
            " • Knowledge graph integration",
            " • Cross-domain transfer learning",
            "",
            "🚀 Phase 4: Enterprise Features",
            " • Multi-tenant support",
            " • Fine-tuned models for specific domains",
            " • Advanced security and compliance",
            "",
            "🚀 Phase 5: Research Contributions",
            " • Publication of benchmark results",
            " • Open-source evaluation framework",
            " • Industry collaboration"
        ]
    )

    # Slide 20: Conclusion
    add_title_slide(
        "Key Takeaways",
        "Advanced RAG with Comprehensive Evaluation"
    )

    # Add content to conclusion (the slide just appended above)
    slide = prs.slides[-1]
    text_box = slide.shapes.add_textbox(Inches(1), Inches(2.5), Inches(8), Inches(4))
    text_frame = text_box.text_frame
    points = [
        "✓ Complete RAG pipeline from ingestion to evaluation",
        "✓ Flexible architecture supporting multiple chunking and embedding strategies",
        "✓ LLM-based evaluation with sentence-level grounding verification",
        "✓ Explainable AI with comprehensive audit trails",
        "✓ Production-ready implementation with real data (RAGBench datasets)",
        "✓ Addresses critical RAG evaluation challenges",
    ]
    for i, point in enumerate(points):
        if i == 0:
            p = text_frame.paragraphs[0]
        else:
            p = text_frame.add_paragraph()
        p.text = point
        p.font.size = Pt(20)
        p.font.color.rgb = TEXT_DARK
        p.space_before = Pt(12)
        p.space_after = Pt(12)

    # Save presentation
    output_file = "RAG_Capstone_Project_Presentation.pptx"
    prs.save(output_file)
    print(f"✅ Presentation created successfully: {output_file}")
    print(f"📊 Total slides: {len(prs.slides)}")
    # Ask the OS for the size instead of re-opening and reading the whole
    # file (the original left the file handle unclosed).
    print(f"💾 File size: {os.path.getsize(output_file) / 1024:.2f} KB")
# Script entry point: build and save the presentation when run directly.
if __name__ == "__main__":
    create_presentation()