"""Create a comprehensive PowerPoint presentation for RAG Capstone Project."""

import os
from datetime import datetime

from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt


def create_presentation():
    """Create the RAG Capstone Project presentation.

    Builds a 20-slide deck (title, content, and two-column layouts) and saves
    it as ``RAG_Capstone_Project_Presentation.pptx`` in the current directory.
    Prints a short summary (slide count, file size) on success.
    """
    prs = Presentation()
    # 10" x 7.5" is the classic 4:3 slide size.
    prs.slide_width = Inches(10)
    prs.slide_height = Inches(7.5)

    # Define color scheme
    DARK_BLUE = RGBColor(25, 55, 109)
    ACCENT_BLUE = RGBColor(0, 120, 215)
    LIGHT_GRAY = RGBColor(240, 240, 240)
    TEXT_DARK = RGBColor(33, 33, 33)
    WHITE = RGBColor(255, 255, 255)

    def _add_blank_slide(background_color):
        """Add a blank-layout slide with a solid background fill."""
        slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank layout
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = background_color
        return slide

    def _add_title_bar(slide, title):
        """Draw the dark-blue banner with the slide title across the top."""
        # MSO_SHAPE.RECTANGLE replaces the magic shape-type constant 1.
        bar = slide.shapes.add_shape(
            MSO_SHAPE.RECTANGLE, Inches(0), Inches(0), Inches(10), Inches(0.8)
        )
        bar.fill.solid()
        bar.fill.fore_color.rgb = DARK_BLUE
        bar.line.color.rgb = DARK_BLUE
        p = bar.text_frame.paragraphs[0]
        p.text = title
        p.font.size = Pt(40)
        p.font.bold = True
        p.font.color.rgb = WHITE
        p.space_before = Pt(8)
        p.space_after = Pt(8)

    def _add_column(slide, left, heading, items):
        """Fill one column (heading + bullet items) at horizontal offset *left*."""
        box = slide.shapes.add_textbox(left, Inches(1.2), Inches(4.6), Inches(6))
        frame = box.text_frame
        frame.word_wrap = True
        p = frame.paragraphs[0]
        p.text = heading
        p.font.size = Pt(20)
        p.font.bold = True
        p.font.color.rgb = ACCENT_BLUE
        p.space_after = Pt(8)
        for item in items:
            p = frame.add_paragraph()
            p.text = item
            p.level = 0
            p.font.size = Pt(15)
            p.font.color.rgb = TEXT_DARK
            p.space_after = Pt(6)

    def add_title_slide(title, subtitle=""):
        """Add a title slide (dark background, centered title, optional subtitle)."""
        slide = _add_blank_slide(DARK_BLUE)

        # Title
        title_box = slide.shapes.add_textbox(
            Inches(0.5), Inches(2.5), Inches(9), Inches(1.5)
        )
        title_frame = title_box.text_frame
        title_frame.word_wrap = True
        p = title_frame.paragraphs[0]
        p.text = title
        p.font.size = Pt(54)
        p.font.bold = True
        p.font.color.rgb = WHITE
        p.alignment = PP_ALIGN.CENTER

        # Subtitle
        if subtitle:
            subtitle_box = slide.shapes.add_textbox(
                Inches(0.5), Inches(4.2), Inches(9), Inches(1)
            )
            p = subtitle_box.text_frame.paragraphs[0]
            p.text = subtitle
            p.font.size = Pt(28)
            p.font.color.rgb = ACCENT_BLUE
            p.alignment = PP_ALIGN.CENTER
        return slide

    def add_content_slide(title, content_items):
        """Add a content slide: title bar plus a single column of bullet lines."""
        slide = _add_blank_slide(WHITE)
        _add_title_bar(slide, title)

        # Content
        text_box = slide.shapes.add_textbox(
            Inches(0.7), Inches(1.2), Inches(8.6), Inches(6)
        )
        text_frame = text_box.text_frame
        text_frame.word_wrap = True
        for i, item in enumerate(content_items):
            # The text frame starts with one empty paragraph; reuse it first.
            p = text_frame.paragraphs[0] if i == 0 else text_frame.add_paragraph()
            p.text = item
            p.level = 0
            p.font.size = Pt(18)
            p.font.color.rgb = TEXT_DARK
            p.space_before = Pt(6)
            p.space_after = Pt(6)
        return slide

    def add_two_column_slide(title, left_title, left_items, right_title, right_items):
        """Add a two-column content slide (title bar + two headed columns)."""
        slide = _add_blank_slide(WHITE)
        _add_title_bar(slide, title)
        _add_column(slide, Inches(0.4), left_title, left_items)
        _add_column(slide, Inches(5.0), right_title, right_items)
        return slide

    # Slide 1: Title Slide
    add_title_slide(
        "RAG Capstone Project",
        "Retrieval-Augmented Generation Pipeline with Advanced Evaluation",
    )

    # Slide 2: Project Overview
    add_content_slide(
        "Project Overview",
        [
            "🎯 Goal: Build a production-ready RAG system with comprehensive evaluation",
            "",
            "📊 Key Components:",
            " • Document ingestion from RAGBench datasets (15+ datasets)",
            " • Flexible chunking strategies (6 different approaches)",
            " • Multiple embedding models (8 different embeddings)",
            " • Advanced LLM-based evaluation framework",
            " • Real-time monitoring and audit trails",
            "",
            "🔧 Tech Stack: Python, Streamlit, ChromaDB, Groq LLM API, Sentence Transformers",
        ],
    )

    # Slide 3: RAG Pipeline Architecture
    add_content_slide(
        "RAG Pipeline Architecture",
        [
            "1️⃣ DATA INGESTION",
            " Load documents from 15+ RAGBench datasets (CovidQA, CUAD, FinQA, etc.)",
            "",
            "2️⃣ DOCUMENT CHUNKING",
            " Apply 6 chunking strategies to split documents into manageable pieces",
            "",
            "3️⃣ EMBEDDING & VECTORIZATION",
            " Convert chunks to dense vectors using multiple embedding models",
            "",
            "4️⃣ VECTOR STORAGE",
            " Store in ChromaDB with semantic search capabilities",
            "",
            "5️⃣ RETRIEVAL & RANKING",
            " Retrieve relevant documents based on query similarity",
            "",
            "6️⃣ RESPONSE GENERATION",
            " Use Groq LLM to generate answers grounded in retrieved documents",
        ],
    )

    # Slide 4: Chunking Strategies
    add_two_column_slide(
        "Document Chunking Strategies",
        "Chunking Methods",
        [
            "1. Dense Chunking",
            " Fixed-size chunks (512 tokens) with overlap",
            " Best for: Uniform content",
            "",
            "2. Sparse Chunking",
            " Semantic boundaries (paragraphs)",
            " Best for: Structured documents",
            "",
            "3. Hybrid Chunking",
            " Combines dense + semantic splitting",
            " Best for: Mixed content types",
        ],
        "Advanced Methods",
        [
            "4. Re-ranking Chunking",
            " Chunks with relevance re-ranking",
            " Best for: High precision retrieval",
            "",
            "5. Row-based Chunking",
            " Preserves data structure for tables",
            " Best for: Tabular data",
            "",
            "6. Entity-based Chunking",
            " Groups by semantic entities",
            " Best for: Knowledge extraction",
        ],
    )

    # Slide 5: Embedding Models
    add_content_slide(
        "Embedding Models Used",
        [
            "🔹 General Purpose Models:",
            " • sentence-transformers/all-mpnet-base-v2 (High quality, 768-dim)",
            " • sentence-transformers/all-MiniLM-L6-v2 (Fast, lightweight, 384-dim)",
            "",
            "🔹 Domain-Specific Models:",
            " • emilyalsentzer/Bio_ClinicalBERT (Clinical text, 768-dim)",
            " • microsoft/BiomedNLP-PubMedBERT (Medical abstracts, 768-dim)",
            " • allenai/specter (Academic papers, 768-dim)",
            "",
            "🔹 Multilingual Models:",
            " • sentence-transformers/multilingual-MiniLM-L12-v2 (110 languages)",
            " • sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            "",
            "🔹 API-Based Model:",
            " • gemini-embedding-001 (Google Gemini API embeddings)",
        ],
    )

    # Slide 6: RAG Evaluation Challenge
    add_content_slide(
        "The RAG Evaluation Challenge",
        [
            "❌ Why Traditional Metrics Fail?",
            " • BLEU/ROUGE only measure surface-level similarity",
            " • Don't evaluate grounding in retrieved documents",
            " • Can't detect hallucinations or factual errors",
            "",
            "✅ What We Need?",
            " • Metrics that measure document relevance to query",
            " • Metrics that measure document usage in response",
            " • Metrics that evaluate response grounding (no hallucinations)",
            " • Metrics that assess completeness of coverage",
            "",
            "🎓 Our Solution: LLM-based Evaluation Framework",
            " Inspired by RAGBench paper (arXiv:2407.11005)",
        ],
    )

    # Slide 7: TRACE Framework
    add_content_slide(
        "TRACE Framework - 4 Core Metrics",
        [
            "🔴 RELEVANCE (R)",
            " Fraction of retrieved context relevant to the query",
            " Formula: Σ Len(Relevant spans) / Σ Len(All retrieved docs)",
            "",
            "🔵 UTILIZATION (T)",
            " Fraction of retrieved context used in the response",
            " Formula: Σ Len(Used spans) / Σ Len(All retrieved docs)",
            "",
            "🟢 ADHERENCE (A)",
            " Boolean: Is the response fully grounded in documents?",
            " Detects hallucinations and unsupported claims",
            "",
            "🟡 COMPLETENESS (C)",
            " Fraction of relevant information covered by response",
            " Formula: Len(Relevant ∩ Used) / Len(Relevant)",
        ],
    )

    # Slide 8: LLM-Based Evaluation
    add_content_slide(
        "Advanced LLM-Based Evaluation",
        [
            "🤖 GPT Labeling Approach:",
            " • Use LLM (GPT-4/Groq) to annotate response sentences",
            " • Match each response sentence to supporting document spans",
            " • Detect fully supported, partially supported, and unsupported sentences",
            "",
            "📋 Evaluation Process:",
            " 1. Extract all sentences from both response and documents",
            " 2. Prompt LLM to identify relevant document sentences for query",
            " 3. Prompt LLM to map response sentences to document spans",
            " 4. Calculate support metrics at sentence and document level",
            "",
            "✨ Advantages:",
            " ✓ Semantic understanding (not just keyword matching)",
            " ✓ Detects hallucinations and contradictions",
            " ✓ Provides explainable audit trails",
            " ✓ Works across different domains and languages",
        ],
    )

    # Slide 9: Evaluation Output Metrics
    add_two_column_slide(
        "Evaluation Output & Metrics",
        "Per-Response Metrics",
        [
            "✓ Context Relevance (0-1)",
            " How much retrieved content is relevant?",
            "",
            "✓ Context Utilization (0-1)",
            " How much retrieved content was used?",
            "",
            "✓ Adherence (0-1)",
            " Is response grounded in documents?",
            "",
            "✓ Completeness (0-1)",
            " Does response cover relevant information?",
        ],
        "Aggregate Metrics",
        [
            "📊 RMSE Metrics",
            " Root Mean Squared Error for each metric",
            "",
            "📈 AUC-ROC Metrics",
            " Area Under ROC Curve for binary classification",
            "",
            "🎯 Average Score",
            " Mean of all 4 TRACE metrics",
            "",
            "📋 Detailed Audit Trail",
            " Sentence-level support information",
        ],
    )

    # Slide 10: Audit Trail & Explainability
    add_content_slide(
        "Explainability & Audit Trails",
        [
            "🔍 Detailed Audit Information Captured:",
            "",
            "✓ Original Query",
            " User's question or request",
            "",
            "✓ LLM Prompt",
            " Exact instructions sent to LLM for evaluation",
            "",
            "✓ LLM Response",
            " Complete evaluation reasoning from LLM",
            "",
            "✓ Retrieved Documents",
            " Context provided to the RAG system",
            "",
            "✓ Sentence-Level Support Map",
            " Which document spans support each response sentence",
            "",
            "🎯 Enables: Root cause analysis, model improvements, and trust building",
        ],
    )

    # Slide 11: System Architecture
    add_content_slide(
        "System Architecture Overview",
        [
            "📱 Frontend: Streamlit Web Interface",
            " • Interactive configuration panel",
            " • Real-time collection management",
            " • Chat interface with context display",
            " • Evaluation results visualization",
            "",
            "⚙️ Backend: Python Services",
            " • Vector store management (ChromaDB with SQLite indexing)",
            " • Embedding pipeline with 8 models",
            " • LLM integration (Groq API with rate limiting)",
            " • Advanced evaluation engine",
            "",
            "📚 Data Layer: ChromaDB",
            " • Persistent vector storage",
            " • SQLite metadata indexing",
            " • Multi-collection support",
            " • 4 active collections from RAGBench",
        ],
    )

    # Slide 12: Key Features
    add_two_column_slide(
        "Key System Features",
        "Data Management",
        [
            "✓ 15+ RAGBench datasets",
            "✓ Flexible chunking strategies",
            "✓ Multiple embedding models",
            "✓ Real-time collection loading",
            "✓ Batch processing capability",
            "✓ Persistent storage (ChromaDB)",
            "✓ SQLite metadata indexing",
        ],
        "Evaluation & Monitoring",
        [
            "✓ LLM-based evaluation",
            "✓ 4 TRACE metrics",
            "✓ RMSE & AUC metrics",
            "✓ Sentence-level analysis",
            "✓ Hallucination detection",
            "✓ Detailed audit trails",
            "✓ JSON export & visualization",
        ],
    )

    # Slide 13: LLM Configuration
    add_content_slide(
        "LLM Configuration & Settings",
        [
            "🔧 Groq LLM Models Supported:",
            " • meta-llama/llama-4-maverick-17b-128e-instruct",
            " • llama-3.1-8b-instant",
            " • openai/gpt-oss-120b",
            "",
            "⚙️ Configurable Parameters:",
            " • Temperature: 0.0 (deterministic for evaluation)",
            " • Max Tokens: 2048 (sufficient for detailed analysis)",
            " • Rate Limit: 30 RPM (Groq API limit)",
            " • Rate Limit Delay: 2.0 seconds (throttling)",
            "",
            "🎯 System Prompt:",
            " Specialized fact-checking and citation verification prompt",
            " Enables LLM to evaluate without additional fine-tuning",
        ],
    )

    # Slide 14: Data Flow Example
    add_content_slide(
        "Data Flow Example: A Question in RAG",
        [
            "1️⃣ USER QUERY",
            ' "What are the COVID-19 vaccine side effects?"',
            "",
            "2️⃣ RETRIEVAL",
            " ChromaDB retrieves top 5 similar chunks from CovidQA dataset",
            "",
            "3️⃣ CONTEXT PREPARATION",
            " Relevant medical documents selected and formatted",
            "",
            "4️⃣ RESPONSE GENERATION",
            " Groq LLM generates answer: 'Common side effects include...'",
            "",
            "5️⃣ EVALUATION",
            " • LLM verifies: Are claims supported by documents?",
            " • Calculates: Relevance=0.92, Utilization=0.87, Adherence=1.0, Completeness=0.95",
            "",
            "6️⃣ OUTPUT",
            " JSON with metrics, audit trail, and source documents",
        ],
    )

    # Slide 15: Use Cases
    add_content_slide(
        "Real-World Use Cases",
        [
            "📋 Document Q&A Systems",
            " Help desk, knowledge base search, document retrieval",
            "",
            "🏥 Medical Information Retrieval",
            " Clinical decision support, patient education",
            "",
            "⚖️ Legal Document Analysis",
            " Contract review, case law research, compliance checking",
            "",
            "💰 Financial Analysis",
            " SEC filing analysis, market research, investment insights",
            "",
            "🎓 Academic Research",
            " Paper indexing, literature review, citation analysis",
            "",
            "🏢 Enterprise Knowledge Management",
            " Internal document search, policy retrieval, FAQs",
        ],
    )

    # Slide 16: Performance & Results
    add_content_slide(
        "System Performance & Achievements",
        [
            "✅ Successfully Processed:",
            " • 4 collections from RAGBench datasets",
            " • Recovered and re-indexed 4M+ vector embeddings in ChromaDB",
            " • 8 different embedding models tested",
            " • 6 chunking strategies implemented and evaluated",
            "",
            "📊 Evaluation Coverage:",
            " • Batch evaluation of 100+ test cases",
            " • Per-sentence analysis with GPT labeling",
            " • Comprehensive audit trails with LLM reasoning",
            "",
            "⚡ Performance Metrics:",
            " • Sub-second retrieval latency",
            " • Batch evaluation: ~2-3 seconds per query (with GPT labeling)",
            " • Rate limiting: Controlled via Groq API settings",
        ],
    )

    # Slide 17: Technical Innovations
    add_content_slide(
        "Technical Innovations",
        [
            "🔹 Advanced ChromaDB Recovery",
            " Smart SQLite index rebuilding preserving all vector data",
            "",
            "🔹 Smart Collection Naming",
            " Automatic metadata extraction with interactive fallback UI",
            "",
            "🔹 Sentence-Level Evaluation",
            " Maps individual response sentences to document spans",
            "",
            "🔹 Multi-Metric Evaluation",
            " RMSE and AUC-ROC metrics alongside TRACE framework",
            "",
            "🔹 Explainable AI",
            " Complete audit trails showing LLM reasoning for each decision",
            "",
            "🔹 Flexible Pipeline",
            " Modular design allows easy swapping of chunking, embedding, and LLM components",
        ],
    )

    # Slide 18: Challenges & Solutions
    add_two_column_slide(
        "Challenges & Solutions",
        "Challenges Faced",
        [
            "🔴 ChromaDB Index Corruption",
            " Collection folders orphaned from SQLite",
            "",
            "🔴 Evaluation Consistency",
            " Different chunking strategies vary in effectiveness",
            "",
            "🔴 Rate Limiting",
            " Groq API has strict RPM limits",
            "",
            "🔴 Hallucination Detection",
            " Hard to detect factual errors without reference",
            "",
            "🔴 Scalability",
            " Large batch evaluations take time",
        ],
        "Solutions Implemented",
        [
            "✅ Data-Preserving Recovery",
            " Direct SQLite rebuild scripts",
            "",
            "✅ Comprehensive Testing",
            " Baseline metrics for different strategies",
            "",
            "✅ Intelligent Queuing",
            " Configurable rate limit delays",
            "",
            "✅ LLM Verification",
            " Adherence metric detects unsupported claims",
            "",
            "✅ Batch Processing",
            " Parallel processing where possible",
        ],
    )

    # Slide 19: Future Roadmap
    add_content_slide(
        "Future Development Roadmap",
        [
            "🚀 Phase 2: Production Enhancements",
            " • Distributed processing for large-scale evaluation",
            " • Caching layer for frequently accessed documents",
            " • Real-time monitoring dashboard",
            "",
            "🚀 Phase 3: Advanced Features",
            " • Multimodal RAG (images, tables, PDFs)",
            " • Knowledge graph integration",
            " • Cross-domain transfer learning",
            "",
            "🚀 Phase 4: Enterprise Features",
            " • Multi-tenant support",
            " • Fine-tuned models for specific domains",
            " • Advanced security and compliance",
            "",
            "🚀 Phase 5: Research Contributions",
            " • Publication of benchmark results",
            " • Open-source evaluation framework",
            " • Industry collaboration",
        ],
    )

    # Slide 20: Conclusion
    add_title_slide(
        "Key Takeaways",
        "Advanced RAG with Comprehensive Evaluation",
    )

    # Add content to conclusion (the slide just created above).
    slide = prs.slides[-1]
    text_box = slide.shapes.add_textbox(Inches(1), Inches(2.5), Inches(8), Inches(4))
    text_frame = text_box.text_frame
    points = [
        "✓ Complete RAG pipeline from ingestion to evaluation",
        "✓ Flexible architecture supporting multiple chunking and embedding strategies",
        "✓ LLM-based evaluation with sentence-level grounding verification",
        "✓ Explainable AI with comprehensive audit trails",
        "✓ Production-ready implementation with real data (RAGBench datasets)",
        "✓ Addresses critical RAG evaluation challenges",
    ]
    for i, point in enumerate(points):
        # The text frame starts with one empty paragraph; reuse it first.
        p = text_frame.paragraphs[0] if i == 0 else text_frame.add_paragraph()
        p.text = point
        p.font.size = Pt(20)
        p.font.color.rgb = TEXT_DARK
        p.space_before = Pt(12)
        p.space_after = Pt(12)

    # Save presentation
    output_file = "RAG_Capstone_Project_Presentation.pptx"
    prs.save(output_file)
    print(f"✅ Presentation created successfully: {output_file}")
    print(f"📊 Total slides: {len(prs.slides)}")
    # os.path.getsize avoids the previous unclosed-file leak and does not
    # read the whole file into memory just to measure it.
    print(f"💾 File size: {os.path.getsize(output_file) / 1024:.2f} KB")


if __name__ == "__main__":
    create_presentation()