# Spaces:

# gopikrishnait
# /

# CapStoneRAG10

# Sleeping

# File size: 24,276 Bytes

# 1d10b0a

"""Create a comprehensive PowerPoint presentation for RAG Capstone Project."""
import os
from datetime import datetime

from pptx import Presentation
from pptx.dml.color import RGBColor
from pptx.enum.shapes import MSO_SHAPE
from pptx.enum.text import PP_ALIGN
from pptx.util import Inches, Pt


def create_presentation(output_file="RAG_Capstone_Project_Presentation.pptx"):
    """Create the RAG Capstone Project presentation and save it to disk.

    Builds a 20-slide deck on 10 x 7.5 inch slides (an opening title slide,
    eighteen content / two-column slides and a closing "Key Takeaways"
    slide), saves it, and prints a short summary (path, slide count and
    file size) to stdout.

    Args:
        output_file: Filesystem path the ``.pptx`` file is written to.
            Defaults to ``"RAG_Capstone_Project_Presentation.pptx"`` in the
            current working directory.

    Returns:
        The ``output_file`` path that was written.
    """
    prs = Presentation()
    prs.slide_width = Inches(10)
    prs.slide_height = Inches(7.5)

    # Define color scheme
    DARK_BLUE = RGBColor(25, 55, 109)
    ACCENT_BLUE = RGBColor(0, 120, 215)
    TEXT_DARK = RGBColor(33, 33, 33)
    WHITE = RGBColor(255, 255, 255)

    def new_slide(background_color):
        """Append a blank-layout slide filled with a solid background color."""
        slide = prs.slides.add_slide(prs.slide_layouts[6])  # Blank layout
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = background_color
        return slide

    def set_paragraph(p, text, size, color, bold=False, align=None,
                      space_before=None, space_after=None):
        """Set the text and formatting of paragraph ``p``.

        Sizes and spacings are given in points; ``None`` leaves the
        corresponding property untouched.
        """
        p.text = text
        p.font.size = Pt(size)
        if bold:
            p.font.bold = True
        p.font.color.rgb = color
        if align is not None:
            p.alignment = align
        if space_before is not None:
            p.space_before = Pt(space_before)
        if space_after is not None:
            p.space_after = Pt(space_after)

    def add_lines(frame, lines, size, color, reuse_first=True,
                  space_before=None, space_after=None):
        """Write one paragraph per entry of ``lines`` into ``frame``.

        A new text frame already contains one empty paragraph. With
        ``reuse_first`` the first line goes into that paragraph; otherwise
        every line is appended after the frame's existing content.
        """
        for index, line in enumerate(lines):
            if reuse_first and index == 0:
                p = frame.paragraphs[0]
            else:
                p = frame.add_paragraph()
            set_paragraph(p, line, size, color,
                          space_before=space_before, space_after=space_after)

    def add_title_bar(slide, title):
        """Draw the dark-blue title bar across the top of ``slide``."""
        bar = slide.shapes.add_shape(
            MSO_SHAPE.RECTANGLE, Inches(0), Inches(0), prs.slide_width, Inches(0.8)
        )
        bar.fill.solid()
        bar.fill.fore_color.rgb = DARK_BLUE
        # Outline in the fill color so no border is visible.
        bar.line.color.rgb = DARK_BLUE
        set_paragraph(bar.text_frame.paragraphs[0], title, 40, WHITE,
                      bold=True, space_before=8, space_after=8)

    def add_title_slide(title, subtitle="", title_top=Inches(2.5),
                        subtitle_top=Inches(4.2)):
        """Add a dark-blue title slide and return it.

        ``title_top`` / ``subtitle_top`` are the top edges of the two text
        boxes; the defaults place the title block in the middle of the slide.
        The subtitle box is only created when ``subtitle`` is non-empty.
        """
        slide = new_slide(DARK_BLUE)

        # Title
        title_box = slide.shapes.add_textbox(Inches(0.5), title_top, Inches(9), Inches(1.5))
        title_frame = title_box.text_frame
        title_frame.word_wrap = True
        set_paragraph(title_frame.paragraphs[0], title, 54, WHITE,
                      bold=True, align=PP_ALIGN.CENTER)

        # Subtitle
        if subtitle:
            subtitle_box = slide.shapes.add_textbox(Inches(0.5), subtitle_top, Inches(9), Inches(1))
            subtitle_frame = subtitle_box.text_frame
            # Wrap long subtitles instead of letting them run past the box.
            subtitle_frame.word_wrap = True
            set_paragraph(subtitle_frame.paragraphs[0], subtitle, 28, ACCENT_BLUE,
                          align=PP_ALIGN.CENTER)

        return slide

    def add_content_slide(title, content_items):
        """Add a white slide with a title bar and one paragraph per item."""
        slide = new_slide(WHITE)
        add_title_bar(slide, title)

        # Content
        text_box = slide.shapes.add_textbox(Inches(0.7), Inches(1.2), Inches(8.6), Inches(6))
        text_frame = text_box.text_frame
        text_frame.word_wrap = True
        add_lines(text_frame, content_items, 18, TEXT_DARK,
                  space_before=6, space_after=6)

        return slide

    def add_two_column_slide(title, left_title, left_items, right_title, right_items):
        """Add a white slide with a title bar and two headed text columns."""
        slide = new_slide(WHITE)
        add_title_bar(slide, title)

        columns = (
            (Inches(0.4), left_title, left_items),
            (Inches(5.0), right_title, right_items),
        )
        for left_edge, heading, items in columns:
            box = slide.shapes.add_textbox(left_edge, Inches(1.2), Inches(4.6), Inches(6))
            frame = box.text_frame
            frame.word_wrap = True
            # The column heading takes the frame's initial paragraph, so the
            # items are all appended after it.
            set_paragraph(frame.paragraphs[0], heading, 20, ACCENT_BLUE,
                          bold=True, space_after=8)
            add_lines(frame, items, 15, TEXT_DARK, reuse_first=False, space_after=6)

        return slide

    # Slide 1: Title Slide
    add_title_slide(
        "RAG Capstone Project",
        "Retrieval-Augmented Generation Pipeline with Advanced Evaluation"
    )

    # Slide 2: Project Overview
    add_content_slide(
        "Project Overview",
        [
            "🎯 Goal: Build a production-ready RAG system with comprehensive evaluation",
            "",
            "📊 Key Components:",
            "  • Document ingestion from RAGBench datasets (15+ datasets)",
            "  • Flexible chunking strategies (6 different approaches)",
            "  • Multiple embedding models (8 different embeddings)",
            "  • Advanced LLM-based evaluation framework",
            "  • Real-time monitoring and audit trails",
            "",
            "🔧 Tech Stack: Python, Streamlit, ChromaDB, Groq LLM API, Sentence Transformers"
        ]
    )

    # Slide 3: RAG Pipeline Architecture
    add_content_slide(
        "RAG Pipeline Architecture",
        [
            "1️⃣ DATA INGESTION",
            "   Load documents from 15+ RAGBench datasets (CovidQA, CUAD, FinQA, etc.)",
            "",
            "2️⃣ DOCUMENT CHUNKING",
            "   Apply 6 chunking strategies to split documents into manageable pieces",
            "",
            "3️⃣ EMBEDDING & VECTORIZATION",
            "   Convert chunks to dense vectors using multiple embedding models",
            "",
            "4️⃣ VECTOR STORAGE",
            "   Store in ChromaDB with semantic search capabilities",
            "",
            "5️⃣ RETRIEVAL & RANKING",
            "   Retrieve relevant documents based on query similarity",
            "",
            "6️⃣ RESPONSE GENERATION",
            "   Use Groq LLM to generate answers grounded in retrieved documents"
        ]
    )

    # Slide 4: Chunking Strategies
    add_two_column_slide(
        "Document Chunking Strategies",
        "Chunking Methods",
        [
            "1. Dense Chunking",
            "   Fixed-size chunks (512 tokens) with overlap",
            "   Best for: Uniform content",
            "",
            "2. Sparse Chunking",
            "   Semantic boundaries (paragraphs)",
            "   Best for: Structured documents",
            "",
            "3. Hybrid Chunking",
            "   Combines dense + semantic splitting",
            "   Best for: Mixed content types",
        ],
        "Advanced Methods",
        [
            "4. Re-ranking Chunking",
            "   Chunks with relevance re-ranking",
            "   Best for: High precision retrieval",
            "",
            "5. Row-based Chunking",
            "   Preserves data structure for tables",
            "   Best for: Tabular data",
            "",
            "6. Entity-based Chunking",
            "   Groups by semantic entities",
            "   Best for: Knowledge extraction",
        ]
    )

    # Slide 5: Embedding Models
    add_content_slide(
        "Embedding Models Used",
        [
            "🔹 General Purpose Models:",
            "   • sentence-transformers/all-mpnet-base-v2 (High quality, 768-dim)",
            "   • sentence-transformers/all-MiniLM-L6-v2 (Fast, lightweight, 384-dim)",
            "",
            "🔹 Domain-Specific Models:",
            "   • emilyalsentzer/Bio_ClinicalBERT (Clinical text, 768-dim)",
            "   • microsoft/BiomedNLP-PubMedBERT (Medical abstracts, 768-dim)",
            "   • allenai/specter (Academic papers, 768-dim)",
            "",
            "🔹 Multilingual Models:",
            "   • sentence-transformers/multilingual-MiniLM-L12-v2 (110 languages)",
            "   • sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            "",
            "🔹 API-Based Model:",
            "   • gemini-embedding-001 (Google Gemini API embeddings)"
        ]
    )

    # Slide 6: RAG Evaluation Challenge
    add_content_slide(
        "The RAG Evaluation Challenge",
        [
            "❌ Why Traditional Metrics Fail?",
            "   • BLEU/ROUGE only measure surface-level similarity",
            "   • Don't evaluate grounding in retrieved documents",
            "   • Can't detect hallucinations or factual errors",
            "",
            "✅ What We Need?",
            "   • Metrics that measure document relevance to query",
            "   • Metrics that measure document usage in response",
            "   • Metrics that evaluate response grounding (no hallucinations)",
            "   • Metrics that assess completeness of coverage",
            "",
            "🎓 Our Solution: LLM-based Evaluation Framework",
            "   Inspired by RAGBench paper (arXiv:2407.11005)"
        ]
    )

    # Slide 7: TRACE Framework
    add_content_slide(
        "TRACE Framework - 4 Core Metrics",
        [
            "🔴 RELEVANCE (R)",
            "   Fraction of retrieved context relevant to the query",
            "   Formula: Σ Len(Relevant spans) / Σ Len(All retrieved docs)",
            "",
            "🔵 UTILIZATION (T)",
            "   Fraction of retrieved context used in the response",
            "   Formula: Σ Len(Used spans) / Σ Len(All retrieved docs)",
            "",
            "🟢 ADHERENCE (A)",
            "   Boolean: Is the response fully grounded in documents?",
            "   Detects hallucinations and unsupported claims",
            "",
            "🟡 COMPLETENESS (C)",
            "   Fraction of relevant information covered by response",
            "   Formula: Len(Relevant ∩ Used) / Len(Relevant)"
        ]
    )

    # Slide 8: LLM-Based Evaluation
    add_content_slide(
        "Advanced LLM-Based Evaluation",
        [
            "🤖 GPT Labeling Approach:",
            "   • Use LLM (GPT-4/Groq) to annotate response sentences",
            "   • Match each response sentence to supporting document spans",
            "   • Detect fully supported, partially supported, and unsupported sentences",
            "",
            "📋 Evaluation Process:",
            "   1. Extract all sentences from both response and documents",
            "   2. Prompt LLM to identify relevant document sentences for query",
            "   3. Prompt LLM to map response sentences to document spans",
            "   4. Calculate support metrics at sentence and document level",
            "",
            "✨ Advantages:",
            "   ✓ Semantic understanding (not just keyword matching)",
            "   ✓ Detects hallucinations and contradictions",
            "   ✓ Provides explainable audit trails",
            "   ✓ Works across different domains and languages"
        ]
    )

    # Slide 9: Evaluation Output Metrics
    add_two_column_slide(
        "Evaluation Output & Metrics",
        "Per-Response Metrics",
        [
            "✓ Context Relevance (0-1)",
            "   How much retrieved content is relevant?",
            "",
            "✓ Context Utilization (0-1)",
            "   How much retrieved content was used?",
            "",
            "✓ Adherence (0-1)",
            "   Is response grounded in documents?",
            "",
            "✓ Completeness (0-1)",
            "   Does response cover relevant information?",
        ],
        "Aggregate Metrics",
        [
            "📊 RMSE Metrics",
            "   Root Mean Squared Error for each metric",
            "",
            "📈 AUC-ROC Metrics",
            "   Area Under ROC Curve for binary classification",
            "",
            "🎯 Average Score",
            "   Mean of all 4 TRACE metrics",
            "",
            "📋 Detailed Audit Trail",
            "   Sentence-level support information",
        ]
    )

    # Slide 10: Audit Trail & Explainability
    add_content_slide(
        "Explainability & Audit Trails",
        [
            "🔍 Detailed Audit Information Captured:",
            "",
            "✓ Original Query",
            "   User's question or request",
            "",
            "✓ LLM Prompt",
            "   Exact instructions sent to LLM for evaluation",
            "",
            "✓ LLM Response",
            "   Complete evaluation reasoning from LLM",
            "",
            "✓ Retrieved Documents",
            "   Context provided to the RAG system",
            "",
            "✓ Sentence-Level Support Map",
            "   Which document spans support each response sentence",
            "",
            "🎯 Enables: Root cause analysis, model improvements, and trust building"
        ]
    )

    # Slide 11: System Architecture
    add_content_slide(
        "System Architecture Overview",
        [
            "📱 Frontend: Streamlit Web Interface",
            "   • Interactive configuration panel",
            "   • Real-time collection management",
            "   • Chat interface with context display",
            "   • Evaluation results visualization",
            "",
            "⚙️ Backend: Python Services",
            "   • Vector store management (ChromaDB with SQLite indexing)",
            "   • Embedding pipeline with 8 models",
            "   • LLM integration (Groq API with rate limiting)",
            "   • Advanced evaluation engine",
            "",
            "📚 Data Layer: ChromaDB",
            "   • Persistent vector storage",
            "   • SQLite metadata indexing",
            "   • Multi-collection support",
            "   • 4 active collections from RAGBench"
        ]
    )

    # Slide 12: Key Features
    add_two_column_slide(
        "Key System Features",
        "Data Management",
        [
            "✓ 15+ RAGBench datasets",
            "✓ Flexible chunking strategies",
            "✓ Multiple embedding models",
            "✓ Real-time collection loading",
            "✓ Batch processing capability",
            "✓ Persistent storage (ChromaDB)",
            "✓ SQLite metadata indexing",
        ],
        "Evaluation & Monitoring",
        [
            "✓ LLM-based evaluation",
            "✓ 4 TRACE metrics",
            "✓ RMSE & AUC metrics",
            "✓ Sentence-level analysis",
            "✓ Hallucination detection",
            "✓ Detailed audit trails",
            "✓ JSON export & visualization",
        ]
    )

    # Slide 13: LLM Configuration
    add_content_slide(
        "LLM Configuration & Settings",
        [
            "🔧 Groq LLM Models Supported:",
            "   • meta-llama/llama-4-maverick-17b-128e-instruct",
            "   • llama-3.1-8b-instant",
            "   • openai/gpt-oss-120b",
            "",
            "⚙️ Configurable Parameters:",
            "   • Temperature: 0.0 (deterministic for evaluation)",
            "   • Max Tokens: 2048 (sufficient for detailed analysis)",
            "   • Rate Limit: 30 RPM (Groq API limit)",
            "   • Rate Limit Delay: 2.0 seconds (throttling)",
            "",
            "🎯 System Prompt:",
            "   Specialized fact-checking and citation verification prompt",
            "   Enables LLM to evaluate without additional fine-tuning"
        ]
    )

    # Slide 14: Data Flow Example
    add_content_slide(
        "Data Flow Example: A Question in RAG",
        [
            "1️⃣ USER QUERY",
            '   "What are the COVID-19 vaccine side effects?"',
            "",
            "2️⃣ RETRIEVAL",
            "   ChromaDB retrieves top 5 similar chunks from CovidQA dataset",
            "",
            "3️⃣ CONTEXT PREPARATION",
            "   Relevant medical documents selected and formatted",
            "",
            "4️⃣ RESPONSE GENERATION",
            "   Groq LLM generates answer: 'Common side effects include...'",
            "",
            "5️⃣ EVALUATION",
            "   • LLM verifies: Are claims supported by documents?",
            "   • Calculates: Relevance=0.92, Utilization=0.87, Adherence=1.0, Completeness=0.95",
            "",
            "6️⃣ OUTPUT",
            "   JSON with metrics, audit trail, and source documents"
        ]
    )

    # Slide 15: Use Cases
    add_content_slide(
        "Real-World Use Cases",
        [
            "📋 Document Q&A Systems",
            "   Help desk, knowledge base search, document retrieval",
            "",
            "🏥 Medical Information Retrieval",
            "   Clinical decision support, patient education",
            "",
            "⚖️ Legal Document Analysis",
            "   Contract review, case law research, compliance checking",
            "",
            "💰 Financial Analysis",
            "   SEC filing analysis, market research, investment insights",
            "",
            "🎓 Academic Research",
            "   Paper indexing, literature review, citation analysis",
            "",
            "🏢 Enterprise Knowledge Management",
            "   Internal document search, policy retrieval, FAQs"
        ]
    )

    # Slide 16: Performance & Results
    add_content_slide(
        "System Performance & Achievements",
        [
            "✅ Successfully Processed:",
            "   • 4 collections from RAGBench datasets",
            "   • Recovered and re-indexed 4M+ vector embeddings in ChromaDB",
            "   • 8 different embedding models tested",
            "   • 6 chunking strategies implemented and evaluated",
            "",
            "📊 Evaluation Coverage:",
            "   • Batch evaluation of 100+ test cases",
            "   • Per-sentence analysis with GPT labeling",
            "   • Comprehensive audit trails with LLM reasoning",
            "",
            "⚡ Performance Metrics:",
            "   • Sub-second retrieval latency",
            "   • Batch evaluation: ~2-3 seconds per query (with GPT labeling)",
            "   • Rate limiting: Controlled via Groq API settings"
        ]
    )

    # Slide 17: Technical Innovations
    add_content_slide(
        "Technical Innovations",
        [
            "🔹 Advanced ChromaDB Recovery",
            "   Smart SQLite index rebuilding preserving all vector data",
            "",
            "🔹 Smart Collection Naming",
            "   Automatic metadata extraction with interactive fallback UI",
            "",
            "🔹 Sentence-Level Evaluation",
            "   Maps individual response sentences to document spans",
            "",
            "🔹 Multi-Metric Evaluation",
            "   RMSE and AUC-ROC metrics alongside TRACE framework",
            "",
            "🔹 Explainable AI",
            "   Complete audit trails showing LLM reasoning for each decision",
            "",
            "🔹 Flexible Pipeline",
            "   Modular design allows easy swapping of chunking, embedding, and LLM components"
        ]
    )

    # Slide 18: Challenges & Solutions
    add_two_column_slide(
        "Challenges & Solutions",
        "Challenges Faced",
        [
            "🔴 ChromaDB Index Corruption",
            "   Collection folders orphaned from SQLite",
            "",
            "🔴 Evaluation Consistency",
            "   Different chunking strategies vary in effectiveness",
            "",
            "🔴 Rate Limiting",
            "   Groq API has strict RPM limits",
            "",
            "🔴 Hallucination Detection",
            "   Hard to detect factual errors without reference",
            "",
            "🔴 Scalability",
            "   Large batch evaluations take time",
        ],
        "Solutions Implemented",
        [
            "✅ Data-Preserving Recovery",
            "   Direct SQLite rebuild scripts",
            "",
            "✅ Comprehensive Testing",
            "   Baseline metrics for different strategies",
            "",
            "✅ Intelligent Queuing",
            "   Configurable rate limit delays",
            "",
            "✅ LLM Verification",
            "   Adherence metric detects unsupported claims",
            "",
            "✅ Batch Processing",
            "   Parallel processing where possible",
        ]
    )

    # Slide 19: Future Roadmap
    add_content_slide(
        "Future Development Roadmap",
        [
            "🚀 Phase 2: Production Enhancements",
            "   • Distributed processing for large-scale evaluation",
            "   • Caching layer for frequently accessed documents",
            "   • Real-time monitoring dashboard",
            "",
            "🚀 Phase 3: Advanced Features",
            "   • Multimodal RAG (images, tables, PDFs)",
            "   • Knowledge graph integration",
            "   • Cross-domain transfer learning",
            "",
            "🚀 Phase 4: Enterprise Features",
            "   • Multi-tenant support",
            "   • Fine-tuned models for specific domains",
            "   • Advanced security and compliance",
            "",
            "🚀 Phase 5: Research Contributions",
            "   • Publication of benchmark results",
            "   • Open-source evaluation framework",
            "   • Industry collaboration"
        ]
    )

    # Slide 20: Conclusion
    # The title and subtitle sit at the top of this slide (instead of the
    # default middle position) so the takeaway bullets below do not overlap
    # them.
    closing_slide = add_title_slide(
        "Key Takeaways",
        "Advanced RAG with Comprehensive Evaluation",
        title_top=Inches(0.4),
        subtitle_top=Inches(1.7),
    )

    # Add content to conclusion
    points = [
        "✓ Complete RAG pipeline from ingestion to evaluation",
        "✓ Flexible architecture supporting multiple chunking and embedding strategies",
        "✓ LLM-based evaluation with sentence-level grounding verification",
        "✓ Explainable AI with comprehensive audit trails",
        "✓ Production-ready implementation with real data (RAGBench datasets)",
        "✓ Addresses critical RAG evaluation challenges",
    ]
    points_box = closing_slide.shapes.add_textbox(Inches(1), Inches(2.6), Inches(8), Inches(4.5))
    points_frame = points_box.text_frame
    points_frame.word_wrap = True
    # White text: this slide has the dark-blue title-slide background.
    add_lines(points_frame, points, 20, WHITE, space_before=6, space_after=6)

    # Save presentation
    prs.save(output_file)
    print(f"✅ Presentation created successfully: {output_file}")
    print(f"📊 Total slides: {len(prs.slides)}")
    # Ask the filesystem for the size rather than reading the whole file
    # back through a handle that is never closed.
    print(f"💾 File size: {os.path.getsize(output_file) / 1024:.2f} KB")
    return output_file


# Build and save the deck only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    create_presentation()