File size: 24,276 Bytes
1d10b0a
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
503
504
505
506
507
508
509
510
511
512
513
514
515
516
517
518
519
520
521
522
523
524
525
526
527
528
529
530
531
532
533
534
535
536
537
538
539
540
541
542
543
544
545
546
547
548
549
550
551
552
553
554
555
556
557
558
559
560
561
562
563
564
565
566
567
568
569
570
571
572
573
574
575
576
577
578
579
580
581
582
583
584
585
586
587
588
589
590
591
592
593
594
595
596
597
598
599
600
601
602
603
604
605
606
607
608
609
610
611
612
613
614
615
616
617
618
619
620
621
622
623
624
625
626
627
628
629
630
631
632
633
634
635
636
637
638
639
640
641
642
643
644
645
646
647
648
649
650
651
652
653
654
655
656
657
"""Create a comprehensive PowerPoint presentation for RAG Capstone Project."""
from pptx import Presentation
from pptx.util import Inches, Pt
from pptx.enum.text import PP_ALIGN
from pptx.dml.color import RGBColor
from datetime import datetime


def create_presentation(output_file="RAG_Capstone_Project_Presentation.pptx"):
    """Build the RAG Capstone Project slide deck and save it to disk.

    The deck is a 10 x 7.5 inch presentation of 20 slides built on the blank
    layout: title slides on a dark-blue background, and single- or two-column
    content slides with a dark-blue title bar on a white background.  After
    saving, a short summary (path, slide count, file size) is printed.

    Args:
        output_file: Path of the ``.pptx`` file to write.  Defaults to
            ``RAG_Capstone_Project_Presentation.pptx`` in the current
            working directory.

    Returns:
        The path the presentation was saved to.
    """
    import os  # function-scope: only needed for the size report at the end

    prs = Presentation()
    prs.slide_width = Inches(10)
    prs.slide_height = Inches(7.5)

    # Color scheme
    DARK_BLUE = RGBColor(25, 55, 109)
    ACCENT_BLUE = RGBColor(0, 120, 215)
    TEXT_DARK = RGBColor(33, 33, 33)
    WHITE = RGBColor(255, 255, 255)

    BLANK_LAYOUT = 6  # index of the blank layout in prs.slide_layouts
    RECTANGLE = 1     # autoshape type id passed to add_shape (MSO_SHAPE.RECTANGLE)

    def new_slide(background_rgb):
        """Append a blank slide with a solid background color and return it."""
        slide = prs.slides.add_slide(prs.slide_layouts[BLANK_LAYOUT])
        fill = slide.background.fill
        fill.solid()
        fill.fore_color.rgb = background_rgb
        return slide

    def add_text_box(slide, left, top, width, height):
        """Add a word-wrapping text box (dimensions in inches); return its text frame."""
        box = slide.shapes.add_textbox(
            Inches(left), Inches(top), Inches(width), Inches(height)
        )
        frame = box.text_frame
        # Wrap inside the box; otherwise long lines run past the box edges.
        frame.word_wrap = True
        return frame

    def style_paragraph(paragraph, text, size, color, *, bold=False,
                        space_before=None, space_after=None, alignment=None):
        """Set a paragraph's text and the formatting options that were given."""
        paragraph.text = text
        paragraph.font.size = Pt(size)
        if bold:
            paragraph.font.bold = True
        paragraph.font.color.rgb = color
        if space_before is not None:
            paragraph.space_before = Pt(space_before)
        if space_after is not None:
            paragraph.space_after = Pt(space_after)
        if alignment is not None:
            paragraph.alignment = alignment

    def add_bullets(frame, items, *, size, color=TEXT_DARK, space_before=None,
                    space_after=None, reuse_first=True):
        """Write one level-0 paragraph per item into ``frame``.

        A new text frame always starts with one empty paragraph.  With
        ``reuse_first`` the first item fills it; otherwise every item is
        appended (used when a heading already occupies the first paragraph).
        """
        for index, item in enumerate(items):
            if reuse_first and index == 0:
                paragraph = frame.paragraphs[0]
            else:
                paragraph = frame.add_paragraph()
            paragraph.level = 0
            style_paragraph(
                paragraph, item, size, color,
                space_before=space_before, space_after=space_after,
            )

    def add_title_bar(slide, title):
        """Draw the full-width dark-blue title bar at the top of a content slide."""
        bar = slide.shapes.add_shape(
            RECTANGLE, Inches(0), Inches(0), Inches(10), Inches(0.8)
        )
        bar.fill.solid()
        bar.fill.fore_color.rgb = DARK_BLUE
        bar.line.color.rgb = DARK_BLUE
        style_paragraph(
            bar.text_frame.paragraphs[0], title, 40, WHITE,
            bold=True, space_before=8, space_after=8,
        )

    def add_title_slide(title, subtitle="", top=2.5):
        """Add a dark-blue title slide.

        ``top`` is the distance in inches from the top of the slide to the
        title box; the subtitle box (if any) sits 1.7 inches below it.
        """
        slide = new_slide(DARK_BLUE)

        title_frame = add_text_box(slide, 0.5, top, 9, 1.5)
        style_paragraph(
            title_frame.paragraphs[0], title, 54, WHITE,
            bold=True, alignment=PP_ALIGN.CENTER,
        )

        if subtitle:
            subtitle_frame = add_text_box(slide, 0.5, top + 1.7, 9, 1)
            style_paragraph(
                subtitle_frame.paragraphs[0], subtitle, 28, ACCENT_BLUE,
                alignment=PP_ALIGN.CENTER,
            )

        return slide

    def add_content_slide(title, content_items):
        """Add a white content slide with a title bar and one paragraph per item."""
        slide = new_slide(WHITE)
        add_title_bar(slide, title)
        frame = add_text_box(slide, 0.7, 1.2, 8.6, 6)
        add_bullets(frame, content_items, size=18, space_before=6, space_after=6)
        return slide

    def add_two_column_slide(title, left_title, left_items, right_title, right_items):
        """Add a white content slide with two headed columns of paragraphs."""
        slide = new_slide(WHITE)
        add_title_bar(slide, title)

        columns = (
            (0.4, left_title, left_items),
            (5.0, right_title, right_items),
        )
        for left, heading, items in columns:
            frame = add_text_box(slide, left, 1.2, 4.6, 6)
            style_paragraph(
                frame.paragraphs[0], heading, 20, ACCENT_BLUE,
                bold=True, space_after=8,
            )
            # The heading occupies the frame's first paragraph, so append all items.
            add_bullets(frame, items, size=15, space_after=6, reuse_first=False)

        return slide

    # Slide 1: Title Slide
    add_title_slide(
        "RAG Capstone Project",
        "Retrieval-Augmented Generation Pipeline with Advanced Evaluation"
    )

    # Slide 2: Project Overview
    add_content_slide(
        "Project Overview",
        [
            "🎯 Goal: Build a production-ready RAG system with comprehensive evaluation",
            "",
            "📊 Key Components:",
            "  • Document ingestion from RAGBench datasets (15+ datasets)",
            "  • Flexible chunking strategies (6 different approaches)",
            "  • Multiple embedding models (8 different embeddings)",
            "  • Advanced LLM-based evaluation framework",
            "  • Real-time monitoring and audit trails",
            "",
            "🔧 Tech Stack: Python, Streamlit, ChromaDB, Groq LLM API, Sentence Transformers"
        ]
    )

    # Slide 3: RAG Pipeline Architecture
    add_content_slide(
        "RAG Pipeline Architecture",
        [
            "1️⃣ DATA INGESTION",
            "   Load documents from 15+ RAGBench datasets (CovidQA, CUAD, FinQA, etc.)",
            "",
            "2️⃣ DOCUMENT CHUNKING",
            "   Apply 6 chunking strategies to split documents into manageable pieces",
            "",
            "3️⃣ EMBEDDING & VECTORIZATION",
            "   Convert chunks to dense vectors using multiple embedding models",
            "",
            "4️⃣ VECTOR STORAGE",
            "   Store in ChromaDB with semantic search capabilities",
            "",
            "5️⃣ RETRIEVAL & RANKING",
            "   Retrieve relevant documents based on query similarity",
            "",
            "6️⃣ RESPONSE GENERATION",
            "   Use Groq LLM to generate answers grounded in retrieved documents"
        ]
    )

    # Slide 4: Chunking Strategies
    add_two_column_slide(
        "Document Chunking Strategies",
        "Chunking Methods",
        [
            "1. Dense Chunking",
            "   Fixed-size chunks (512 tokens) with overlap",
            "   Best for: Uniform content",
            "",
            "2. Sparse Chunking",
            "   Semantic boundaries (paragraphs)",
            "   Best for: Structured documents",
            "",
            "3. Hybrid Chunking",
            "   Combines dense + semantic splitting",
            "   Best for: Mixed content types",
        ],
        "Advanced Methods",
        [
            "4. Re-ranking Chunking",
            "   Chunks with relevance re-ranking",
            "   Best for: High precision retrieval",
            "",
            "5. Row-based Chunking",
            "   Preserves data structure for tables",
            "   Best for: Tabular data",
            "",
            "6. Entity-based Chunking",
            "   Groups by semantic entities",
            "   Best for: Knowledge extraction",
        ]
    )

    # Slide 5: Embedding Models
    add_content_slide(
        "Embedding Models Used",
        [
            "🔹 General Purpose Models:",
            "   • sentence-transformers/all-mpnet-base-v2 (High quality, 768-dim)",
            "   • sentence-transformers/all-MiniLM-L6-v2 (Fast, lightweight, 384-dim)",
            "",
            "🔹 Domain-Specific Models:",
            "   • emilyalsentzer/Bio_ClinicalBERT (Clinical text, 768-dim)",
            "   • microsoft/BiomedNLP-PubMedBERT (Medical abstracts, 768-dim)",
            "   • allenai/specter (Academic papers, 768-dim)",
            "",
            "🔹 Multilingual Models:",
            "   • sentence-transformers/multilingual-MiniLM-L12-v2 (110 languages)",
            "   • sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
            "",
            "🔹 API-Based Model:",
            "   • gemini-embedding-001 (Google Gemini API embeddings)"
        ]
    )

    # Slide 6: RAG Evaluation Challenge
    add_content_slide(
        "The RAG Evaluation Challenge",
        [
            "❌ Why Traditional Metrics Fail?",
            "   • BLEU/ROUGE only measure surface-level similarity",
            "   • Don't evaluate grounding in retrieved documents",
            "   • Can't detect hallucinations or factual errors",
            "",
            "✅ What We Need?",
            "   • Metrics that measure document relevance to query",
            "   • Metrics that measure document usage in response",
            "   • Metrics that evaluate response grounding (no hallucinations)",
            "   • Metrics that assess completeness of coverage",
            "",
            "🎓 Our Solution: LLM-based Evaluation Framework",
            "   Inspired by RAGBench paper (arXiv:2407.11005)"
        ]
    )

    # Slide 7: TRACE Framework
    add_content_slide(
        "TRACE Framework - 4 Core Metrics",
        [
            "🔴 RELEVANCE (R)",
            "   Fraction of retrieved context relevant to the query",
            "   Formula: Σ Len(Relevant spans) / Σ Len(All retrieved docs)",
            "",
            "🔵 UTILIZATION (T)",
            "   Fraction of retrieved context used in the response",
            "   Formula: Σ Len(Used spans) / Σ Len(All retrieved docs)",
            "",
            "🟢 ADHERENCE (A)",
            "   Boolean: Is the response fully grounded in documents?",
            "   Detects hallucinations and unsupported claims",
            "",
            "🟡 COMPLETENESS (C)",
            "   Fraction of relevant information covered by response",
            "   Formula: Len(Relevant ∩ Used) / Len(Relevant)"
        ]
    )

    # Slide 8: LLM-Based Evaluation
    add_content_slide(
        "Advanced LLM-Based Evaluation",
        [
            "🤖 GPT Labeling Approach:",
            "   • Use LLM (GPT-4/Groq) to annotate response sentences",
            "   • Match each response sentence to supporting document spans",
            "   • Detect fully supported, partially supported, and unsupported sentences",
            "",
            "📋 Evaluation Process:",
            "   1. Extract all sentences from both response and documents",
            "   2. Prompt LLM to identify relevant document sentences for query",
            "   3. Prompt LLM to map response sentences to document spans",
            "   4. Calculate support metrics at sentence and document level",
            "",
            "✨ Advantages:",
            "   ✓ Semantic understanding (not just keyword matching)",
            "   ✓ Detects hallucinations and contradictions",
            "   ✓ Provides explainable audit trails",
            "   ✓ Works across different domains and languages"
        ]
    )

    # Slide 9: Evaluation Output Metrics
    add_two_column_slide(
        "Evaluation Output & Metrics",
        "Per-Response Metrics",
        [
            "✓ Context Relevance (0-1)",
            "   How much retrieved content is relevant?",
            "",
            "✓ Context Utilization (0-1)",
            "   How much retrieved content was used?",
            "",
            "✓ Adherence (0-1)",
            "   Is response grounded in documents?",
            "",
            "✓ Completeness (0-1)",
            "   Does response cover relevant information?",
        ],
        "Aggregate Metrics",
        [
            "📊 RMSE Metrics",
            "   Root Mean Squared Error for each metric",
            "",
            "📈 AUC-ROC Metrics",
            "   Area Under ROC Curve for binary classification",
            "",
            "🎯 Average Score",
            "   Mean of all 4 TRACE metrics",
            "",
            "📋 Detailed Audit Trail",
            "   Sentence-level support information",
        ]
    )

    # Slide 10: Audit Trail & Explainability
    add_content_slide(
        "Explainability & Audit Trails",
        [
            "🔍 Detailed Audit Information Captured:",
            "",
            "✓ Original Query",
            "   User's question or request",
            "",
            "✓ LLM Prompt",
            "   Exact instructions sent to LLM for evaluation",
            "",
            "✓ LLM Response",
            "   Complete evaluation reasoning from LLM",
            "",
            "✓ Retrieved Documents",
            "   Context provided to the RAG system",
            "",
            "✓ Sentence-Level Support Map",
            "   Which document spans support each response sentence",
            "",
            "🎯 Enables: Root cause analysis, model improvements, and trust building"
        ]
    )

    # Slide 11: System Architecture
    add_content_slide(
        "System Architecture Overview",
        [
            "📱 Frontend: Streamlit Web Interface",
            "   • Interactive configuration panel",
            "   • Real-time collection management",
            "   • Chat interface with context display",
            "   • Evaluation results visualization",
            "",
            "⚙️ Backend: Python Services",
            "   • Vector store management (ChromaDB with SQLite indexing)",
            "   • Embedding pipeline with 8 models",
            "   • LLM integration (Groq API with rate limiting)",
            "   • Advanced evaluation engine",
            "",
            "📚 Data Layer: ChromaDB",
            "   • Persistent vector storage",
            "   • SQLite metadata indexing",
            "   • Multi-collection support",
            "   • 4 active collections from RAGBench"
        ]
    )

    # Slide 12: Key Features
    add_two_column_slide(
        "Key System Features",
        "Data Management",
        [
            "✓ 15+ RAGBench datasets",
            "✓ Flexible chunking strategies",
            "✓ Multiple embedding models",
            "✓ Real-time collection loading",
            "✓ Batch processing capability",
            "✓ Persistent storage (ChromaDB)",
            "✓ SQLite metadata indexing",
        ],
        "Evaluation & Monitoring",
        [
            "✓ LLM-based evaluation",
            "✓ 4 TRACE metrics",
            "✓ RMSE & AUC metrics",
            "✓ Sentence-level analysis",
            "✓ Hallucination detection",
            "✓ Detailed audit trails",
            "✓ JSON export & visualization",
        ]
    )

    # Slide 13: LLM Configuration
    add_content_slide(
        "LLM Configuration & Settings",
        [
            "🔧 Groq LLM Models Supported:",
            "   • meta-llama/llama-4-maverick-17b-128e-instruct",
            "   • llama-3.1-8b-instant",
            "   • openai/gpt-oss-120b",
            "",
            "⚙️ Configurable Parameters:",
            "   • Temperature: 0.0 (deterministic for evaluation)",
            "   • Max Tokens: 2048 (sufficient for detailed analysis)",
            "   • Rate Limit: 30 RPM (Groq API limit)",
            "   • Rate Limit Delay: 2.0 seconds (throttling)",
            "",
            "🎯 System Prompt:",
            "   Specialized fact-checking and citation verification prompt",
            "   Enables LLM to evaluate without additional fine-tuning"
        ]
    )

    # Slide 14: Data Flow Example
    add_content_slide(
        "Data Flow Example: A Question in RAG",
        [
            "1️⃣ USER QUERY",
            '   "What are the COVID-19 vaccine side effects?"',
            "",
            "2️⃣ RETRIEVAL",
            "   ChromaDB retrieves top 5 similar chunks from CovidQA dataset",
            "",
            "3️⃣ CONTEXT PREPARATION",
            "   Relevant medical documents selected and formatted",
            "",
            "4️⃣ RESPONSE GENERATION",
            "   Groq LLM generates answer: 'Common side effects include...'",
            "",
            "5️⃣ EVALUATION",
            "   • LLM verifies: Are claims supported by documents?",
            "   • Calculates: Relevance=0.92, Utilization=0.87, Adherence=1.0, Completeness=0.95",
            "",
            "6️⃣ OUTPUT",
            "   JSON with metrics, audit trail, and source documents"
        ]
    )

    # Slide 15: Use Cases
    add_content_slide(
        "Real-World Use Cases",
        [
            "📋 Document Q&A Systems",
            "   Help desk, knowledge base search, document retrieval",
            "",
            "🏥 Medical Information Retrieval",
            "   Clinical decision support, patient education",
            "",
            "⚖️ Legal Document Analysis",
            "   Contract review, case law research, compliance checking",
            "",
            "💰 Financial Analysis",
            "   SEC filing analysis, market research, investment insights",
            "",
            "🎓 Academic Research",
            "   Paper indexing, literature review, citation analysis",
            "",
            "🏢 Enterprise Knowledge Management",
            "   Internal document search, policy retrieval, FAQs"
        ]
    )

    # Slide 16: Performance & Results
    add_content_slide(
        "System Performance & Achievements",
        [
            "✅ Successfully Processed:",
            "   • 4 collections from RAGBench datasets",
            "   • Recovered and re-indexed 4M+ vector embeddings in ChromaDB",
            "   • 8 different embedding models tested",
            "   • 6 chunking strategies implemented and evaluated",
            "",
            "📊 Evaluation Coverage:",
            "   • Batch evaluation of 100+ test cases",
            "   • Per-sentence analysis with GPT labeling",
            "   • Comprehensive audit trails with LLM reasoning",
            "",
            "⚡ Performance Metrics:",
            "   • Sub-second retrieval latency",
            "   • Batch evaluation: ~2-3 seconds per query (with GPT labeling)",
            "   • Rate limiting: Controlled via Groq API settings"
        ]
    )

    # Slide 17: Technical Innovations
    add_content_slide(
        "Technical Innovations",
        [
            "🔹 Advanced ChromaDB Recovery",
            "   Smart SQLite index rebuilding preserving all vector data",
            "",
            "🔹 Smart Collection Naming",
            "   Automatic metadata extraction with interactive fallback UI",
            "",
            "🔹 Sentence-Level Evaluation",
            "   Maps individual response sentences to document spans",
            "",
            "🔹 Multi-Metric Evaluation",
            "   RMSE and AUC-ROC metrics alongside TRACE framework",
            "",
            "🔹 Explainable AI",
            "   Complete audit trails showing LLM reasoning for each decision",
            "",
            "🔹 Flexible Pipeline",
            "   Modular design allows easy swapping of chunking, embedding, and LLM components"
        ]
    )

    # Slide 18: Challenges & Solutions
    add_two_column_slide(
        "Challenges & Solutions",
        "Challenges Faced",
        [
            "🔴 ChromaDB Index Corruption",
            "   Collection folders orphaned from SQLite",
            "",
            "🔴 Evaluation Consistency",
            "   Different chunking strategies vary in effectiveness",
            "",
            "🔴 Rate Limiting",
            "   Groq API has strict RPM limits",
            "",
            "🔴 Hallucination Detection",
            "   Hard to detect factual errors without reference",
            "",
            "🔴 Scalability",
            "   Large batch evaluations take time",
        ],
        "Solutions Implemented",
        [
            "✅ Data-Preserving Recovery",
            "   Direct SQLite rebuild scripts",
            "",
            "✅ Comprehensive Testing",
            "   Baseline metrics for different strategies",
            "",
            "✅ Intelligent Queuing",
            "   Configurable rate limit delays",
            "",
            "✅ LLM Verification",
            "   Adherence metric detects unsupported claims",
            "",
            "✅ Batch Processing",
            "   Parallel processing where possible",
        ]
    )

    # Slide 19: Future Roadmap
    add_content_slide(
        "Future Development Roadmap",
        [
            "🚀 Phase 2: Production Enhancements",
            "   • Distributed processing for large-scale evaluation",
            "   • Caching layer for frequently accessed documents",
            "   • Real-time monitoring dashboard",
            "",
            "🚀 Phase 3: Advanced Features",
            "   • Multimodal RAG (images, tables, PDFs)",
            "   • Knowledge graph integration",
            "   • Cross-domain transfer learning",
            "",
            "🚀 Phase 4: Enterprise Features",
            "   • Multi-tenant support",
            "   • Fine-tuned models for specific domains",
            "   • Advanced security and compliance",
            "",
            "🚀 Phase 5: Research Contributions",
            "   • Publication of benchmark results",
            "   • Open-source evaluation framework",
            "   • Industry collaboration"
        ]
    )

    # Slide 20: Conclusion
    # The title block is raised (top=0.4) so the takeaway list can sit below
    # the subtitle instead of being drawn on top of the title.
    closing_slide = add_title_slide(
        "Key Takeaways",
        "Advanced RAG with Comprehensive Evaluation",
        top=0.4,
    )

    points = [
        "✓ Complete RAG pipeline from ingestion to evaluation",
        "✓ Flexible architecture supporting multiple chunking and embedding strategies",
        "✓ LLM-based evaluation with sentence-level grounding verification",
        "✓ Explainable AI with comprehensive audit trails",
        "✓ Production-ready implementation with real data (RAGBench datasets)",
        "✓ Addresses critical RAG evaluation challenges",
    ]

    # White text: the slide background is DARK_BLUE, so TEXT_DARK would be
    # nearly invisible here.
    points_frame = add_text_box(closing_slide, 1, 3.1, 8, 4)
    add_bullets(points_frame, points, size=18, color=WHITE,
                space_before=6, space_after=6)

    # Save presentation
    prs.save(output_file)
    # Ask the filesystem for the size rather than reading the file back in
    # through a handle that is never closed.
    size_kb = os.path.getsize(output_file) / 1024
    print(f"✅ Presentation created successfully: {output_file}")
    print(f"📊 Total slides: {len(prs.slides)}")
    print(f"💾 File size: {size_kb:.2f} KB")

    return output_file


# Script entry point: build and save the deck only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    create_presentation()