"""
LAB 4: Full RAG Pipeline with ChromaDB
========================================
FREE — No API key needed!
"""
import gradio as gr
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain.schema import Document

# Sentence-transformer model used to embed both documents and queries.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# Module-level handle to the ChromaDB index; set by build_index().
vectorstore = None

# Tiny in-memory corpus for the lab — no file uploads required.
SAMPLE_DOCS = [
    "RAG combines retrieval with LLMs to ground answers in real documents.",
    "Chunking splits documents into smaller pieces. Chunk size affects quality.",
    "Vector stores like ChromaDB store embeddings for fast similarity search.",
    "RAGAS evaluates RAG using Faithfulness, Relevancy, Precision, Recall.",
    "Temperature 0.0-0.2 is best for factual RAG applications.",
    "Re-ranking uses a cross-encoder to reorder retrieved chunks.",
    "HYDE generates a fake answer first, then uses it to search.",
    "Fine-tuning embeddings on domain data improves Context Recall by 20-40%.",
]


def build_index(chunk_size: int, overlap: int) -> str:
    """Split SAMPLE_DOCS into chunks and (re)build the ChromaDB index.

    Args:
        chunk_size: Maximum characters per chunk (sliders may pass floats,
            so the value is coerced to int).
        overlap: Characters of overlap between consecutive chunks.

    Returns:
        A human-readable status string reporting the number of stored chunks.
    """
    global vectorstore

    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=int(chunk_size),
        chunk_overlap=int(overlap),
    )

    # Wrap each raw string in a Document so the splitter can carry
    # per-document metadata through to the stored chunks.
    documents = []
    for idx, text in enumerate(SAMPLE_DOCS):
        documents.append(
            Document(page_content=text, metadata={"source": f"doc_{idx}"})
        )

    pieces = text_splitter.split_documents(documents)
    vectorstore = Chroma.from_documents(pieces, embeddings)
    return f"✅ Index built! {len(pieces)} chunks stored in ChromaDB."
def query_rag(question: str, top_k: int) -> tuple[str, str]:
    """Retrieve the top-k most similar chunks for *question*.

    Args:
        question: The user's query text.
        top_k: Number of chunks to retrieve (coerced to int; sliders may
            pass floats).

    Returns:
        A (context, scores) pair of display strings: the retrieved chunk
        texts, and one similarity line per chunk. Returns a warning message
        when the index has not been built or the question is empty.
    """
    if vectorstore is None:
        return "⚠️ Please build the index first!", ""
    # Guard against an empty/whitespace-only query — a similarity search on
    # an empty string would still run but return meaningless matches.
    if not question or not question.strip():
        return "⚠️ Please enter a question!", ""
    results = vectorstore.similarity_search_with_score(question, k=int(top_k))
    context = "\n\n".join(
        f"[Chunk {i+1}] {doc.page_content}" for i, (doc, _) in enumerate(results)
    )
    # NOTE(review): Chroma returns a *distance* (lower = better); `1 - score`
    # reads as a similarity only if the distance stays within [0, 1] — larger
    # distances would display as negative. Confirm the metric before relying
    # on these numbers.
    scores = "\n".join(
        f"Chunk {i+1}: {1 - score:.4f}" for i, (_, score) in enumerate(results)
    )
    return context, scores


with gr.Blocks(title="Lab 4: Full RAG Pipeline", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## 🗄️ Lab 4: Full RAG Pipeline with ChromaDB")

    # --- Step 1: chunking parameters + index build ---
    gr.Markdown("### 📦 Step 1: Build the Vector Index")
    with gr.Row():
        chunk_size = gr.Slider(100, 500, value=200, step=50, label="Chunk Size")
        overlap = gr.Slider(0, 100, value=20, step=10, label="Overlap")
    build_btn = gr.Button("🔨 Build Index", variant="primary")
    build_out = gr.Textbox(label="Status")
    build_btn.click(fn=build_index, inputs=[chunk_size, overlap], outputs=build_out)

    # --- Step 2: query the built index ---
    gr.Markdown("### 🔍 Step 2: Query the Index")
    question = gr.Textbox(
        label="Ask a Question", placeholder="e.g. How does re-ranking work?"
    )
    top_k = gr.Slider(1, 5, value=3, step=1, label="Top-K Results")
    query_btn = gr.Button("🔍 Retrieve", variant="primary")
    with gr.Row():
        ctx_out = gr.Textbox(label="Retrieved Context", lines=10)
        score_out = gr.Textbox(label="Similarity Scores", lines=6)
    query_btn.click(fn=query_rag, inputs=[question, top_k], outputs=[ctx_out, score_out])

# Launch only when run as a script, so importing this module (e.g. in tests)
# does not start a web server.
if __name__ == "__main__":
    demo.launch()