| """ |
| LAB 4: Full RAG Pipeline with ChromaDB |
| ======================================== |
FREE — No API key needed!
| """ |
|
|
| import gradio as gr |
| from langchain.text_splitter import RecursiveCharacterTextSplitter |
| from langchain_community.vectorstores import Chroma |
| from langchain_community.embeddings import HuggingFaceEmbeddings |
| from langchain.schema import Document |
|
|
# Shared, module-level state for the demo.
# all-MiniLM-L6-v2 is a small sentence-transformer that runs locally,
# so no API key is required.
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
# In-memory Chroma index: populated by build_index(), read by query_rag().
# None until the user clicks "Build Index".
vectorstore = None
|
|
# Tiny in-memory corpus for the demo. Each string is treated as one source
# document (metadata source=doc_i) and may be split into several chunks
# depending on the chunk-size/overlap sliders.
SAMPLE_DOCS = [
    "RAG combines retrieval with LLMs to ground answers in real documents.",
    "Chunking splits documents into smaller pieces. Chunk size affects quality.",
    "Vector stores like ChromaDB store embeddings for fast similarity search.",
    "RAGAS evaluates RAG using Faithfulness, Relevancy, Precision, Recall.",
    "Temperature 0.0-0.2 is best for factual RAG applications.",
    "Re-ranking uses a cross-encoder to reorder retrieved chunks.",
    "HYDE generates a fake answer first, then uses it to search.",
    "Fine-tuning embeddings on domain data improves Context Recall by 20-40%.",
]
|
|
def build_index(chunk_size: int, overlap: int) -> str:
    """Split SAMPLE_DOCS into chunks and (re)build the global Chroma index.

    Args:
        chunk_size: Maximum characters per chunk. Gradio sliders deliver
            floats, hence the int() coercion below.
        overlap: Characters of overlap between consecutive chunks.

    Returns:
        A human-readable status message with the number of chunks stored.
    """
    global vectorstore
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=int(chunk_size), chunk_overlap=int(overlap)
    )
    docs = [
        Document(page_content=text, metadata={"source": f"doc_{i}"})
        for i, text in enumerate(SAMPLE_DOCS)
    ]
    chunks = splitter.split_documents(docs)
    # Rebuilds from scratch each click; the previous index is discarded.
    vectorstore = Chroma.from_documents(chunks, embeddings)
    # FIX: the return literal was broken across two source lines (a syntax
    # error, apparently from a garbled emoji) - restored as one f-string.
    return f"✅ Index built! {len(chunks)} chunks stored in ChromaDB."
|
|
def query_rag(question: str, top_k: int) -> tuple[str, str]:
    """Retrieve the top-k chunks most similar to *question*.

    Returns:
        A pair of display strings: the concatenated chunk texts, and one
        similarity score per chunk (reported as 1 - distance).
    """
    # Guard: retrieval is impossible before build_index() has run.
    if vectorstore is None:
        return "β οΈ Please build the index first!", ""
    hits = vectorstore.similarity_search_with_score(question, k=int(top_k))
    context_parts = []
    score_lines = []
    for rank, (doc, distance) in enumerate(hits, start=1):
        context_parts.append(f"[Chunk {rank}] {doc.page_content}")
        score_lines.append(f"Chunk {rank}: {1 - distance:.4f}")
    return "\n\n".join(context_parts), "\n".join(score_lines)
|
|
# UI layout. Gradio Blocks renders components in creation order, so the
# statement sequence below is load-bearing - do not reorder.
with gr.Blocks(title="Lab 4: Full RAG Pipeline", theme=gr.themes.Soft()) as demo:
    gr.Markdown("## ποΈ Lab 4: Full RAG Pipeline with ChromaDB")
    # Step 1: chunking sliders feed build_index(); status box shows chunk count.
    gr.Markdown("### π¦ Step 1: Build the Vector Index")
    with gr.Row():
        chunk_size = gr.Slider(100, 500, value=200, step=50, label="Chunk Size")
        overlap = gr.Slider(0, 100, value=20, step=10, label="Overlap")
    build_btn = gr.Button("π¨ Build Index", variant="primary")
    build_out = gr.Textbox(label="Status")
    build_btn.click(fn=build_index, inputs=[chunk_size, overlap], outputs=build_out)

    # Step 2: question + top-k feed query_rag(); context and scores shown
    # side by side.
    gr.Markdown("### π Step 2: Query the Index")
    question = gr.Textbox(label="Ask a Question", placeholder="e.g. How does re-ranking work?")
    top_k = gr.Slider(1, 5, value=3, step=1, label="Top-K Results")
    query_btn = gr.Button("π Retrieve", variant="primary")
    with gr.Row():
        ctx_out = gr.Textbox(label="Retrieved Context", lines=10)
        score_out = gr.Textbox(label="Similarity Scores", lines=6)
    query_btn.click(fn=query_rag, inputs=[question, top_k], outputs=[ctx_out, score_out])
|
|
| demo.launch() |