Spaces:

Hitan2004
/

agentic-corrective-rag

Sleeping

App Files Files Community

Hitan2004 committed on Mar 27

Commit

d4cf06c

0 Parent(s):

initial RAG system

Browse files

Files changed (11) hide show

.gitignore +6 -0
agent.py +140 -0
app.py +138 -0
config.py +25 -0
ingestion.py +168 -0
main.py +109 -0
requirements.txt +18 -0
retriever.py +62 -0
start.sh +4 -0
test_sources.py +4 -0
verify.py +70 -0

.gitignore ADDED Viewed

	@@ -0,0 +1,6 @@

+venv/
+__pycache__/
+*.pkl
+faiss.index
+embedder_model/
+.env

agent.py ADDED Viewed

	@@ -0,0 +1,140 @@

+from typing import TypedDict
+from langgraph.graph import StateGraph, END
+from langchain_groq import ChatGroq
+from langchain_core.messages import HumanMessage, AIMessage
+from config import GROQ_API_KEY, GROQ_MODEL, MAX_RETRIES
+# Shared Groq chat client, created once at import time and used by both
+# generate_node and validate_node below.
+llm = ChatGroq(
+    model=GROQ_MODEL,
+    temperature=0,
+    api_key=GROQ_API_KEY,
+)
+class RAGState(TypedDict):
+    """State dictionary passed between the nodes of the corrective-RAG graph."""
+    # The user question; inserted into both the generation and validation prompts.
+    question:          str
+    # Retrieved chunks; the nodes read each item's "chunk" (and "source") keys.
+    context_chunks:    list
+    # Latest draft produced by generate_node ("" before the first run).
+    answer:            str
+    # "PASS" or "FAIL" as set by validate_node ("" before validation).
+    validation_result: str
+    # Text after "REASON:" in the validator's reply; fed back into the retry prompt.
+    fail_reason:       str
+    # Number of regenerate attempts so far; bumped by increment_retry_node.
+    retry_count:       int
+    # Prior messages; generate_node only reads the last six entries.
+    chat_history:      list
+def generate_node(state: RAGState) -> dict:
+    context_text = "\n\n---\n\n".join(
+        f"[Source: {r['source']}]\n{r['chunk']}"
+        for r in state["context_chunks"]
+    )
+    history_lines = []
+    for msg in state.get("chat_history", [])[-6:]:
+        role = "User" if isinstance(msg, HumanMessage) else "Assistant"
+        history_lines.append(f"{role}: {msg.content}")
+    history_text = "\n".join(history_lines) or "None"
+    correction = ""
+    if state.get("retry_count", 0) > 0:
+        correction = (
+            f"\n\nIMPORTANT CORRECTION REQUIRED: Your previous answer was "
+            f"rejected because: {state.get('fail_reason', 'unverifiable claims')}. "
+            f"Re-answer using ONLY the context provided."
+        )
+    prompt = (
+        "You are an AI assistant that answers questions AND generates content based on provided documents.\n"
+        "Answer ONLY using information from the CONTEXT below.\n"
+        "If the answer cannot be found, say exactly: "
+        '"I don\'t have enough information in the provided documents."\n'
+        "Do NOT invent facts or use outside knowledge."
+        + correction
+        + f"\n\nPREVIOUS CONVERSATION:\n{history_text}"
+        + f"\n\nCONTEXT:\n{context_text}"
+        + f"\n\nQUESTION: {state['question']}\n\nAnswer:"
+    )
+    response = llm.invoke([HumanMessage(content=prompt)])
+    return {"answer": response.content}
+def validate_node(state: RAGState) -> dict:
+    context_text = "\n\n".join(r["chunk"] for r in state["context_chunks"])
+    prompt = (
+        "You are a strict hallucination checker for a RAG system.\n\n"
+        "Given the CONTEXT and the ANSWER below, check:\n"
+        "1. Is every factual claim directly supported by the context?\n"
+        "2. Does the answer address the question?\n"
+        "3. Are there any invented facts not in the context?\n\n"
+        f"Context:\n{context_text}\n\n"
+        f"Question: {state['question']}\n"
+        f"Answer: {state['answer']}\n\n"
+        "Respond in EXACTLY this format:\n"
+        "VERDICT: PASS\n"
+        "REASON: <one sentence>\n\n"
+        "or\n\n"
+        "VERDICT: FAIL\n"
+        "REASON: <one sentence explaining what is wrong>"
+    )
+    result = llm.invoke([HumanMessage(content=prompt)])
+    text   = result.content.strip()
+    verdict = "PASS" if "VERDICT: PASS" in text.upper() else "FAIL"
+    reason  = ""
+    for line in text.splitlines():
+        if line.upper().startswith("REASON:"):
+            reason = line.split(":", 1)[1].strip()
+            break
+    return {"validation_result": verdict, "fail_reason": reason}
+def increment_retry_node(state: RAGState) -> dict:
+    return {"retry_count": state.get("retry_count", 0) + 1}
+def route_after_validation(state: RAGState) -> str:
+    if (
+        state["validation_result"] == "FAIL"
+        and state.get("retry_count", 0) < MAX_RETRIES
+    ):
+        return "retry"
+    return "done"
+def _build_graph():
+    g = StateGraph(RAGState)
+    g.add_node("generate",        generate_node)
+    g.add_node("validate",        validate_node)
+    g.add_node("increment_retry", increment_retry_node)
+    g.set_entry_point("generate")
+    g.add_edge("generate", "validate")
+    g.add_conditional_edges(
+        "validate",
+        route_after_validation,
+        {"retry": "increment_retry", "done": END},
+    )
+    g.add_edge("increment_retry", "generate")
+    return g.compile()
+_rag_graph = _build_graph()
+def run_rag_agent(
+    question:       str,
+    context_chunks: list,
+    chat_history:   list = [],
+) -> tuple:
+    init_state: RAGState = {
+        "question":          question,
+        "context_chunks":    context_chunks,
+        "answer":            "",
+        "validation_result": "",
+        "fail_reason":       "",
+        "retry_count":       0,
+        "chat_history":      chat_history,
+    }
+    final = _rag_graph.invoke(init_state)
+    return final["answer"], final["retry_count"], final["validation_result"]

app.py ADDED Viewed

	@@ -0,0 +1,138 @@

+# app.py
+import uuid
+import streamlit as st
+import requests
+# Base URL of the FastAPI backend that serves /upload, /query, /health and /session.
+# NOTE(review): start.sh launches uvicorn on port 10000 while this points at
+# port 8000 — confirm which port the deployment actually uses.
+API = "http://localhost:8000"
+st.set_page_config(
+    page_title="Corrective RAG",
+    page_icon="📄",
+    layout="wide",
+)
+st.title("📄 Corrective RAG — Document Q&A")
+st.caption("Groq LLaMA 3 · FAISS · BM25 · LangGraph self-correction")
+# ── Session state init ────────────────────────────────────────
+# A random session id is generated once per browser session and sent with
+# every /query so the backend can keep per-session chat history.
+if "session_id" not in st.session_state:
+    st.session_state.session_id = str(uuid.uuid4())
+# Local transcript (user and assistant entries) re-rendered on every rerun.
+if "messages" not in st.session_state:
+    st.session_state.messages = []
+# ── Sidebar ───────────────────────────────────────────────────
+with st.sidebar:
+    st.header("Upload documents")
+    uploaded_files = st.file_uploader(
+        "Choose .txt or .pdf files",
+        type=["txt", "pdf"],
+        accept_multiple_files=True,
+    )
+    if st.button("Index documents", type="primary") and uploaded_files:
+        for f in uploaded_files:
+            try:
+                r = requests.post(
+                    f"{API}/upload",
+                    files={"file": (f.name, f.getvalue())},
+                    timeout=30,
+                )
+                if r.status_code == 200:
+                    st.success(f"{f.name} — uploaded, indexing started")
+                else:
+                    st.error(f"{f.name} — {r.json().get('detail', r.text)}")
+            except requests.ConnectionError:
+                st.error("Cannot reach backend. Is `uvicorn main:app` running?")
+    st.divider()
+    # Health check
+    try:
+        h = requests.get(f"{API}/health", timeout=3).json()
+        idx_status = "ready" if h.get("indexes_loaded") else "not loaded"
+        st.caption(f"Backend: connected  |  Indexes: {idx_status}")
+    except Exception:
+        st.caption("Backend: not connected")
+    st.divider()
+    if st.button("Clear conversation"):
+        try:
+            requests.delete(f"{API}/session/{st.session_state.session_id}", timeout=5)
+        except Exception:
+            pass
+        st.session_state.messages = []
+        st.rerun()
+    st.caption(f"Session ID: `{st.session_state.session_id[:8]}...`")
+# ── Render chat history ───────────────────────────────────────
+for msg in st.session_state.messages:
+    with st.chat_message(msg["role"]):
+        st.markdown(msg["content"])
+        if msg["role"] == "assistant" and msg.get("meta"):
+            m = msg["meta"]
+            c1, c2, c3 = st.columns(3)
+            c1.metric("Retries used", m["retries"])
+            c2.metric("Validation",   m["validation"])
+            c3.metric("Sources found", m["num_sources"])
+            if m.get("sources"):
+                with st.expander("View source chunks"):
+                    for s in m["sources"]:
+                        st.markdown(f"**{s['source']}**")
+                        st.text(s["chunk"])
+                        st.divider()
+# ── Chat input ────────────────────────────────────────────────
+# Runs only on the rerun in which the user submitted a question: the question
+# is stored and echoed, sent to the backend /query endpoint, and the answer
+# (or an error text) is rendered together with its metadata and then stored.
+if question := st.chat_input("Ask a question about your documents..."):
+    st.session_state.messages.append({"role": "user", "content": question})
+    with st.chat_message("user"):
+        st.markdown(question)
+    with st.chat_message("assistant"):
+        with st.spinner("Retrieving and generating (with self-correction)..."):
+            # Defaults shown when the request fails before a response is parsed.
+            answer = ""
+            meta   = {"retries": 0, "validation": "N/A",
+                      "num_sources": 0, "sources": []}
+            try:
+                r = requests.post(
+                    f"{API}/query",
+                    json={
+                        "question":   question,
+                        "session_id": st.session_state.session_id,
+                    },
+                    timeout=60,
+                )
+                if r.status_code == 200:
+                    data   = r.json()
+                    answer = data["answer"]
+                    meta   = {
+                        "retries":     data["retries_used"],
+                        "validation":  data["validation"],
+                        "num_sources": len(data["sources"]),
+                        "sources":     data["sources"],
+                    }
+                else:
+                    # Non-200: show the backend's "detail" field, or the raw body.
+                    answer = f"Error {r.status_code}: {r.json().get('detail', r.text)}"
+            except requests.ConnectionError:
+                answer = "Cannot reach backend. Make sure `uvicorn main:app` is running."
+            except requests.Timeout:
+                answer = "Request timed out. The model may be slow — try again."
+            except Exception as e:
+                # Catch-all so any other failure is shown as the answer text
+                # instead of raising out of the script.
+                answer = f"Unexpected error: {e}"
+        st.markdown(answer)
+        c1, c2, c3 = st.columns(3)
+        c1.metric("Retries used",  meta["retries"])
+        c2.metric("Validation",    meta["validation"])
+        c3.metric("Sources found", meta["num_sources"])
+        if meta["sources"]:
+            with st.expander("View source chunks"):
+                for s in meta["sources"]:
+                    st.markdown(f"**{s['source']}**")
+                    st.text(s["chunk"])
+                    st.divider()
+    # Persist the assistant turn so the history loop can re-render it on later reruns.
+    st.session_state.messages.append({
+        "role": "assistant",
+        "content": answer,
+        "meta": meta,
+    })

config.py ADDED Viewed

	@@ -0,0 +1,25 @@

+# config.py
+# Central settings for the RAG pipeline. Importing this module loads .env and
+# raises ValueError when GROQ_API_KEY is empty or unset.
+import os
+from dotenv import load_dotenv
+load_dotenv()
+GROQ_API_KEY      = os.getenv("GROQ_API_KEY", "")
+GROQ_MODEL        = "llama-3.3-70b-versatile"
+# Directory scanned for .txt/.pdf files, and where uploads are written.
+DOCS_DIR          = "./docs"
+# Artifacts written by ingestion.py and read back by retriever.py.
+FAISS_INDEX_PATH  = "./faiss.index"
+BM25_PATH         = "./bm25.pkl"
+CHUNKS_PATH       = "./chunks.pkl"
+SOURCES_PATH      = "./sources.pkl"
+EMBEDDER_PATH     = "./embedder_model"
+EMBEDDER_NAME     = "all-MiniLM-L6-v2"
+# NOTE(review): ingestion.py imports CHUNK_SIZE / CHUNK_OVERLAP, but its
+# semantic_chunk uses its own 300 / 80 values — confirm which is intended.
+CHUNK_SIZE        = 500
+CHUNK_OVERLAP     = 50
+# Default for QueryRequest.top_k in main.py.
+TOP_K             = 5
+# agent.py retries while retry_count < MAX_RETRIES.
+MAX_RETRIES       = 3
+# main.py keeps the last MAX_HISTORY_TURNS * 2 messages per session.
+MAX_HISTORY_TURNS = 5
+if not GROQ_API_KEY:
+    raise ValueError("GROQ_API_KEY not set in .env file")

ingestion.py ADDED Viewed

	@@ -0,0 +1,168 @@

+# ingestion.py
+import os, pickle
+from pathlib import Path
+import numpy as np
+import faiss
+from sentence_transformers import SentenceTransformer
+from rank_bm25 import BM25Okapi
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from config import (
+    DOCS_DIR, FAISS_INDEX_PATH, BM25_PATH,
+    CHUNKS_PATH, SOURCES_PATH, EMBEDDER_PATH,
+    EMBEDDER_NAME, CHUNK_SIZE, CHUNK_OVERLAP
+)
+# ─────────────────────────────────────────────────────────────
+# Better PDF extraction (IMPORTANT)
+# ─────────────────────────────────────────────────────────────
+def read_pdf_text(fpath):
+    import fitz  # PyMuPDF
+    doc = fitz.open(fpath)
+    text = []
+    for page in doc:
+        text.append(page.get_text())
+    return "\n".join(text).strip()
+# ─────────────────────────────────────────────────────────────
+# Clean text (removes weird spacing)
+# ─────────────────────────────────────────────────────────────
+def clean_text(text):
+    return " ".join(text.split())
+# ─────────────────────────────────────────────────────────────
+# Load documents
+# ─────────────────────────────────────────────────────────────
+def load_documents():
+    docs, filenames = [], []
+    path = Path(DOCS_DIR)
+    path.mkdir(exist_ok=True)
+    # Load TXT files
+    for fpath in path.glob("*.txt"):
+        try:
+            text = fpath.read_text(encoding="utf-8")
+            text = clean_text(text)
+            docs.append(text)
+            filenames.append(fpath.name)
+            print(f"  Loaded text: {fpath.name}")
+        except Exception as e:
+            print(f"  Skipped {fpath.name}: {e}")
+    # Load PDF files (using PyMuPDF)
+    for fpath in path.glob("*.pdf"):
+        try:
+            text = read_pdf_text(fpath)
+            text = clean_text(text)
+            if text:
+                docs.append(text)
+                filenames.append(fpath.name)
+                print(f"  Loaded PDF:  {fpath.name}")
+            else:
+                print(f"  WARNING: {fpath.name} extracted empty text")
+        except Exception as e:
+            print(f"  Skipped {fpath.name}: {e}")
+    if not docs:
+        raise FileNotFoundError(
+            f"No .txt or .pdf files found in '{DOCS_DIR}'. "
+            "Add at least one document and re-run."
+        )
+    print(f"\nLoaded {len(docs)} document(s)")
+    return docs, filenames
+# ─────────────────────────────────────────────────────────────
+# Chunking (optimized for resumes)
+# ─────────────────────────────────────────────────────────────
+def semantic_chunk(docs, filenames):
+    splitter = RecursiveCharacterTextSplitter(
+        chunk_size=300,          # smaller chunks → better retrieval
+        chunk_overlap=80,
+        separators=["\n\n", "\n", ". ", " "],
+    )
+    all_chunks, all_sources = [], []
+    for doc, fname in zip(docs, filenames):
+        chunks = splitter.split_text(doc)
+        all_chunks.extend(chunks)
+        all_sources.extend([fname] * len(chunks))
+    print(f"Created {len(all_chunks)} chunks "
+          f"(avg {sum(len(c) for c in all_chunks)//len(all_chunks)} chars each)")
+    # Debug: show sample chunk
+    print("\n--- SAMPLE CHUNK ---")
+    print(all_chunks[0][:500])
+    print("--------------------\n")
+    return all_chunks, all_sources
+# ─────────────────────────────────────────────────────────────
+# Build indexes
+# ─────────────────────────────────────────────────────────────
+def build_indexes(chunks):
+    print("\nBuilding dense embeddings...")
+    model = SentenceTransformer(EMBEDDER_NAME)
+    embeddings = model.encode(chunks, show_progress_bar=True, batch_size=32)
+    embeddings = np.array(embeddings, dtype="float32")
+    faiss.normalize_L2(embeddings)
+    dim = embeddings.shape[1]
+    faiss_index = faiss.IndexFlatIP(dim)
+    faiss_index.add(embeddings)
+    print(f"FAISS index: {faiss_index.ntotal} vectors, dim={dim}")
+    tokenized = [c.lower().split() for c in chunks]
+    bm25_index = BM25Okapi(tokenized)
+    print("BM25 index: built")
+    return faiss_index, bm25_index, model
+# ─────────────────────────────────────────────────────────────
+# Save everything
+# ─────────────────────────────────────────────────────────────
+def save_indexes(faiss_index, bm25_index, chunks, sources, model):
+    faiss.write_index(faiss_index, FAISS_INDEX_PATH)
+    with open(BM25_PATH, "wb") as f:
+        pickle.dump(bm25_index, f)
+    with open(CHUNKS_PATH, "wb") as f:
+        pickle.dump(chunks, f)
+    with open(SOURCES_PATH, "wb") as f:
+        pickle.dump(sources, f)
+    model.save(EMBEDDER_PATH)
+    print("\nSaved indexes to disk.")
+# ─────────────────────────────────────────────────────────────
+# Main runner
+# ─────────────────────────────────────────────────────────────
+def run_ingestion():
+    print("=== Starting ingestion ===\n")
+    docs, filenames = load_documents()
+    chunks, sources = semantic_chunk(docs, filenames)
+    fi, bm25, model = build_indexes(chunks)
+    save_indexes(fi, bm25, chunks, sources, model)
+    print("\n=== Ingestion complete ===")
+if __name__ == "__main__":
+    run_ingestion()

main.py ADDED Viewed

	@@ -0,0 +1,109 @@

+import os
+import shutil
+from contextlib import asynccontextmanager
+from fastapi import FastAPI, UploadFile, File, HTTPException, BackgroundTasks
+from pydantic import BaseModel
+from langchain_core.messages import HumanMessage, AIMessage
+from retriever import load_indexes, reload_indexes, hybrid_retrieve
+from agent import run_rag_agent
+from ingestion import run_ingestion
+from config import DOCS_DIR, TOP_K, MAX_HISTORY_TURNS
+# In-memory chat history keyed by session_id; entries are written by /query
+# and removed only through DELETE /session/{session_id}.
+sessions: dict = {}
+@asynccontextmanager
+async def lifespan(app: FastAPI):
+    try:
+        load_indexes()
+    except FileNotFoundError:
+        print("WARNING: No indexes found. Upload documents first.")
+    yield
+# FastAPI application; `lifespan` attempts to load the indexes at startup.
+app = FastAPI(title="Corrective RAG API", version="1.0", lifespan=lifespan)
+class QueryRequest(BaseModel):
+    """Request body for POST /query."""
+    question:   str
+    # Key into the in-memory `sessions` history; clients that omit it share "default".
+    session_id: str = "default"
+    # Number of chunks to retrieve; defaults to TOP_K from config.
+    top_k:      int = TOP_K
+class QueryResponse(BaseModel):
+    """Response body for POST /query."""
+    answer:       str
+    # List of {"chunk", "source"} dicts; chunk text is cut to 300 characters by /query.
+    sources:      list
+    retries_used: int
+    # Final validation verdict returned by run_rag_agent.
+    validation:   str
+    session_id:   str
+@app.post("/query", response_model=QueryResponse)
+async def query(req: QueryRequest):
+    if not indexes_loaded():
+        raise HTTPException(
+            status_code=503,
+            detail="Indexes not ready. Upload and index documents first."
+        )
+    results = hybrid_retrieve(req.question, top_k=req.top_k)
+    if not results:
+        raise HTTPException(status_code=404, detail="No relevant chunks found.")
+    history = sessions.get(req.session_id, [])
+    answer, retries, verdict = run_rag_agent(req.question, results, history)
+    history.append(HumanMessage(content=req.question))
+    history.append(AIMessage(content=answer))
+    sessions[req.session_id] = history[-(MAX_HISTORY_TURNS * 2):]
+    return QueryResponse(
+        answer=answer,
+        sources=[{"chunk": r["chunk"][:300], "source": r["source"]} for r in results],
+        retries_used=retries,
+        validation=verdict,
+        session_id=req.session_id,
+    )
+@app.post("/upload")
+async def upload(background_tasks: BackgroundTasks, file: UploadFile = File(...)):
+    allowed = {".txt", ".pdf"}
+    ext = os.path.splitext(file.filename or "")[1].lower()
+    if ext not in allowed:
+        raise HTTPException(status_code=400, detail="Only .txt and .pdf files allowed.")
+    os.makedirs(DOCS_DIR, exist_ok=True)
+    dest = os.path.join(DOCS_DIR, file.filename)
+    with open(dest, "wb") as f:
+        shutil.copyfileobj(file.file, f)
+    background_tasks.add_task(_reindex)
+    return {"status": "uploaded", "filename": file.filename,
+            "message": "Indexing started in background."}
+def _reindex():
+    try:
+        run_ingestion()
+        reload_indexes()
+        print("Re-indexing complete.")
+    except Exception as e:
+        print(f"Re-indexing failed: {e}")
+def indexes_loaded():
+    from retriever import indexes_loaded as _il
+    return _il()
+@app.delete("/session/{session_id}")
+def clear_session(session_id: str):
+    sessions.pop(session_id, None)
+    return {"status": "cleared", "session_id": session_id}
+@app.get("/health")
+def health():
+    return {"status": "ok", "indexes_loaded": indexes_loaded()}

requirements.txt ADDED Viewed

	@@ -0,0 +1,18 @@

+## requirements.txt
+langchain==0.3.25
+langchain-groq==0.3.2
+langgraph==0.3.29
+sentence-transformers==3.4.1
+faiss-cpu==1.9.0
+rank-bm25==0.2.2
+fastapi==0.115.12
+uvicorn==0.34.0
+streamlit==1.44.1
+pdfplumber==0.11.6
+python-dotenv==1.1.0
+numpy==1.26.4
+requests==2.32.3
+pydantic==2.11.1
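+python-multipart  # form/file-upload parsing for the FastAPI /upload endpoint
+PyMuPDF  # provides the `fitz` module imported by ingestion.py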

retriever.py ADDED Viewed

	@@ -0,0 +1,62 @@

+import pickle
+import numpy as np
+import faiss
+from sentence_transformers import SentenceTransformer
+from config import (
+    FAISS_INDEX_PATH, BM25_PATH, CHUNKS_PATH,
+    SOURCES_PATH, EMBEDDER_PATH
+)
+# Module-level retrieval state. All five stay None until load_indexes() runs;
+# indexes_loaded() only inspects _faiss_index.
+_faiss_index = None
+_bm25_index  = None
+_chunks      = None   # chunk texts, indexed by chunk id
+_sources     = None   # source file name per chunk, same indexing as _chunks
+_model       = None   # sentence-transformers embedder used for queries
+def indexes_loaded() -> bool:
+    return _faiss_index is not None
+def load_indexes():
+    global _faiss_index, _bm25_index, _chunks, _sources, _model
+    _faiss_index = faiss.read_index(FAISS_INDEX_PATH)
+    with open(BM25_PATH,   "rb") as f: _bm25_index = pickle.load(f)
+    with open(CHUNKS_PATH, "rb") as f: _chunks     = pickle.load(f)
+    with open(SOURCES_PATH,"rb") as f: _sources    = pickle.load(f)
+    _model = SentenceTransformer(EMBEDDER_PATH)
+    print(f"Indexes loaded: {_faiss_index.ntotal} vectors, {len(_chunks)} chunks")
+def reload_indexes():
+    global _faiss_index, _bm25_index, _chunks, _sources, _model
+    _faiss_index = _bm25_index = _chunks = _sources = _model = None
+    load_indexes()
+def _reciprocal_rank_fusion(lists: list, k: int = 60) -> list:
+    scores: dict = {}
+    for ranked_list in lists:
+        for rank, doc_id in enumerate(ranked_list):
+            scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank + 1)
+    return sorted(scores.keys(), key=lambda x: scores[x], reverse=True)
+def hybrid_retrieve(query: str, top_k: int = 5) -> list:
+    if not indexes_loaded():
+        raise RuntimeError("Indexes not loaded. Call load_indexes() first.")
+    q_emb = _model.encode([query], convert_to_numpy=True).astype("float32")
+    faiss.normalize_L2(q_emb)
+    _, dense_ids = _faiss_index.search(q_emb, top_k * 3)
+    dense_ranking = [int(i) for i in dense_ids[0] if i >= 0]
+    bm25_scores   = _bm25_index.get_scores(query.lower().split())
+    sparse_ranking = np.argsort(bm25_scores)[::-1][:top_k * 3].tolist()
+    merged = _reciprocal_rank_fusion([dense_ranking, sparse_ranking])[:top_k]
+    return [
+        {"chunk": _chunks[i], "source": _sources[i], "chunk_id": i}
+        for i in merged
+    ]

start.sh ADDED Viewed

	@@ -0,0 +1,4 @@

+#!/usr/bin/env bash
+python ingestion.py
+uvicorn main:app --host 0.0.0.0 --port 10000

test_sources.py ADDED Viewed

	@@ -0,0 +1,4 @@

+from retriever import load_indexes, _sources
+load_indexes()
+print(set(_sources))

verify.py ADDED Viewed

	@@ -0,0 +1,70 @@

+# verify.py  — tests each component individually
+import sys
+def check(label, fn):
+    try:
+        fn()
+        print(f"  PASS  {label}")
+    except Exception as e:
+        print(f"  FAIL  {label}: {e}")
+        sys.exit(1)
+print("\n=== Corrective RAG — environment check ===\n")
+# 1. Config / API key
+def test_config():
+    from config import GROQ_API_KEY
+    assert len(GROQ_API_KEY) > 10, "GROQ_API_KEY looks invalid"
+check("Config + GROQ key loaded", test_config)
+# 2. Groq connection
+def test_groq():
+    from langchain_groq import ChatGroq
+    from langchain_core.messages import HumanMessage
+    from config import GROQ_API_KEY, GROQ_MODEL
+    llm = ChatGroq(model=GROQ_MODEL, temperature=0, api_key=GROQ_API_KEY)
+    r = llm.invoke([HumanMessage(content="Say OK")])
+    assert "ok" in r.content.lower() or len(r.content) > 0
+check("Groq API connection", test_groq)
+# 3. Ingestion
+def test_ingestion():
+    import os
+    from pathlib import Path
+    Path("./docs").mkdir(exist_ok=True)
+    test_file = "./docs/_verify_test.txt"
+    Path(test_file).write_text(
+        "The Eiffel Tower is in Paris, France. "
+        "It was built in 1889 for the World's Fair. "
+        "It is 330 metres tall."
+    )
+    from ingestion import run_ingestion
+    run_ingestion()
+    os.remove(test_file)
+check("Ingestion pipeline", test_ingestion)
+# 4. Retriever
+def test_retriever():
+    from retriever import load_indexes, hybrid_retrieve
+    load_indexes()
+    results = hybrid_retrieve("Where is the Eiffel Tower?", top_k=3)
+    assert len(results) > 0
+    assert "chunk" in results[0]
+    assert "source" in results[0]
+check("Hybrid retrieval (BM25 + FAISS)", test_retriever)
+# 5. Agent
+def test_agent():
+    from retriever import hybrid_retrieve
+    from agent import run_rag_agent
+    results = hybrid_retrieve("How tall is the Eiffel Tower?", top_k=3)
+    answer, retries, verdict = run_rag_agent(
+        "How tall is the Eiffel Tower?", results
+    )
+    assert len(answer) > 10, f"Answer too short: {answer}"
+    print(f"\n  Answer:   {answer[:120]}")
+    print(f"  Retries:  {retries}")
+    print(f"  Verdict:  {verdict}")
+check("LangGraph agent (generate + validate)", test_agent)
+print("\n=== All checks passed — ready to run ===\n")