Spaces:

Hitan2004
/

agentic-corrective-rag

Sleeping

App Files Files Community

Hitan2004 committed on Apr 7

Commit

b689b3f

1 Parent(s): d0245ab

initial commit

Browse files

Files changed (8) hide show

.github/workflows/ci.yml +27 -0
agent.py +1 -0
ingestion.py +2 -2
retriever.py +50 -22
test_sources.py +1 -0
tests/__init__.py +0 -0
tests/test_integration.py +51 -0
tests/test_unit.py +119 -0

.github/workflows/ci.yml ADDED Viewed

	@@ -0,0 +1,27 @@

+# CI workflow: runs the unit-test file on pushes and pull requests targeting main.
+name: RAG Unit Tests
+on:
+  push:
+    branches: [main]
+  pull_request:
+    branches: [main]
+jobs:
+  test:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: "3.11"
+      - name: Install dependencies
+        run: pip install -r requirements.txt
+      # Only tests/test_unit.py is passed to pytest, so tests/test_integration.py is never collected.
+      - name: Run unit tests only   # ← integration tests are skipped here
+        env:
+          GROQ_API_KEY: ${{ secrets.GROQ_API_KEY }}  # add this in GitHub → Settings → Secrets
+        run: pytest tests/test_unit.py -v

agent.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from typing import TypedDict
 from langgraph.graph import StateGraph, END
 from langchain_groq import ChatGroq

+#agent.py
 from typing import TypedDict
 from langgraph.graph import StateGraph, END
 from langchain_groq import ChatGroq

ingestion.py CHANGED Viewed

@@ -80,8 +80,8 @@ def load_documents():
 # ─────────────────────────────────────────────────────────────
 def semantic_chunk(docs, filenames):
     splitter = RecursiveCharacterTextSplitter(
-        chunk_size=300,          # smaller chunks → better retrieval
-        chunk_overlap=80,
         separators=["\n\n", "\n", ". ", " "],
     )

 # ─────────────────────────────────────────────────────────────
 def semantic_chunk(docs, filenames):
     splitter = RecursiveCharacterTextSplitter(
+        chunk_size=CHUNK_SIZE,
+        chunk_overlap=CHUNK_OVERLAP,
         separators=["\n\n", "\n", ". ", " "],
     )

retriever.py CHANGED Viewed

@@ -1,17 +1,20 @@
 import pickle
 import numpy as np
 import faiss
-from sentence_transformers import SentenceTransformer
 from config import (
     FAISS_INDEX_PATH, BM25_PATH, CHUNKS_PATH,
     SOURCES_PATH, EMBEDDER_PATH
 )
-_faiss_index = None
-_bm25_index  = None
-_chunks      = None
-_sources     = None
-_model       = None
 def indexes_loaded() -> bool:
@@ -19,44 +22,69 @@ def indexes_loaded() -> bool:
 def load_indexes():
-    global _faiss_index, _bm25_index, _chunks, _sources, _model
     _faiss_index = faiss.read_index(FAISS_INDEX_PATH)
-    with open(BM25_PATH,   "rb") as f: _bm25_index = pickle.load(f)
-    with open(CHUNKS_PATH, "rb") as f: _chunks     = pickle.load(f)
-    with open(SOURCES_PATH,"rb") as f: _sources    = pickle.load(f)
-    _model = SentenceTransformer(EMBEDDER_PATH)
     print(f"Indexes loaded: {_faiss_index.ntotal} vectors, {len(_chunks)} chunks")
 def reload_indexes():
-    global _faiss_index, _bm25_index, _chunks, _sources, _model
-    _faiss_index = _bm25_index = _chunks = _sources = _model = None
     load_indexes()
-def _reciprocal_rank_fusion(lists: list, k: int = 60) -> list:
     scores: dict = {}
     for ranked_list in lists:
         for rank, doc_id in enumerate(ranked_list):
             scores[doc_id] = scores.get(doc_id, 0.0) + 1.0 / (k + rank + 1)
-    return sorted(scores.keys(), key=lambda x: scores[x], reverse=True)
 def hybrid_retrieve(query: str, top_k: int = 5) -> list:
     if not indexes_loaded():
         raise RuntimeError("Indexes not loaded. Call load_indexes() first.")
     q_emb = _model.encode([query], convert_to_numpy=True).astype("float32")
     faiss.normalize_L2(q_emb)
-    _, dense_ids = _faiss_index.search(q_emb, top_k * 3)
-    dense_ranking = [int(i) for i in dense_ids[0] if i >= 0]
-    bm25_scores   = _bm25_index.get_scores(query.lower().split())
-    sparse_ranking = np.argsort(bm25_scores)[::-1][:top_k * 3].tolist()
-    merged = _reciprocal_rank_fusion([dense_ranking, sparse_ranking])[:top_k]
     return [
-        {"chunk": _chunks[i], "source": _sources[i], "chunk_id": i}
-        for i in merged
     ]

 import pickle
 import numpy as np
 import faiss
+from sentence_transformers import SentenceTransformer, CrossEncoder
 from config import (
     FAISS_INDEX_PATH, BM25_PATH, CHUNKS_PATH,
     SOURCES_PATH, EMBEDDER_PATH
 )
+# Module-level retrieval state; each handle stays None until load_indexes() assigns it.
+_faiss_index    = None
+_bm25_index     = None
+_chunks         = None
+_sources        = None
+_model          = None
+_reranker       = None
+# Model identifier handed to CrossEncoder() inside load_indexes().
+RERANKER_MODEL  = "cross-encoder/ms-marco-MiniLM-L-6-v2"
 def indexes_loaded() -> bool:
 def load_indexes():
+    """Populate the module-level retrieval state from the configured paths.
+    Reads the FAISS index, unpickles the BM25 index, the chunks and the
+    sources, builds the embedder and the cross-encoder, then prints a summary.
+    """
+    global _faiss_index, _bm25_index, _chunks, _sources, _model, _reranker
+    def _unpickle(path):
+        # NOTE(review): pickle executes code on load; only point these paths at trusted files.
+        with open(path, "rb") as handle:
+            return pickle.load(handle)
+    _faiss_index = faiss.read_index(FAISS_INDEX_PATH)
+    _bm25_index = _unpickle(BM25_PATH)
+    _chunks = _unpickle(CHUNKS_PATH)
+    _sources = _unpickle(SOURCES_PATH)
+    _model = SentenceTransformer(EMBEDDER_PATH)
+    # The reranker is constructed here once and reused by every query
+    _reranker = CrossEncoder(RERANKER_MODEL)
     print(f"Indexes loaded: {_faiss_index.ntotal} vectors, {len(_chunks)} chunks")
 def reload_indexes():
+    """Reset every cached retrieval handle to None, then call load_indexes() again."""
+    global _faiss_index, _bm25_index, _chunks, _sources, _model, _reranker
+    _faiss_index = None
+    _bm25_index = None
+    _chunks = None
+    _sources = None
+    _model = None
+    _reranker = None
     load_indexes()
+def _reciprocal_rank_fusion(lists: list, k: int = 60) -> dict:
+    """Fuse several rankings into a ``{doc_id: rrf_score}`` mapping.
+    Each appearance of a doc at 0-based position ``rank`` in one of ``lists``
+    adds ``1 / (k + rank + 1)`` to that doc's score. The mapping is unsorted;
+    callers order it themselves.
+    """
+    fused: dict = {}
+    for ranking in lists:
+        for rank, doc_id in enumerate(ranking):
+            contribution = 1.0 / (k + rank + 1)
+            if doc_id in fused:
+                fused[doc_id] += contribution
+            else:
+                fused[doc_id] = contribution
+    return fused
 def hybrid_retrieve(query: str, top_k: int = 5) -> list:
+    """Return the best ``top_k`` chunks for ``query`` via hybrid search plus reranking.
+    Dense (FAISS) and sparse (BM25) rankings are merged with reciprocal rank
+    fusion, and the fused shortlist is re-scored by the cross-encoder.
+    Args:
+        query: Search text.
+        top_k: Maximum number of results; must be at least 1.
+    Returns:
+        A list of at most ``top_k`` dicts with keys ``chunk``, ``source``,
+        ``chunk_id``, ``rrf_score`` and ``ce_score``, sorted by ``ce_score``
+        descending. Empty when no candidate was retrieved.
+    Raises:
+        RuntimeError: If ``indexes_loaded()`` reports the indexes are missing.
+        ValueError: If ``top_k`` is less than 1.
+    """
     if not indexes_loaded():
         raise RuntimeError("Indexes not loaded. Call load_indexes() first.")
+    if top_k < 1:
+        # A non-positive top_k would otherwise reach FAISS as a non-positive k
+        raise ValueError(f"top_k must be >= 1, got {top_k}")
+    # ── Dense retrieval (FAISS) ───────────────────────────────
     q_emb = _model.encode([query], convert_to_numpy=True).astype("float32")
     faiss.normalize_L2(q_emb)
+    _, dense_ids   = _faiss_index.search(q_emb, top_k * 3)
+    # Negative ids are placeholders for "no hit", not real rows; drop them
+    dense_ranking  = [int(i) for i in dense_ids[0] if i >= 0]
+    # ── Sparse retrieval (BM25) ───────────────────────────────
+    bm25_scores    = _bm25_index.get_scores(query.lower().split())
+    sparse_ranking = np.argsort(bm25_scores)[::-1][: top_k * 3].tolist()
+    # ── Fusion (RRF) — now returns score dict ─────────────────
+    rrf_scores     = _reciprocal_rank_fusion([dense_ranking, sparse_ranking])
+    fused_ids      = sorted(rrf_scores, key=rrf_scores.get, reverse=True)[: top_k * 2]
+    if not fused_ids:
+        # Nothing was retrieved (e.g. an empty index), so there are no
+        # (query, chunk) pairs to hand to the reranker
+        return []
+    # ── Cross-encoder reranking ───────────────────────────────
+    # The cross-encoder scores each (query, chunk) pair together
+    # much more accurately than embedding similarity alone
+    candidates     = [(query, _chunks[i]) for i in fused_ids]
+    ce_scores      = _reranker.predict(candidates)     # shape: (len(candidates),)
+    # Sort by cross-encoder score, keep top_k
+    ranked         = sorted(
+        zip(fused_ids, ce_scores),
+        key=lambda x: x[1],
+        reverse=True,
+    )[:top_k]
     return [
+        {
+            "chunk":     _chunks[i],
+            "source":    _sources[i],
+            "chunk_id":  i,
+            "rrf_score": round(float(rrf_scores[i]), 4),
+            "ce_score":  round(float(score), 4),       # ← reranker confidence
+        }
+        for i, score in ranked
     ]

test_sources.py CHANGED Viewed

@@ -1,3 +1,4 @@
 from retriever import load_indexes, _sources
 load_indexes()

+#test_sources.py
+import retriever
+from retriever import load_indexes
+load_indexes()
+# load_indexes() rebinds retriever._sources, so read it only after loading;
+# importing the name up front would capture its initial value, None.
+_sources = retriever._sources

tests/__init__.py ADDED Viewed

File without changes

tests/test_integration.py ADDED Viewed

	@@ -0,0 +1,51 @@

+# tests/test_integration.py
+# Run with:  pytest tests/test_integration.py -v -m integration
+# These call real APIs — don't run in CI automatically.
+import pytest
+pytestmark = pytest.mark.integration   # tag so CI can skip these
+def test_groq_connection_live():
+    """Send one trivial prompt through ChatGroq and expect a non-empty reply."""
+    from langchain_groq import ChatGroq
+    from langchain_core.messages import HumanMessage
+    from config import GROQ_API_KEY, GROQ_MODEL
+    client = ChatGroq(model=GROQ_MODEL, temperature=0, api_key=GROQ_API_KEY)
+    prompt = [HumanMessage(content="Reply with just the word OK")]
+    reply = client.invoke(prompt)
+    assert len(reply.content) > 0
+def test_full_pipeline_live():
+    """Ingests a tiny doc, retrieves, runs agent — end to end."""
+    from pathlib import Path
+    # Write test doc (explicit encoding so the file does not depend on the locale)
+    Path("./docs").mkdir(exist_ok=True)
+    test_file = Path("./docs/_pytest_temp.txt")
+    test_file.write_text(
+        "The Eiffel Tower is in Paris, France. "
+        "It was built in 1889. It is 330 metres tall.",
+        encoding="utf-8",
+    )
+    try:
+        from ingestion import run_ingestion
+        from retriever import load_indexes, hybrid_retrieve
+        from agent import run_rag_agent
+        run_ingestion()
+        load_indexes()
+        results = hybrid_retrieve("How tall is the Eiffel Tower?", top_k=3)
+        assert len(results) > 0
+        assert "ce_score" in results[0]          # reranker ran
+        answer, retries, verdict = run_rag_agent(
+            "How tall is the Eiffel Tower?", results
+        )
+        assert "330" in answer or "metres" in answer.lower()
+        assert verdict in {"PASS", "FAIL"}
+    finally:
+        # NOTE(review): only the temp doc is removed; anything run_ingestion() wrote is left in place
+        test_file.unlink(missing_ok=True)        # always clean up

tests/test_unit.py ADDED Viewed

	@@ -0,0 +1,119 @@

+# tests/test_unit.py
+import pytest
+# ── RRF logic ─────────────────────────────────────────────────────────────────
+def test_rrf_prefers_doc_appearing_in_both_lists():
+    """Doc 2 (dense rank 2, sparse rank 0) must outscore doc 1 (ranks 1 and 2)."""
+    from retriever import _reciprocal_rank_fusion
+    dense, sparse = [0, 1, 2], [2, 0, 1]
+    fused = _reciprocal_rank_fusion([dense, sparse])
+    assert fused[2] > fused[1]
+def test_rrf_returns_all_docs():
+    """Every doc id seen in any input ranking appears as a key in the result."""
+    from retriever import _reciprocal_rank_fusion
+    fused = _reciprocal_rank_fusion([[0, 1], [1, 2]])
+    assert set(fused.keys()) == {0, 1, 2}
+def test_rrf_scores_are_positive():
+    """All fused scores for a single three-item ranking are strictly positive."""
+    from retriever import _reciprocal_rank_fusion
+    fused = _reciprocal_rank_fusion([[0, 1, 2]])
+    assert all(value > 0 for value in fused.values())
+# ── Config sanity ─────────────────────────────────────────────────────────────
+def test_config_values_are_sane():
+    """Check basic invariants on the chunking, retrieval and retry settings."""
+    from config import CHUNK_SIZE, CHUNK_OVERLAP, TOP_K, MAX_RETRIES
+    assert CHUNK_OVERLAP < CHUNK_SIZE, "overlap must be smaller than chunk size"
+    assert 0 < TOP_K, "TOP_K must be positive"
+    assert 1 <= MAX_RETRIES, "need at least 1 retry"
+def test_groq_api_key_present(monkeypatch):
+    """With a fake key in the environment, a reloaded config exposes a key longer than 10 chars."""
+    # Fake key so CI does not need a real one
+    monkeypatch.setenv("GROQ_API_KEY", "gsk_fakekeyfortesting1234567890")
+    import importlib
+    import config
+    # NOTE(review): config is not reloaded again afterwards, so later tests
+    # presumably see the fake key in config.GROQ_API_KEY — confirm that is intended
+    importlib.reload(config)             # re-reads env
+    key = config.GROQ_API_KEY
+    assert len(key) > 10
+# ── Agent routing logic ───────────────────────────────────────────────────────
+def test_route_returns_done_on_pass():
+    """A PASS verdict with no retries used routes to "done"."""
+    from agent import route_after_validation
+    decision = route_after_validation({"validation_result": "PASS", "retry_count": 0})
+    assert decision == "done"
+def test_route_returns_retry_on_fail_within_limit():
+    """A FAIL verdict with retry_count 0 routes to "retry"."""
+    from agent import route_after_validation
+    decision = route_after_validation({"validation_result": "FAIL", "retry_count": 0})
+    assert decision == "retry"
+def test_route_returns_done_when_retries_exhausted():
+    """A FAIL verdict with retry_count 3 routes to "done" instead of retrying."""
+    from agent import route_after_validation
+    decision = route_after_validation({"validation_result": "FAIL", "retry_count": 3})
+    assert decision == "done"
+def test_increment_retry_node():
+    """increment_retry_node turns retry_count 1 into 2."""
+    from agent import increment_retry_node
+    updated = increment_retry_node({"retry_count": 1})
+    assert updated["retry_count"] == 2
+# ── Retriever output shape (mocked indexes) ───────────────────────────────────
+@pytest.fixture
+def mock_indexes(monkeypatch):
+    """Swap every retriever global for an in-memory fake so no index files are needed."""
+    import numpy as np
+    import retriever
+    class FakeFaiss:
+        # Ignores the query vector and k; always reports ids 0, 1, 2 and no distances
+        ntotal = 3
+        def search(self, vec, k):
+            return None, np.array([[0, 1, 2]])
+    class FakeBM25:
+        # Fixed, strictly decreasing scores, so doc 0 ranks first
+        def get_scores(self, tokens):
+            return np.array([0.9, 0.5, 0.3])
+    class FakeModel:
+        # Random 384-dim float32 vectors; FakeFaiss never inspects them
+        def encode(self, texts, convert_to_numpy=True):
+            return np.random.rand(len(texts), 384).astype("float32")
+    class FakeReranker:
+        # Descending scores, one per candidate pair (at most three)
+        def predict(self, pairs):
+            return np.array([0.9, 0.7, 0.5][: len(pairs)])
+    fake_chunks = ["Paris is in France.", "Tower is 330m tall.", "Built in 1889."]
+    fake_sources = ["doc1.txt", "doc1.txt", "doc1.txt"]
+    replacements = {
+        "_faiss_index": FakeFaiss(),
+        "_bm25_index": FakeBM25(),
+        "_chunks": fake_chunks,
+        "_sources": fake_sources,
+        "_model": FakeModel(),
+        "_reranker": FakeReranker(),
+    }
+    for attr_name, fake in replacements.items():
+        monkeypatch.setattr(retriever, attr_name, fake)
+    return fake_chunks
+def test_hybrid_retrieve_returns_top_k(mock_indexes):
+    """With the three fake candidates, top_k=2 yields exactly two results."""
+    from retriever import hybrid_retrieve
+    hits = hybrid_retrieve("Where is Paris?", top_k=2)
+    assert len(hits) == 2
+def test_hybrid_retrieve_result_has_required_keys(mock_indexes):
+    """The first result exposes the chunk, source and both score fields."""
+    from retriever import hybrid_retrieve
+    first = hybrid_retrieve("Where is Paris?", top_k=1)[0]
+    for required in ("chunk", "source", "rrf_score", "ce_score"):
+        assert required in first
+def test_hybrid_retrieve_scores_are_floats(mock_indexes):
+    """Both score fields of a result are plain Python floats."""
+    from retriever import hybrid_retrieve
+    first = hybrid_retrieve("test", top_k=1)[0]
+    for score_key in ("rrf_score", "ce_score"):
+        assert isinstance(first[score_key], float)