File size: 5,507 Bytes
e1624f5 | 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 | """
SOTA RAG Pipeline — Integration Test Suite.
Tests the full multi-stage retrieval pipeline:
1. Bi-Encoder recall from ChromaDB
2. Distance Gate filtering
3. Cross-Encoder Re-ranking
4. Token Trimming
5. Collection stats
"""
import sys
import os
# Ensure project root is on path
sys.path.insert(0, os.path.join(os.path.dirname(__file__), ".."))
from rag_engine.retriever import OncoRAGRetriever
def test_standard_query(
    query: str = "What is the recommended treatment for advanced HCC?",
) -> None:
    """
    Run one query through the retriever with re-ranking on and print a report.

    Prints the collection stats, then each returned result (source, page,
    section header, bi-encoder distance, cross-encoder score and a text
    excerpt), and finally a preview of the string produced by
    ``format_context_for_llm``. Nothing is asserted; the output is meant to
    be read by a human.

    Args:
        query: A clinical question to search for in the guidelines.
    """
    excerpt_chars = 250
    preview_chars = 500

    print("=" * 70)
    print("🧪 TEST 1: Standard SOTA Query Pipeline")
    print("=" * 70)

    retriever = OncoRAGRetriever()
    stats = retriever.get_collection_stats()
    print(f"\n📊 Collection: {stats['name']} | Docs: {stats['count']}")
    print(f" Distance Threshold: {stats['distance_threshold']}")
    print(f" Context Budget: {stats['max_context_chars']} chars")

    print(f"\n❓ Query: '{query}'")
    results = retriever.query(query, n_results=5, use_reranking=True)

    # An empty result list is reported and treated as a normal outcome,
    # not as a failure.
    if not results:
        print("\n⚠️ No results passed the distance gate!")
        print(" → This means the query is likely outside guideline coverage.")
        print(" → Anti-Hallucination policy: 'Información no concluyente'")
        return

    print(f"\n📋 {len(results)} results passed all stages:\n")
    for i, r in enumerate(results, 1):
        # Scores are optional on a result; fall back to a placeholder.
        ce_score = r.get("cross_encoder_score", "N/A")
        bi_dist = r.get("bi_encoder_distance", "N/A")

        # Only mark the excerpt with an ellipsis when text was actually cut,
        # mirroring how the context preview below is handled.
        text = r["text"]
        excerpt = text[:excerpt_chars] + "..." if len(text) > excerpt_chars else text

        print(f"--- Result {i} ---")
        print(f" 📄 Source: {r['source']} (Page: {r['page']})")
        print(f" 🏷️ Section: {r['header']}")
        print(f" 📏 Bi-Encoder Distance: {bi_dist}")
        print(f" 🎯 Cross-Encoder Score: {ce_score}")
        print(f" 📝 Excerpt: {excerpt}")
        print()

    # Show formatted context
    formatted = retriever.format_context_for_llm(results)
    print(f"\n📋 Formatted LLM Context ({len(formatted)} chars):")
    print("-" * 50)
    print(formatted[:preview_chars] + "..." if len(formatted) > preview_chars else formatted)
def test_distance_gate() -> None:
    """
    Check that an off-topic query yields no results.

    Sends a question about the common cold to the retriever (re-ranking
    enabled) and prints PASS when the returned list is empty. When results
    do come back, a warning is printed together with each result's
    bi-encoder distance and header so the threshold can be reviewed.
    Nothing is asserted.
    """
    print("\n" + "=" * 70)
    print("🧪 TEST 2: Distance Gate (Anti-Hallucination)")
    print("=" * 70)

    retriever = OncoRAGRetriever()
    irrelevant_query = "How to treat a common cold with chicken soup"
    print(f"\n❓ Irrelevant Query: '{irrelevant_query}'")

    results = retriever.query(irrelevant_query, use_reranking=True)
    if not results:
        print("✅ PASS — Distance gate correctly rejected all results!")
        print(" → Anti-hallucination defense is working.")
    else:
        print(f"⚠️ WARN — {len(results)} results passed (may need tighter threshold)")
        for r in results:
            print(f" Distance: {r.get('bi_encoder_distance', '?')} | {r['header']}")
def test_cross_encoder_reranking() -> None:
    """
    Compare result order with and without cross-encoder re-ranking.

    Runs the same query twice (``use_reranking=False`` and
    ``use_reranking=True``), prints both orderings, and reports whether the
    sequence of result headers differs between the two runs. Nothing is
    asserted; an unchanged order is reported as informational only.
    """
    print("\n" + "=" * 70)
    print("🧪 TEST 3: Cross-Encoder Re-Ranking Effect")
    print("=" * 70)

    retriever = OncoRAGRetriever()
    query = "EGFR mutation non-small cell lung cancer targeted therapy"
    print(f"\n❓ Query: '{query}'")

    # Without re-ranking (bi-encoder order)
    results_no_rerank = retriever.query(query, n_results=5, use_reranking=False)
    # With re-ranking
    results_reranked = retriever.query(query, n_results=5, use_reranking=True)

    print("\n📊 Bi-Encoder Order (no re-rank):")
    for i, r in enumerate(results_no_rerank, 1):
        print(f" {i}. [{r.get('bi_encoder_distance', '?')}] {r['header'][:60]}")

    print("\n📊 After Cross-Encoder Re-Rank:")
    for i, r in enumerate(results_reranked, 1):
        print(f" {i}. [score={r.get('cross_encoder_score', '?')}] {r['header'][:60]}")

    # Check if order changed: the comparison is on the header sequences only.
    headers_no = [r["header"] for r in results_no_rerank]
    headers_re = [r["header"] for r in results_reranked]
    if headers_no != headers_re:
        print("\n✅ PASS — Re-ranking changed the order (precision improvement).")
    else:
        print("\n ℹ️ INFO — Same order (bi-encoder was already optimal for this query).")
def test_token_trimming() -> None:
    """
    Verify that the total context stays within the character budget.

    Builds a retriever with a deliberately small ``max_context_chars``,
    runs one query, sums the length of every result's ``text`` field and
    prints PASS when that sum does not exceed the budget, WARN otherwise.
    Nothing is asserted.
    """
    # Single source of truth for the budget, so the constructor argument,
    # the printed report and the comparison cannot drift apart.
    budget_chars = 2000

    print("\n" + "=" * 70)
    print("🧪 TEST 4: Token Trimming (Context Budget)")
    print("=" * 70)

    retriever = OncoRAGRetriever(max_context_chars=budget_chars)  # Tight budget
    query = "Breast cancer treatment recommendations"
    results = retriever.query(query, n_results=10)

    total_chars = sum(len(r["text"]) for r in results)
    print(f"\n Budget: {budget_chars} chars")
    print(f" Actual: {total_chars} chars in {len(results)} results")

    if total_chars <= budget_chars:
        print("✅ PASS — Context fits within budget.")
    else:
        print("⚠️ WARN — Context exceeds budget!")
# Script entry point: run every check in order, then print a closing banner.
# The checks only print their findings, so the banner signals completion,
# not success.
if __name__ == "__main__":
    test_standard_query()
    test_distance_gate()
    test_cross_encoder_reranking()
    test_token_trimming()
    print("\n" + "=" * 70)
    print("🏁 All SOTA RAG tests completed.")
    print("=" * 70)
|