Enterprise-RAG-System / tools /debug_hf_retrieval.py
yuvrajsingh6
deploy: v2 production baked index (zero latency)
9c4c212
from src.retriever.hybrid_retriever import HybridRetriever
from src.embeddings.embedder import Embedder
from src.reranker.cross_encoder import Reranker
import os
def debug_retrieval(
    query="what is Emerging Contaminants according to DOD?",
    top_k=20,
    rerank_top_k=5,
    preview_chars=300,
):
    """Run one query through hybrid retrieval and reranking, printing each stage.

    Called with no arguments, this reproduces the original fixed debugging
    run (same query, 20 retrieved candidates, 5 reranked results, 300-char
    previews).

    Args:
        query: Text passed to both the retriever and the reranker.
        top_k: Number of candidates requested from ``retriever.search``.
        rerank_top_k: Number of results requested from ``reranker.rerank``.
        preview_chars: Maximum number of leading characters of each
            reranked document to print.

    Returns:
        None. All output goes to stdout.
    """
    print(f"--- Debugging Query: {query} ---")

    embedder = Embedder()
    # Index paths are relative, so they resolve against the current working
    # directory of the process running this script.
    retriever = HybridRetriever(
        bm25_path="data/index/bm25.pkl",
        faiss_path="data/index/faiss.index",
        doc_map_path="data/index/doc_map.pkl",
        embedder=embedder,
    )
    reranker = Reranker()

    # 1. Hybrid Search
    # The banner is built from the parameter so it cannot disagree with the
    # value actually passed to the retriever.
    print(f"\n1. Running Hybrid Search (top_k={top_k})...")
    results = retriever.search(query, top_k=top_k)
    print(f"Retrieved {len(results)} docs.")

    # 2. Reranking
    print("\n2. Reranking...")
    # len() rather than truthiness: the concrete container type returned by
    # retriever.search is not visible here, and len() is already relied on above.
    if len(results) == 0:
        # Nothing to score; say so explicitly instead of handing the
        # reranker an empty candidate list.
        print("No documents retrieved; nothing to rerank.")
        return

    reranked = reranker.rerank(query, results, top_k=rerank_top_k)
    # NOTE(review): assumes rerank yields (doc, score) pairs where doc is
    # sliceable text and score is numeric -- confirm against Reranker.
    for i, (doc, score) in enumerate(reranked, 1):
        print(f"\n[{i}] Score: {score:.2f}")
        print(f"Content: {doc[:preview_chars]}...")
# Script entry point: run the debug pass only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_retrieval()