Spaces:
Sleeping
Sleeping
File size: 1,044 Bytes
9c4c212 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 |
from src.retriever.hybrid_retriever import HybridRetriever
from src.embeddings.embedder import Embedder
from src.reranker.cross_encoder import Reranker
import os
def debug_retrieval(
    query: str = "what is Emerging Contaminants according to DOD?",
    retrieve_k: int = 20,
    rerank_k: int = 5,
) -> None:
    """Run one query through hybrid retrieval and reranking, printing each stage.

    Builds an ``Embedder``, a ``HybridRetriever`` over the on-disk indexes
    under ``data/index/``, and a ``Reranker``; searches for ``query``;
    reranks the hits; and prints the score and a 300-character preview of
    every reranked result.

    Args:
        query: Text to search for. Defaults to the original hard-coded
            debugging query.
        retrieve_k: ``top_k`` passed to ``retriever.search``.
        rerank_k: ``top_k`` passed to ``reranker.rerank``.

    Returns:
        None. All output goes to stdout.
    """
    print(f"--- Debugging Query: {query} ---")

    embedder = Embedder()
    retriever = HybridRetriever(
        bm25_path="data/index/bm25.pkl",
        faiss_path="data/index/faiss.index",
        doc_map_path="data/index/doc_map.pkl",
        embedder=embedder,
    )
    reranker = Reranker()

    # 1. Hybrid Search
    print(f"\n1. Running Hybrid Search (top_k={retrieve_k})...")
    results = retriever.search(query, top_k=retrieve_k)
    print(f"Retrieved {len(results)} docs.")

    # 2. Reranking
    print("\n2. Reranking...")
    reranked = reranker.rerank(query, results, top_k=rerank_k)
    # NOTE(review): assumes rerank() yields (doc, score) pairs where doc is
    # sliceable text and score is numeric -- confirm against Reranker.
    for rank, (doc, score) in enumerate(reranked, 1):
        print(f"\n[{rank}] Score: {score:.2f}")
        print(f"Content: {doc[:300]}...")
# Script entry point: run the retrieval debug session only when this file is
# executed directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_retrieval()
|