File size: 1,044 Bytes
9c4c212
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
from src.retriever.hybrid_retriever import HybridRetriever
from src.embeddings.embedder import Embedder
from src.reranker.cross_encoder import Reranker
import os

def debug_retrieval(
    query: str = "what is Emerging Contaminants according to DOD?",
    retrieve_k: int = 20,
    rerank_k: int = 5,
    preview_chars: int = 300,
):
    """Run one query through hybrid retrieval and reranking, printing each stage.

    The function builds an ``Embedder``, a ``HybridRetriever`` over the index
    files under ``data/index/`` (paths are relative, so they resolve against
    the current working directory), and a ``Reranker``. It then searches,
    reranks, and prints the score and a content preview for each reranked hit.

    Calling it with no arguments reproduces the original fixed debug run.

    Args:
        query: Text passed to both the retriever and the reranker.
        retrieve_k: ``top_k`` forwarded to ``HybridRetriever.search``.
        rerank_k: ``top_k`` forwarded to ``Reranker.rerank``.
        preview_chars: Number of leading characters of each document printed.

    Returns:
        None. All output goes to stdout.
    """
    print(f"--- Debugging Query: {query} ---")

    embedder = Embedder()
    retriever = HybridRetriever(
        bm25_path="data/index/bm25.pkl",
        faiss_path="data/index/faiss.index",
        doc_map_path="data/index/doc_map.pkl",
        embedder=embedder,
    )

    reranker = Reranker()

    # 1. Hybrid Search
    print(f"\n1. Running Hybrid Search (top_k={retrieve_k})...")
    results = retriever.search(query, top_k=retrieve_k)

    print(f"Retrieved {len(results)} docs.")

    # 2. Reranking
    print("\n2. Reranking...")
    reranked = reranker.rerank(query, results, top_k=rerank_k)

    # NOTE(review): assumes rerank() yields (doc, score) pairs where doc is a
    # sliceable string and score is a number -- confirm against Reranker.
    for rank, (doc, score) in enumerate(reranked, 1):
        print(f"\n[{rank}] Score: {score:.2f}")
        # The trailing "..." is printed unconditionally, even when the
        # document is shorter than the preview length.
        print(f"Content: {doc[:preview_chars]}...")

# Script entry point: run the debug pipeline only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    debug_retrieval()