Spaces:
Running
Running
File size: 2,171 Bytes
"""
Test the full retrieval pipeline: hybrid search + re-ranking + diversity.
Compare it against pure dense search to show the improvement.
"""
import time
from src.utils.logger import setup_logger, get_logger
from src.retrieval.retrieval_pipeline import RetrievalPipeline
from src.vectorstore.qdrant_store import QdrantStore
from src.embeddings.embedding_model import EmbeddingModel
# Run the project's logger setup helper at import time, before this module
# requests its own logger below.
# NOTE(review): what setup_logger configures (handlers, level, format) is
# defined in src.utils.logger and is not visible here — confirm there.
setup_logger()
# Module-level logger, keyed by this module's __name__.
logger = get_logger(__name__)
def test_pipeline(pipeline: RetrievalPipeline, query: str):
print(f"\n{'='*60}")
print(f"QUERY: {query}")
print(f"{'='*60}")
start = time.time()
results = pipeline.retrieve(query, top_k_final=5)
elapsed = time.time() - start
print(f"Retrieved {len(results)} results in {elapsed:.2f}s\n")
for i, r in enumerate(results):
print(f"[{i+1}] CE Score: {r.get('ce_score', 'N/A'):>7} | "
f"RRF: {r.get('rrf_score', 'N/A'):.4f}")
print(f" {r.get('title','')[:65]}...")
print(f" {r.get('text','')[:120].replace(chr(10),' ')}...")
print()
def main():
    """Build a RetrievalPipeline and print results for the demo queries.

    Runs three free-text queries through ``test_pipeline``, then issues one
    direct ``pipeline.retrieve`` call with ``filter_year_gte=2026`` and
    ``top_k_final=3`` and prints each hit's published date, CE score and the
    first 55 characters of its title.
    """
    logger.info("Initializing full retrieval pipeline...")
    pipeline = RetrievalPipeline()

    demo_queries = (
        # Test 1: Conceptual query
        "how does self-attention mechanism work in transformers",
        # Test 2: Specific method query - tests BM25 keyword advantage
        "LoRA low-rank adaptation fine-tuning",
        # Test 3: Comparison query
        "reinforcement learning reward shaping techniques",
    )
    for demo_query in demo_queries:
        test_pipeline(pipeline, demo_query)

    # Test 4: With year filter
    # NOTE(review): the banner says "2026 only" while the keyword is
    # `filter_year_gte` — confirm which semantics the pipeline applies.
    banner = "=" * 60
    print(f"\n{banner}")
    print("FILTERED: 'graph neural networks' (2026 only)")
    print(banner)

    filtered_hits = pipeline.retrieve(
        "graph neural networks",
        filter_year_gte=2026,
        top_k_final=3,
    )
    for rank, hit in enumerate(filtered_hits, start=1):
        published = hit.get('published_date', 'N/A')
        ce_score = hit.get('ce_score', 'N/A')
        short_title = hit.get('title', '')[:55]
        print(f"[{rank}] {published} | CE: {ce_score:>6} | {short_title}...")

    logger.info("\n✅ Retrieval pipeline test complete")
# Run the demo only when this file is executed as a script, not on import.
if __name__ == "__main__":
    main()