"""Diagnose why the Mamba paper (2312.00752) is missing from search results.""" import asyncio import sys from pathlib import Path sys.path.insert(0, str(Path(__file__).resolve().parent.parent)) from app import qdrant_svc, embed_svc, zilliz_svc, hybrid_search_svc, turso_svc MAMBA_ID = "2312.00752" async def main(): # Step 1: is the paper in Qdrant at all? vecs = await qdrant_svc.get_paper_vectors([MAMBA_ID]) in_qdrant = MAMBA_ID in vecs print(f"Mamba paper {MAMBA_ID} in Qdrant: {in_qdrant}") # Step 2: is it in Turso? meta = await turso_svc.fetch_metadata_batch([MAMBA_ID]) if MAMBA_ID in meta: print(f"Mamba paper in Turso: YES — title: {meta[MAMBA_ID].get('title')!r}") else: print("Mamba paper in Turso: NO") if not in_qdrant: print("\n--> Paper missing from Qdrant collection. End of investigation.") return # Step 3: where does it rank in dense, sparse, and fused? q = "Mamba state space model linear time" dense_vec, sparse_dict = embed_svc.encode_query(q) print(f"\nQuery: {q!r}") print(f"Sparse keys: {len(sparse_dict)}") fetch_k = 60 dense = await qdrant_svc.search_dense(dense_vec.tolist(), limit=fetch_k) sparse = await zilliz_svc.search_sparse(sparse_dict, limit=fetch_k) dense_ids = [r["arxiv_id"] for r in dense] sparse_ids = [r["arxiv_id"] for r in sparse] if MAMBA_ID in dense_ids: print(f"\nDense rank: {dense_ids.index(MAMBA_ID)+1}/{fetch_k}") else: print(f"\nDense top {fetch_k}: NOT present") if MAMBA_ID in sparse_ids: print(f"Sparse rank: {sparse_ids.index(MAMBA_ID)+1}/{fetch_k}") else: print(f"Sparse top {fetch_k}: NOT present") fused = hybrid_search_svc._rrf_fuse(dense, sparse, k=60) fused_ids = [item["arxiv_id"] for item in fused] if MAMBA_ID in fused_ids: print(f"RRF fused rank: {fused_ids.index(MAMBA_ID)+1}") else: print(f"RRF fused: NOT present in top {len(fused_ids)}") # Show top 5 of each print(f"\n=== Dense top 5 ===") for r in dense[:5]: print(f" {r['arxiv_id']} score={r['score']:.4f}") print(f"\n=== Sparse top 5 ===") for r in sparse[:5]: print(f" {r['arxiv_id']} score={r['score']:.4f}") asyncio.run(main())