ResearchIT / scripts /test_citation_boost.py
siddhm11
Phase 6.5: Pipeline telemetry, search UX fixes, latency profiling
ec67b2f
"""Side-by-side comparison: BEFORE vs AFTER citation boost.
Shows beginner vs expert results for the same topic.
Also verifies Band A (known-item) queries aren't broken.
"""
import asyncio, sys, time
from pathlib import Path
sys.path.insert(0, str(Path(__file__).resolve().parent.parent))
from app import hybrid_search_svc, turso_svc, embed_svc
# One entry per topic: (topic label, beginner-style query, expert-style query).
# Both queries of an entry target the same topic so that their top results can
# be printed one after the other and compared.
COMPARISONS = [
    (
        "TRANSFORMERS",
        "how do transformers work in NLP",
        "attention is all you need",
    ),
    (
        "DIFFUSION",
        "what are diffusion models and how do they generate images",
        "denoising diffusion probabilistic models",
    ),
    (
        "GPT-4",
        "how does GPT-4 work",
        "GPT-4 Technical Report",
    ),
    (
        "RLHF",
        "what is reinforcement learning from human feedback",
        "reinforcement learning from human feedback",
    ),
]
# Known-item ("Band A") checks: (query text, ID of the result expected at rank #1).
BAND_A = [
    ("attention is all you need", "1706.03762"),
    ("Deep Residual Learning for Image Recognition", "1512.03385"),
    (
        # Split only for line length; the pieces concatenate to the full title.
        "BERT: Pre-training of Deep Bidirectional Transformers "
        "for Language Understanding",
        "1810.04805",
    ),
]
async def run_query(q: str):
    """Run one hybrid search and look up metadata for its results.

    Args:
        q: Query text handed to ``hybrid_search_svc.search`` (called with
            ``limit=10``).

    Returns:
        A ``(results, meta)`` pair: ``results`` is whatever the search
        returned, and ``meta`` is the value returned by
        ``turso_svc.fetch_metadata_batch(results)``, or an empty dict when the
        search returned nothing.
    """
    hits = await hybrid_search_svc.search(q, limit=10)
    if not hits:
        # Nothing to look up, so skip the metadata call entirely.
        return hits, {}
    details = await turso_svc.fetch_metadata_batch(hits)
    return hits, details
def _citation_count(row):
    """Return ``row["citation_count"]``, treating a missing row/key or ``None`` as 0."""
    return (row or {}).get("citation_count") or 0


def _print_top_results(label, query, results, meta, top_n=5):
    """Print a heading for ``query`` followed by its first ``top_n`` results."""
    print(f"\n {label}: {query!r}")
    for position, aid in enumerate(results[:top_n], 1):
        row = meta.get(aid) or {}
        title = (row.get("title") or "?")[:60]
        # A ``None`` count must be normalised before formatting: ``{None:>6}``
        # raises TypeError because None rejects a non-empty format spec.
        cites = _citation_count(row)
        print(f" {position}. [{cites:>6} cites] {title}")


async def main():
    """Print the Band A check and the beginner-vs-expert comparison to stdout.

    Steps:
        1. Warm-up: one ``embed_svc.encode_query`` call and one
           ``turso_svc.fetch_metadata_batch`` call whose results are discarded.
        2. Band A: for every ``(query, expected_id)`` in ``BAND_A`` print
           ``PASS`` (expected ID is first), ``RANK #n`` (present but not
           first) or ``MISS`` (absent), plus the expected item's citation count.
        3. Side-by-side: for every entry in ``COMPARISONS`` print the top five
           results of the beginner query and of the expert query.

    Citation counts that are missing or ``None`` are shown as 0.
    Nothing is returned; failures in Band A are only reported, not raised.
    """
    print("Warming up BGE-M3...")
    # Results of both warm-up calls are intentionally unused.
    # NOTE(review): presumably these pay one-time model-load / connection
    # costs before the real queries run — confirm.
    embed_svc.encode_query("warmup")
    await turso_svc.fetch_metadata_batch(["1706.03762"])

    # === Band A verification ===
    print()
    print("=" * 90)
    print("BAND A VERIFICATION - Known-item queries (must still be #1)")
    print("=" * 90)
    for q, expected in BAND_A:
        results, meta = await run_query(q)
        # 1-based position of the expected ID, or -1 when it is not returned.
        rank = results.index(expected) + 1 if expected in results else -1
        if rank == 1:
            status = "PASS"
        elif rank > 0:
            status = f"RANK #{rank}"
        else:
            status = "MISS"
        cites = _citation_count(meta.get(expected))
        print(f" [{status:>8}] {q[:55]:55s} ({cites} cites)")

    # === Side-by-side comparisons ===
    print()
    print("=" * 90)
    print("SIDE-BY-SIDE: Beginner vs Expert queries (same topic)")
    print("=" * 90)
    for topic, beginner_q, expert_q in COMPARISONS:
        print(f"\n--- {topic} ---")
        for label, query in (("BEGINNER", beginner_q), ("EXPERT", expert_q)):
            results, meta = await run_query(query)
            _print_top_results(label, query, results, meta)

    print()
    print("=" * 90)
    print("DONE")
    print("=" * 90)
# Script entry point: only start the event loop when executed directly, so
# importing this module does not trigger any searches.
if __name__ == "__main__":
    asyncio.run(main())