# File size: 1,669 Bytes | 22e9366
import sys, logging
sys.path.insert(0, ".")
logging.basicConfig(level=logging.WARNING)
from src.retriever_utils import (
search_semantic_scholar,
search_web,
build_citation_graph,
hybrid_score,
recency_score,
)
# Banner for this run of the retriever-utils checks.
print("=== Phase 3: Retriever Utils ===\n")

# Test 1: Scoring functions
# Call recency_score once per (year, mode) pair, then hybrid_score once, and
# print each result.
# NOTE(review): the leading "β" looks like a mis-decoded status glyph; confirm
# against the original file before changing it.
print("--- Scoring functions ---")
for pub_year, mode in ((2024, "linear"), (2019, "linear"), (2019, "log"), (2019, "none")):
    value = recency_score(pub_year, mode)
    print(f"β recency {mode} year={pub_year}: {value:.3f}")
combined = hybrid_score(0.8, 2023, 500)
print(f"β hybrid score (sim=0.8, year=2023, citations=500): {combined:.4f}")
# Test 2: Semantic Scholar
# Run one query through search_semantic_scholar (limit=3) and print each
# returned paper's hybrid score, title (first 60 characters) and year.
# `papers` is read again by the citation-graph check later in this script.
print("\n--- Semantic Scholar ---")
papers = search_semantic_scholar("KV cache compression LLM", limit=3)
if papers:
    for p in papers:
        print(f" β [{p.hybrid_score:.3f}] {p.title[:60]} ({p.year})")
else:
    # A falsy result is only reported; the script carries on.
    print(" β No results (S2 key may not be active yet β expected)")
# Test 3: Web search
# Run one query through search_web (limit=3) and print each result's source
# and title (first 60 characters).
# NOTE(review): the messages mention DuckDuckGo/DDG and Tavily; which backend
# search_web actually uses is not visible from this script.
print("\n--- DuckDuckGo ---")
results = search_web("KV cache compression large language models 2024", limit=3)
if results:
    for r in results:
        print(f" β [{r.source}] {r.title[:60]}")
else:
    # A falsy result is only reported; the script carries on.
    print(" β No results from DDG or Tavily")
# Test 4: Citation graph
# Build a citation graph from the papers fetched by the Semantic Scholar
# check, if there were any, and print its size.
print("\n--- Citation graph ---")
if papers:
    graph = build_citation_graph(papers)
    print(f" β Graph nodes: {len(graph)}")
    # The graph is used as a mapping here: the edge count is the total
    # length of all its values.
    edges = sum(len(v) for v in graph.values())
    print(f" β Internal edges: {edges}")
else:
    # Nothing to build the graph from.
    print(" β Skipped (no papers retrieved)")
# Final status line for the run.
# NOTE(review): this literal was split across two lines in the listing and has
# been rejoined with a single space; "β" looks like a mis-decoded status glyph.
# Confirm the exact text against the original file.
print("\nβ Phase 3 complete")