"""Phase 3 smoke check for the helpers in ``src.retriever_utils``.

Exercises, in order: the scoring functions, the Semantic Scholar search,
the web search, and the citation-graph builder, printing a short report
for each section. It is a manual check, not an automated test: nothing is
asserted, and an empty search result is reported rather than treated as a
failure.

Run it from the repository root, because ``"."`` is pushed onto
``sys.path`` so that the ``src`` package can be imported.

NOTE(review): the "β" marker in the printed messages looks like a
mis-decoded UTF-8 status symbol -- confirm the file's encoding and restore
the intended glyphs.
"""
import logging
import sys

# Must run before the ``src`` import below: makes the current directory
# importable.
sys.path.insert(0, ".")
# Only warnings and above are shown, so the report is not buried in log
# output.
logging.basicConfig(level=logging.WARNING)


from src.retriever_utils import (
    search_semantic_scholar,
    search_web,
    build_citation_graph,
    hybrid_score,
    recency_score,
)


print("=== Phase 3: Retriever Utils ===\n")


# --- 1. Scoring functions: no I/O visible at these call sites ---
print("--- Scoring functions ---")
print(f"β recency linear year=2024: {recency_score(2024, 'linear'):.3f}")
print(f"β recency linear year=2019: {recency_score(2019, 'linear'):.3f}")
print(f"β recency log year=2019: {recency_score(2019, 'log'):.3f}")
print(f"β recency none year=2019: {recency_score(2019, 'none'):.3f}")
print(f"β hybrid score (sim=0.8, year=2023, citations=500): {hybrid_score(0.8, 2023, 500):.4f}")


# --- 2. Semantic Scholar search ---
print("\n--- Semantic Scholar ---")
papers = search_semantic_scholar("KV cache compression LLM", limit=3)
if papers:
    for p in papers:
        # Titles are cut to 60 characters to keep each result on one line.
        print(f" β [{p.hybrid_score:.3f}] {p.title[:60]} ({p.year})")
else:
    # An empty result is tolerated; the message attributes it to the S2 key.
    print(" β No results (S2 key may not be active yet β expected)")


# --- 3. Web search ---
# presumably backed by DuckDuckGo and/or Tavily, per the messages below --
# verify against search_web
print("\n--- DuckDuckGo ---")
results = search_web("KV cache compression large language models 2024", limit=3)
if results:
    for r in results:
        print(f" β [{r.source}] {r.title[:60]}")
else:
    print(" β No results from DDG or Tavily")


# --- 4. Citation graph: needs the papers found in section 2 ---
print("\n--- Citation graph ---")
if papers:
    graph = build_citation_graph(papers)
    print(f" β Graph nodes: {len(graph)}")
    # The graph is used as a mapping whose values are sized collections;
    # the edge count is the total size of those collections.
    edges = sum(len(v) for v in graph.values())
    print(f" β Internal edges: {edges}")
else:
    print(" β Skipped (no papers retrieved)")


print("\nβ Phase 3 complete")