File size: 1,669 Bytes
22e9366
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
"""Phase 3 smoke test for the retriever utilities.

Exercises, in order: the pure scoring helpers (``recency_score``,
``hybrid_score``), Semantic Scholar paper search, DuckDuckGo/Tavily web
search, and citation-graph construction from the retrieved papers.
Network-dependent steps degrade gracefully: a warning line is printed
instead of failing when no results come back.
"""
import logging
import sys

# Make the project root importable when running this script directly
# from the repo root (no package install required).
sys.path.insert(0, ".")
logging.basicConfig(level=logging.WARNING)

# Must come after the sys.path tweak above, so this import stays here
# rather than at the very top of the file.
from src.retriever_utils import (
    search_semantic_scholar,
    search_web,
    build_citation_graph,
    hybrid_score,
    recency_score,
)


def main() -> None:
    """Run the four Phase 3 checks, printing one status line per check."""
    print("=== Phase 3: Retriever Utils ===\n")

    # Test 1: Scoring functions — deterministic, no network required.
    print("--- Scoring functions ---")
    print(f"βœ“ recency linear year=2024: {recency_score(2024, 'linear'):.3f}")
    print(f"βœ“ recency linear year=2019: {recency_score(2019, 'linear'):.3f}")
    print(f"βœ“ recency log    year=2019: {recency_score(2019, 'log'):.3f}")
    print(f"βœ“ recency none   year=2019: {recency_score(2019, 'none'):.3f}")
    print(f"βœ“ hybrid score (sim=0.8, year=2023, citations=500): {hybrid_score(0.8, 2023, 500):.4f}")

    # Test 2: Semantic Scholar — network call; may legitimately return nothing
    # while the API key is still propagating.
    print("\n--- Semantic Scholar ---")
    papers = search_semantic_scholar("KV cache compression LLM", limit=3)
    if papers:
        for p in papers:
            print(f"  βœ“ [{p.hybrid_score:.3f}] {p.title[:60]} ({p.year})")
    else:
        print("  ⚠ No results (S2 key may not be active yet β€” expected)")

    # Test 3: Web search — falls through DDG then Tavily inside the helper.
    print("\n--- DuckDuckGo ---")
    results = search_web("KV cache compression large language models 2024", limit=3)
    if results:
        for r in results:
            print(f"  βœ“ [{r.source}] {r.title[:60]}")
    else:
        print("  ⚠ No results from DDG or Tavily")

    # Test 4: Citation graph — reuses the papers from Test 2, so it is
    # skipped whenever that retrieval came back empty.
    print("\n--- Citation graph ---")
    if papers:
        graph = build_citation_graph(papers)
        print(f"  βœ“ Graph nodes: {len(graph)}")
        edges = sum(len(v) for v in graph.values())
        print(f"  βœ“ Internal edges: {edges}")
    else:
        print("  ⚠ Skipped (no papers retrieved)")

    print("\nβœ… Phase 3 complete")


if __name__ == "__main__":
    main()