File size: 3,595 Bytes
e955d78
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
import sys, logging
sys.path.insert(0, ".")
logging.basicConfig(level=logging.WARNING)

import requests, os
from dotenv import load_dotenv
load_dotenv()

from src.state import ResearchState, SessionContext
from src.agents.planner import planner_node
from src.agents.retriever import retriever_node
from src.memory import init_db

# init_db is imported from src.memory; it must run before the agent nodes below
# (presumably it creates/opens the memory store — confirm against src/memory).
init_db()
print("=== Phase 5: Retriever Agent ===\n")

# Initial state handed to the planner. Only the query, the session id and
# decay_config carry real values; every other field starts empty, zero or None.
state: ResearchState = {
    "original_query": "What is the current state of speculative decoding in LLMs?",
    "session_id": "test-session-005",
    "session_context": None,
    "sub_questions": [],
    "retrieved_papers": [],
    "citation_graph": {},
    "web_results": [],
    "critic_verdict": "",
    "critic_notes": "",
    "rewritten_questions": [],
    "retry_count": 0,
    "synthesized_position": "",
    "claim_confidences": [],
    "session_update": None,
    "export_md": "",
    "decay_config": "linear",
    "calibration_bin": "",
    "latency_ms": 0.0,
}

print("Step 1: Running planner...")
state = planner_node(state)

# List what the planner produced, numbered from 1.
planned = state['sub_questions']
print(f"  Sub-questions: {len(planned)}")
for number, question in enumerate(planned, start=1):
    print(f"    {number}. {question}")

# -------------------------------------------------------------------
# Raw S2 API debug — bypasses all our code
# -------------------------------------------------------------------
# Calls the Semantic Scholar search endpoint directly so that a problem with
# the key, the network or the API can be told apart from a problem in our own
# wrapper. This section is diagnostic only: any failure is reported and the
# script carries on to the wrapper and agent checks.
print("\n--- Raw S2 API debug ---")
s2_key = os.getenv("S2_API_KEY")
print(f"  Key present: {bool(s2_key)}")
# Only the first 8 characters are echoed, never the whole key.
print(f"  Key preview: {s2_key[:8] if s2_key else 'NONE'}...")

# Use the planner's first sub-question; if the planner returned none, fall
# back to the original query rather than failing with an IndexError.
planned_questions = state['sub_questions']
if planned_questions:
    first_q = planned_questions[0]
else:
    print("  (planner returned no sub-questions; using the original query)")
    first_q = state['original_query']
print(f"  Query: {first_q}")

# Send the key header only when a key is configured.
headers = {"x-api-key": s2_key} if s2_key else {}
params = {
    "query": first_q,
    "limit": 3,
    "fields": "title,abstract,year,citationCount,paperId",
}

try:
    response = requests.get(
        "https://api.semanticscholar.org/graph/v1/paper/search",
        headers=headers,
        params=params,
        timeout=15,
    )
except requests.RequestException as exc:
    # Timeouts, DNS or connection errors: report and continue with no data.
    print(f"  Request failed: {exc}")
    data = {}
else:
    print(f"  HTTP status: {response.status_code}")
    try:
        data = response.json()
    except ValueError:
        # Every JSON decode error requests can raise is a ValueError subclass;
        # error pages (e.g. from a gateway) are not guaranteed to be JSON.
        print(f"  Non-JSON body: {response.text[:200]!r}")
        data = {}
    if not isinstance(data, dict):
        data = {}

# `or []` also covers a payload where "data" is present but null.
items = data.get('data') or []
print(f"  Total in S2: {data.get('total', 0)}")
print(f"  Items in data[]: {len(items)}")

for i, p in enumerate(items[:3], 1):
    has_abstract = "YES" if p.get("abstract") else "NONE"
    # `or '?'` also covers a title that is present but null.
    title = p.get('title') or '?'
    print(f"  [{i}] abstract:{has_abstract} | {title[:55]}")

# -------------------------------------------------------------------
# Now test our wrapper directly
# -------------------------------------------------------------------
# Same query as the raw call above, but through the project's own search
# helper with use_cache=False passed explicitly.
print("\n--- Our wrapper (use_cache=False) ---")
from src.retriever_utils import search_semantic_scholar

papers = search_semantic_scholar(first_q, limit=3, use_cache=False)
print(f"  Wrapper returned: {len(papers)} papers")
for paper in papers:
    score = paper.hybrid_score
    short_title = paper.title[:55]  # keep each line compact
    print(f"    [{score:.3f}] {short_title} ({paper.year})")

# -------------------------------------------------------------------
# Full retriever agent
# -------------------------------------------------------------------
print("\nStep 2: Running full retriever agent...")
state = retriever_node(state)

# Summary counts, read from the state returned by retriever_node.
print("\n--- Final Results ---")
retrieved = state['retrieved_papers']
print(f"  Papers retrieved: {len(retrieved)}")
web_hits = state['web_results']
print(f"  Web results:      {len(web_hits)}")
# Total number of entries across all values of the citation graph.
edge_total = 0
for neighbours in state['citation_graph'].values():
    edge_total += len(neighbours)
print(f"  Citation edges:   {edge_total}")

# Shows the first three entries as returned; no sorting happens here, so the
# "by hybrid score" label assumes retriever_node already ordered the list.
if retrieved:
    print("\n  Top 3 papers by hybrid score:")
    for paper in retrieved[:3]:
        print(f"    [{paper.hybrid_score:.3f}] {paper.title[:65]} ({paper.year})")

if web_hits:
    print("\n  Sample web results:")
    for hit in web_hits[:3]:
        print(f"    [{hit.source}] {hit.title[:65]}")

print("\n✅ Phase 5 complete")