# File size: 3,595 Bytes
# Phase 5 smoke script: runs the planner, probes the Semantic Scholar API
# directly, then exercises the project's retriever wrapper and retriever agent.
import sys
import logging

# Put the current working directory first on the import path so the `src`
# package resolves (presumably the script is launched from the repository
# root — confirm). This must happen before the `src.*` imports below.
sys.path.insert(0, ".")
logging.basicConfig(level=logging.WARNING)

import requests
import os
from dotenv import load_dotenv

# Load .env before the project imports, in case any of them read environment
# variables at import time; the later os.getenv("S2_API_KEY") relies on it too.
load_dotenv()

from src.state import ResearchState, SessionContext
from src.agents.planner import planner_node
from src.agents.retriever import retriever_node
from src.memory import init_db

init_db()

print("=== Phase 5: Retriever Agent ===\n")
# Build initial state. Only the query, the session id and the decay config
# carry real values; every other field starts out empty (None, "", [], {}, 0).
state: ResearchState = {
    # --- request / session ---
    "original_query": "What is the current state of speculative decoding in LLMs?",
    "session_id": "test-session-005",
    "session_context": None,
    # --- sub-questions and retrieval results ---
    "sub_questions": [],
    "retrieved_papers": [],
    "citation_graph": {},
    "web_results": [],
    # --- critic / retry bookkeeping ---
    "critic_verdict": "",
    "critic_notes": "",
    "rewritten_questions": [],
    "retry_count": 0,
    # --- synthesis and export ---
    "synthesized_position": "",
    "claim_confidences": [],
    "session_update": None,
    "export_md": "",
    # --- configuration and metrics ---
    "decay_config": "linear",
    "calibration_bin": "",
    "latency_ms": 0.0,
}
# Step 1: replace the state with whatever the planner node returns, then list
# the sub-questions it contains (numbered from 1).
print("Step 1: Running planner...")
state = planner_node(state)
print(f" Sub-questions: {len(state['sub_questions'])}")
for number, question in enumerate(state['sub_questions'], start=1):
    print(f" {number}. {question}")
# -------------------------------------------------------------------
# Raw S2 API debug — bypasses all our code
# -------------------------------------------------------------------
# Sends the first sub-question straight to the Semantic Scholar search
# endpoint with `requests`, so a failure here points at the key, the network
# or the API rather than at the project's wrapper.
print("\n--- Raw S2 API debug ---")
s2_key = os.getenv("S2_API_KEY")
print(f" Key present: {bool(s2_key)}")
# Only the first 8 characters are echoed, never the whole key.
print(f" Key preview: {s2_key[:8] if s2_key else 'NONE'}...")

# Indexing [0] on an empty list would die with a bare IndexError; stop with a
# readable message instead, since every later step needs a query.
if not state['sub_questions']:
    raise SystemExit(" Planner returned no sub-questions; nothing to search for.")
first_q = state['sub_questions'][0]
print(f" Query: {first_q}")

# Send the key header only when a key is configured.
headers = {"x-api-key": s2_key} if s2_key else {}
params = {
    "query": first_q,
    "limit": 3,
    "fields": "title,abstract,year,citationCount,paperId",
}
r = requests.get(
    "https://api.semanticscholar.org/graph/v1/paper/search",
    headers=headers,
    params=params,
    timeout=15,
)
print(f" HTTP status: {r.status_code}")

# Error responses are not guaranteed to carry a JSON object. requests raises a
# ValueError subclass when the body cannot be decoded; treat that (or any
# non-object payload) as an empty result and show the start of the raw body.
try:
    data = r.json()
except ValueError:
    print(f" Non-JSON response body: {r.text[:200]!r}")
    data = {}
if not isinstance(data, dict):
    data = {}

# `or []` also covers a payload where "data" is present but null.
results = data.get("data") or []
print(f" Total in S2: {data.get('total', 0)}")
print(f" Items in data[]: {len(results)}")
for i, p in enumerate(results[:3]):
    has_abstract = "YES" if p.get("abstract") else "NONE"
    # A present-but-null title would make the slice below raise TypeError.
    title = p.get("title") or "?"
    print(f" [{i+1}] abstract:{has_abstract} | {title[:55]}")
# -------------------------------------------------------------------
# Now test our wrapper directly
# -------------------------------------------------------------------
# Same query as the raw probe, but routed through the project's
# search_semantic_scholar helper with use_cache=False.
print("\n--- Our wrapper (use_cache=False) ---")
from src.retriever_utils import search_semantic_scholar

papers = search_semantic_scholar(first_q, limit=3, use_cache=False)
print(f" Wrapper returned: {len(papers)} papers")
for paper in papers:
    score, short_title, year = paper.hybrid_score, paper.title[:55], paper.year
    print(f" [{score:.3f}] {short_title} ({year})")
# -------------------------------------------------------------------
# Full retriever agent
# -------------------------------------------------------------------
# Step 2: run the retriever node on the planner's state and summarise what the
# returned state contains.
print("\nStep 2: Running full retriever agent...")
state = retriever_node(state)

print("\n--- Final Results ---")
print(f" Papers retrieved: {len(state['retrieved_papers'])}")
print(f" Web results: {len(state['web_results'])}")
# Edge count = combined length of every value in the citation graph mapping.
print(f" Citation edges: {sum(len(v) for v in state['citation_graph'].values())}")

if state['retrieved_papers']:
    print("\n Top 3 papers by hybrid score:")
    # NOTE(review): this takes the first three entries as-is; it assumes
    # retriever_node already returns papers sorted by hybrid score — confirm.
    for paper in state['retrieved_papers'][:3]:
        print(f" [{paper.hybrid_score:.3f}] {paper.title[:65]} ({paper.year})")

if state['web_results']:
    print("\n Sample web results:")
    # Distinct loop name so the HTTP response bound to `r` earlier in the
    # script is not overwritten.
    for web_result in state['web_results'][:3]:
        print(f" [{web_result.source}] {web_result.title[:65]}")

print("\n✅ Phase 5 complete")