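"""Debug script: trace contradiction detection for a single Category C question.

Runs the planner and retriever nodes to populate a minimal state dict, then
manually walks the top-4 retrieved papers pairwise (replicating the >= 2-year
gap filter) and prints what _check_contradiction returns for each eligible
pair, before comparing against the full _detect_contradictions output.
"""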
import sys, os, json, time
sys.path.insert(0, '.')
from dotenv import load_dotenv
load_dotenv()
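# .env is assumed to carry the API key(s) used by the LLM call inside _check_contradiction.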

from src.agents.critic import _detect_contradictions, _check_contradiction
from src.agents.retriever import retriever_node
from src.agents.planner import planner_node
from src.state import ResearchState

# Load one Cat C question
with open('eval/questions.json') as f:
    questions = json.load(f)
cat_c = [q for q in questions if q.get('category') == 'C']
test_q = cat_c[0]

print(f"Testing: [{test_q['id']}] {test_q['question']}")
print("=" * 70)

# Build minimal state and run planner + retriever
state = {
    "original_query": test_q["question"],
    "session_id": "debug-001",
    "session_context": None,
    "sub_questions": [],
    "retrieved_papers": [],
    "citation_graph": {},
    "web_results": [],
    "critic_verdict": "",
    "critic_notes": "",
    "rewritten_questions": [],
    "retry_count": 0,
    "synthesized_position": "",
    "claim_confidences": [],
    "session_update": None,
    "export_md": "",
    "decay_config": "linear",
    "calibration_bin": "",
    "latency_ms": 0.0,
    "paper_reliability_scores": {},
    "reliability_dominant_signals": {},
    "is_followup": False,
    "previous_papers": [],
    "previous_verdict": "",
    "previous_claims": [],
    "turn_number": 1,
    "explore_mode": False,
    "explorer_findings": [],
}

print("Step 1: Running planner...")
state = planner_node(state)
print(f"Sub-questions: {state.get('sub_questions', [])}")

print("\nStep 2: Running retriever...")
state = retriever_node(state)
papers = state.get("retrieved_papers", [])
print(f"Papers retrieved: {len(papers)}")
print("\nTop 6 papers:")
for i, p in enumerate(papers[:6]):
    print(f"  [{i+1}] {p.year} | citations={p.citation_count} | score={p.hybrid_score:.3f}")
    print(f"       {p.title[:80]}")

print("\nStep 3: Running _detect_contradictions on top 4 papers...")
top4 = papers[:4]
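# Re-check each top-4 pair by hand so every individual LLM verdict is visible,
# using the same >= 2-year gap filter the detector is expected to apply.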
print("Paper pairs being checked (must have >= 2 year gap):")
checked = 0
for i, pa in enumerate(top4):
    for j, pb in enumerate(top4[i+1:], start=i+1):
        gap = abs((pa.year or 0) - (pb.year or 0))
        print(f"  Pair ({i},{j}): {pa.year} vs {pb.year} -- gap={gap}yrs -- {'WILL CHECK' if gap >= 2 else 'SKIPPED (gap < 2)'}")
        if gap >= 2:
            checked += 1
            older = pa if (pa.year or 0) < (pb.year or 0) else pb
            newer = pa if (pa.year or 0) > (pb.year or 0) else pb
            print(f"    older: [{older.year}] {older.title[:60]}")
            print(f"    newer: [{newer.year}] {newer.title[:60]}")
            print(f"    older abstract: {(older.abstract or 'NONE')[:200]}")
            print(f"    newer abstract: {(newer.abstract or 'NONE')[:200]}")
            result, reason = _check_contradiction(older, newer)
            print(f"    LLM result: contradicts={result}, reason={reason}")
            print()

print(f"\nTotal pairs actually LLM-checked: {checked}")
contradictions = _detect_contradictions(papers)
print(f"Final _detect_contradictions output: {len(contradictions)} contradictions found")