"""Debug script: trace contradiction detection for one category-'C' question.

Loads ``eval/questions.json``, takes the first question whose ``category`` is
``'C'``, runs it through ``planner_node`` and ``retriever_node``, and prints
the top retrieved papers.  It then walks every pair among the top 4 papers,
calling ``_check_contradiction`` on each pair whose publication years differ
by at least ``MIN_YEAR_GAP``, and finally runs ``_detect_contradictions`` on
all retrieved papers and prints how many contradictions it reports.
"""
import sys
import os
import json
import time

# Put the current working directory first on the import path so the ``src``
# package resolves (presumably the script is run from the project root).
sys.path.insert(0, '.')

from dotenv import load_dotenv

# Kept ahead of the ``src.*`` imports, as in the original statement order.
# NOTE(review): presumably those modules read environment variables at import
# time -- confirm before reordering.
load_dotenv()

from src.agents.critic import _detect_contradictions, _check_contradiction
from src.agents.retriever import retriever_node
from src.agents.planner import planner_node
from src.state import ResearchState

QUESTIONS_PATH = 'eval/questions.json'

# Minimum publication-year difference for a pair to be sent to the LLM check.
# NOTE(review): this is the threshold the script's own messages state; confirm
# it matches what _detect_contradictions in src.agents.critic actually uses.
MIN_YEAR_GAP = 2

# Context manager closes the file handle; JSON is read explicitly as UTF-8.
with open(QUESTIONS_PATH, encoding="utf-8") as questions_file:
    questions = json.load(questions_file)

cat_c = [q for q in questions if q.get('category') == 'C']
if not cat_c:
    # Fail with a readable message instead of an IndexError on cat_c[0].
    sys.exit(f"No category 'C' questions found in {QUESTIONS_PATH}")
test_q = cat_c[0]

print(f"Testing: [{test_q['id']}] {test_q['question']}")
print("=" * 70)

# Initial state handed to planner_node; only the query comes from the
# selected question, every other field starts empty / at its neutral value.
state = {
    "original_query": test_q["question"],
    "session_id": "debug-001",
    "session_context": None,
    "sub_questions": [],
    "retrieved_papers": [],
    "citation_graph": {},
    "web_results": [],
    "critic_verdict": "",
    "critic_notes": "",
    "rewritten_questions": [],
    "retry_count": 0,
    "synthesized_position": "",
    "claim_confidences": [],
    "session_update": None,
    "export_md": "",
    "decay_config": "linear",
    "calibration_bin": "",
    "latency_ms": 0.0,
    "paper_reliability_scores": {},
    "reliability_dominant_signals": {},
    "is_followup": False,
    "previous_papers": [],
    "previous_verdict": "",
    "previous_claims": [],
    "turn_number": 1,
    "explore_mode": False,
    "explorer_findings": [],
}

print("Step 1: Running planner...")
state = planner_node(state)
print(f"Sub-questions: {state.get('sub_questions', [])}")

print("\nStep 2: Running retriever...")
state = retriever_node(state)
papers = state.get("retrieved_papers", [])
print(f"Papers retrieved: {len(papers)}")
print("\nTop 6 papers:")
for i, p in enumerate(papers[:6]):
    print(f" [{i+1}] {p.year} | citations={p.citation_count} | score={p.hybrid_score:.3f}")
    print(f" {p.title[:80]}")

print("\nStep 3: Running _detect_contradictions on top 4 papers...")
top4 = papers[:4]
print(f"Paper pairs being checked (must have >= {MIN_YEAR_GAP} year gap):")
checked = 0
for i, pa in enumerate(top4):
    # start=i+1 makes j the partner's real index in top4, so each pair is
    # labelled correctly (previously every partner was printed as i+1).
    for j, pb in enumerate(top4[i+1:], start=i+1):
        # A missing year is treated as 0, exactly as in the original check.
        year_a = pa.year or 0
        year_b = pb.year or 0
        gap = abs(year_a - year_b)
        status = 'WILL CHECK' if gap >= MIN_YEAR_GAP else f'SKIPPED (gap < {MIN_YEAR_GAP})'
        print(f" Pair ({i},{j}): {pa.year} vs {pb.year} -- gap={gap}yrs -- {status}")
        if gap < MIN_YEAR_GAP:
            continue

        checked += 1
        # The years are guaranteed to differ here, so a single comparison
        # decides which paper is the older one.
        older, newer = (pa, pb) if year_a < year_b else (pb, pa)
        print(f" older: [{older.year}] {older.title[:60]}")
        print(f" newer: [{newer.year}] {newer.title[:60]}")
        print(f" older abstract: {(older.abstract or 'NONE')[:200]}")
        print(f" newer abstract: {(newer.abstract or 'NONE')[:200]}")
        result, reason = _check_contradiction(older, newer)
        print(f" LLM result: contradicts={result}, reason={reason}")
        # NOTE(review): the source lost its indentation; this separator is
        # assumed to belong to the checked-pair branch -- confirm.
        print()

print(f"\nTotal pairs actually LLM-checked: {checked}")
contradictions = _detect_contradictions(papers)
print(f"Final _detect_contradictions output: {len(contradictions)} contradictions found")
|
|