File size: 5,124 Bytes
0efdc2f
 
 
 
cfb473d
9d21bf8
cfb473d
 
0efdc2f
 
 
fa696e8
 
0efdc2f
 
 
 
 
 
 
 
 
cfb473d
8625ded
2f8ae1f
cfb473d
 
0efdc2f
 
 
cfb473d
cd11dad
0a480cb
5d12635
cd11dad
 
0a480cb
cd11dad
 
9d21bf8
8625ded
2f8ae1f
8625ded
cd11dad
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
0efdc2f
 
 
cfb473d
 
 
 
 
 
fa696e8
 
 
cfb473d
 
0efdc2f
 
 
fa696e8
cfb473d
0efdc2f
 
 
cfb473d
0efdc2f
cfb473d
 
 
 
 
 
 
 
 
0efdc2f
 
 
 
 
cfb473d
9d21bf8
cfb473d
 
0efdc2f
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
#!/usr/bin/env python3
"""
Demo: Hypothesis Generation (Phase 7).

This script demonstrates the REAL hypothesis generation pipeline:
1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls)
2. REAL embeddings: Semantic deduplication
3. REAL LLM: Mechanistic hypothesis generation

Usage:
    # Requires OPENAI_API_KEY or ANTHROPIC_API_KEY
    uv run python examples/hypothesis_demo/run_hypothesis.py "testosterone libido"
    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil erectile dysfunction"
"""

import argparse
import asyncio
import os
import sys
from typing import Any

from src.agents.hypothesis_agent import HypothesisAgent
from src.services.embeddings import EmbeddingService
from src.tools.clinicaltrials import ClinicalTrialsTool
from src.tools.europepmc import EuropePMCTool
from src.tools.pubmed import PubMedTool
from src.tools.search_handler import SearchHandler


async def run_hypothesis_demo(query: str) -> None:
    """Search, deduplicate, and generate hypotheses for ``query`` with live services.

    The pipeline runs three stages and prints progress to stdout as it goes:

    1. Query PubMed, ClinicalTrials, and Europe PMC through ``SearchHandler``
       (``max_results_per_tool=5``, ``timeout=30.0``).
    2. Deduplicate the evidence with ``EmbeddingService.deduplicate``
       (``threshold=0.85``) and list up to five of the remaining titles,
       truncated to 50 characters.
    3. Run ``HypothesisAgent`` on the deduplicated evidence, print the text of
       its first response message, and list up to three follow-up search
       queries for each entry under the store's ``"hypotheses"`` key.

    Returns early, after printing a notice, when the search yields no evidence.

    Args:
        query: Research query forwarded to the search handler and the agent.

    Raises:
        Exception: Any failure inside the pipeline is reported on stdout and
            then re-raised unchanged.
    """
    try:
        banner = "=" * 60
        divider = "-" * 60

        print(f"\n{banner}")
        print("DeepBoner Hypothesis Agent Demo (Phase 7)")
        print(f"Query: {query}")
        print("Mode: REAL (Live API calls)")
        print(f"{banner}\n")

        # Stage 1: live literature search across all three sources.
        print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...")
        sources = [PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()]
        handler = SearchHandler(tools=sources, timeout=30.0)
        found = await handler.execute(query, max_results_per_tool=5)

        print(f"  Found {found.total_found} results from {found.sources_searched}")
        if found.errors:
            # Per-tool failures are surfaced as warnings; the demo keeps going.
            print(f"  Warnings: {found.errors}")

        if not found.evidence:
            print("\nNo evidence found. Try a different query.")
            return

        # Stage 2: collapse near-duplicate papers via embeddings.
        print("\n[Step 2] Semantic deduplication...")
        embedder = EmbeddingService()
        deduped = await embedder.deduplicate(found.evidence, threshold=0.85)
        print(f"  {len(found.evidence)} -> {len(deduped)} unique papers")

        # Preview the first few papers with titles clipped for readability.
        print("\n[Evidence collected]")
        title_limit = 50
        for rank, paper in enumerate(deduped[:5], start=1):
            full_title = paper.citation.title
            shown = (
                full_title
                if len(full_title) <= title_limit
                else full_title[:title_limit] + "..."
            )
            print(f"  {rank}. [{paper.citation.source.upper()}] {shown}")

        # Stage 3: hand the evidence to the LLM-backed hypothesis agent.
        print("\n[Step 3] Generating mechanistic hypotheses (LLM)...")
        store: dict[str, Any] = {"current": deduped, "hypotheses": []}
        hypothesis_agent = HypothesisAgent(store, embedder)

        print(divider)
        reply = await hypothesis_agent.run(query)
        print(reply.messages[0].text)
        print(divider)

        # Report whatever is now stored under the "hypotheses" key.
        stored = store.get("hypotheses", [])
        print(f"\n{len(stored)} hypotheses stored")

        if not stored:
            return

        print("\nGenerated search queries for further investigation:")
        for hypothesis in stored:
            follow_ups = hypothesis.to_search_queries()
            print(f"  {hypothesis.drug} -> {hypothesis.target}:")
            for follow_up in follow_ups[:3]:
                print(f"    - {follow_up}")

    except Exception as exc:
        # Top-level boundary of the demo: report, then let the error propagate.
        print(f"\n❌ Error during hypothesis generation: {exc}")
        raise


async def main() -> None:
    """Parse the command line, check for an LLM API key, and run the demo.

    The single optional positional argument ``query`` defaults to
    ``"testosterone libido"``. If neither ``OPENAI_API_KEY`` nor
    ``ANTHROPIC_API_KEY`` is set to a non-empty value, an explanatory message
    is printed and the process exits with status 1 before any pipeline work.

    Raises:
        SystemExit: With code 1 when no API key is configured; argparse may
            also exit on invalid arguments or ``--help``.
    """
    usage_examples = """
Examples:
    uv run python examples/hypothesis_demo/run_hypothesis.py "testosterone libido"
    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil erectile dysfunction"
    uv run python examples/hypothesis_demo/run_hypothesis.py "flibanserin mechanism"
        """
    cli = argparse.ArgumentParser(
        description="Hypothesis Generation Demo (REAL - No Mocks)",
        # Raw formatter keeps the epilog's line breaks and indentation intact.
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=usage_examples,
    )
    cli.add_argument("query", nargs="?", default="testosterone libido", help="Research query")
    query = cli.parse_args().query

    rule = "=" * 60

    # Fail fast: the pipeline needs a real LLM, so bail out before any work.
    key_names = ("OPENAI_API_KEY", "ANTHROPIC_API_KEY")
    if not any(os.getenv(name) for name in key_names):
        notice = (
            rule,
            "ERROR: This demo requires a real LLM.",
            "",
            "Set one of the following in your .env file:",
            "  OPENAI_API_KEY=sk-...",
            "  ANTHROPIC_API_KEY=sk-ant-...",
            "",
            "This is a REAL demo, not a mock. No fake data.",
            rule,
        )
        for line in notice:
            print(line)
        sys.exit(1)

    await run_hypothesis_demo(query)

    print("\n" + rule)
    print("Demo complete! This was a REAL pipeline:")
    print("  1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs")
    print("  2. REAL embeddings: Actual sentence-transformers")
    print("  3. REAL LLM: Actual hypothesis generation")
    print(rule + "\n")


# Start the event loop only when executed as a script, so importing this
# module never triggers argument parsing or live API calls.
if __name__ == "__main__":
    asyncio.run(main())