|
|
|
|
|
"""
Demo: Hypothesis Generation (Phase 7).

This script demonstrates the REAL hypothesis generation pipeline:
1. REAL search: PubMed + ClinicalTrials + Europe PMC (actual API calls)
2. REAL embeddings: Semantic deduplication
3. REAL LLM: Mechanistic hypothesis generation

Usage:
    # Requires OPENAI_API_KEY or ANTHROPIC_API_KEY
    uv run python examples/hypothesis_demo/run_hypothesis.py "metformin Alzheimer's"
    uv run python examples/hypothesis_demo/run_hypothesis.py "sildenafil heart failure"
"""
|
|
|
|
|
import argparse |
|
|
import asyncio |
|
|
import os |
|
|
import sys |
|
|
from typing import Any |
|
|
|
|
|
from src.agents.hypothesis_agent import HypothesisAgent |
|
|
from src.services.embeddings import EmbeddingService |
|
|
from src.tools.clinicaltrials import ClinicalTrialsTool |
|
|
from src.tools.europepmc import EuropePMCTool |
|
|
from src.tools.pubmed import PubMedTool |
|
|
from src.tools.search_handler import SearchHandler |
|
|
|
|
|
|
|
|
async def run_hypothesis_demo(query: str) -> None:
    """Run the REAL hypothesis generation pipeline for ``query``, printing each stage.

    The stages run in order:

    1. Search PubMed, ClinicalTrials and Europe PMC through one ``SearchHandler``.
    2. Deduplicate the returned evidence with ``EmbeddingService.deduplicate``.
    3. Run ``HypothesisAgent`` on the query, print the text of its first response
       message, then print up to three follow-up search queries per hypothesis
       found under ``evidence_store["hypotheses"]``.

    Args:
        query: Free-text research query, e.g. ``"metformin Alzheimer's"``.

    Returns:
        None. Everything is reported on stdout. The function returns early,
        without running stages 2 and 3, when the search yields no evidence.

    Raises:
        Exception: Any exception raised by a stage is reported on stdout and
            then re-raised unchanged.
    """
    try:
        print(f"\n{'=' * 60}")
        print("DeepBoner Hypothesis Agent Demo (Phase 7)")
        print(f"Query: {query}")
        print("Mode: REAL (Live API calls)")
        print(f"{'=' * 60}\n")

        # Step 1: one handler fans the query out to all three literature tools.
        print("[Step 1] Searching PubMed + ClinicalTrials + Europe PMC...")
        search_handler = SearchHandler(
            # NOTE(review): timeout is presumably in seconds - confirm in SearchHandler.
            tools=[PubMedTool(), ClinicalTrialsTool(), EuropePMCTool()], timeout=30.0
        )
        result = await search_handler.execute(query, max_results_per_tool=5)

        print(f" Found {result.total_found} results from {result.sources_searched}")
        if result.errors:
            # Errors reported by the handler are shown as warnings; the run continues.
            print(f" Warnings: {result.errors}")

        if not result.evidence:
            print("\nNo evidence found. Try a different query.")
            return

        # Step 2: collapse near-duplicate evidence before handing it to the LLM.
        # NOTE(review): 0.85 is presumably a similarity cut-off - confirm in EmbeddingService.
        print("\n[Step 2] Semantic deduplication...")
        embedding_service = EmbeddingService()
        unique_evidence = await embedding_service.deduplicate(result.evidence, threshold=0.85)
        print(f" {len(result.evidence)} -> {len(unique_evidence)} unique papers")

        # Preview at most five items; long titles are cut so each fits on one line.
        print("\n[Evidence collected]")
        max_title_len = 50
        for i, e in enumerate(unique_evidence[:5], 1):
            raw_title = e.citation.title
            title = (
                raw_title[:max_title_len] + "..."
                if len(raw_title) > max_title_len
                else raw_title
            )
            print(f" {i}. [{e.citation.source.upper()}] {title}")

        # Step 3: the agent receives the store; "hypotheses" is read back from the
        # same dict after the run (presumably the agent fills it in).
        print("\n[Step 3] Generating mechanistic hypotheses (LLM)...")
        evidence_store: dict[str, Any] = {"current": unique_evidence, "hypotheses": []}
        agent = HypothesisAgent(evidence_store, embedding_service)

        print("-" * 60)
        response = await agent.run(query)
        print(response.messages[0].text)
        print("-" * 60)

        hypotheses = evidence_store.get("hypotheses", [])
        print(f"\n{len(hypotheses)} hypotheses stored")

        if hypotheses:
            print("\nGenerated search queries for further investigation:")
            for h in hypotheses:
                queries = h.to_search_queries()
                print(f" {h.drug} -> {h.target}:")
                # Only the first three queries per hypothesis are shown.
                for q in queries[:3]:
                    print(f" - {q}")

    except Exception as e:
        # Outermost boundary of the demo: tell the user what failed, then re-raise
        # so the traceback and a non-zero exit status are preserved.
        print(f"\n❌ Error during hypothesis generation: {e}")
        raise
|
|
|
|
|
|
|
|
async def main() -> None:
    """Parse the command line, check for an LLM API key, and run the demo.

    The optional positional ``query`` argument defaults to
    ``"metformin Alzheimer's disease"``. When neither ``OPENAI_API_KEY`` nor
    ``ANTHROPIC_API_KEY`` is set to a non-empty value, an explanatory banner
    is printed and the process exits with status 1 before the pipeline runs.

    Raises:
        SystemExit: With code 1 when no API key is available; also raised by
            ``argparse`` for ``--help`` or invalid arguments.
    """
    # Build the help epilog from the sample queries so each command line is
    # spelled out once; the resulting text is printed verbatim by argparse.
    runner = "uv run python examples/hypothesis_demo/run_hypothesis.py"
    sample_queries = (
        "metformin Alzheimer's",
        "sildenafil heart failure",
        "aspirin cancer prevention",
    )
    epilog_text = "\nExamples:\n" + "".join(f'{runner} "{sample}"\n' for sample in sample_queries)

    cli = argparse.ArgumentParser(
        description="Hypothesis Generation Demo (REAL - No Mocks)",
        formatter_class=argparse.RawDescriptionHelpFormatter,
        epilog=epilog_text,
    )
    cli.add_argument(
        "query",
        nargs="?",
        default="metformin Alzheimer's disease",
        help="Research query",
    )
    opts = cli.parse_args()

    # Either provider key is enough; an empty value counts as missing.
    key_present = any(os.getenv(name) for name in ("OPENAI_API_KEY", "ANTHROPIC_API_KEY"))
    if not key_present:
        rule = "=" * 60
        banner = (
            rule,
            "ERROR: This demo requires a real LLM.",
            "",
            "Set one of the following in your .env file:",
            " OPENAI_API_KEY=sk-...",
            " ANTHROPIC_API_KEY=sk-ant-...",
            "",
            "This is a REAL demo, not a mock. No fake data.",
            rule,
        )
        print("\n".join(banner))
        sys.exit(1)

    await run_hypothesis_demo(opts.query)

    # Closing summary, shown only when the pipeline finished without raising.
    print("\n" + "=" * 60)
    print("Demo complete! This was a REAL pipeline:")
    print(" 1. REAL search: PubMed + ClinicalTrials + Europe PMC APIs")
    print(" 2. REAL embeddings: Actual sentence-transformers")
    print(" 3. REAL LLM: Actual hypothesis generation")
    print("=" * 60 + "\n")
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Only when executed as a script (not on import): create the coroutine for
    # the async entry point and let asyncio.run drive it to completion.
    entry_point = main()
    asyncio.run(entry_point)
|
|
|