diff --git a/.gitignore b/.gitignore index 1c9b9c8a87bc267d0b7a0290874c0c9188f9a842..084126f0197830b515598eecd07a82d90caec1c0 100644 --- a/.gitignore +++ b/.gitignore @@ -49,6 +49,7 @@ reference_repos/claude-agent-sdk/ reference_repos/pydanticai-research-agent/ reference_repos/pubmed-mcp-server/ reference_repos/DeepCritical/ +reference_repos/GradioDemo/ # Keep the README in reference_repos !reference_repos/README.md diff --git a/AGENTS.md b/AGENTS.md index 7f31327c393611d2f8eb5ec6f883f64967a5f116..cc5a89dd31eab9d158798b1f7f971460e1ea3895 100644 --- a/AGENTS.md +++ b/AGENTS.md @@ -4,7 +4,7 @@ This file provides guidance to AI agents when working with code in this reposito ## Project Overview -DeepCritical is an AI-native drug repurposing research agent for a HuggingFace hackathon. It uses a search-and-judge loop to autonomously search biomedical databases (PubMed, ClinicalTrials.gov, bioRxiv) and synthesize evidence for queries like "What existing drugs might help treat long COVID fatigue?". +DeepBoner is an AI-native sexual health research agent. It uses a search-and-judge loop to autonomously search biomedical databases (PubMed, ClinicalTrials.gov, Europe PMC) and synthesize evidence for queries like "What drugs improve female libido post-menopause?" or "Evidence for testosterone therapy in women with HSDD?". **Current Status:** Phases 1-13 COMPLETE (Foundation through Modal sandbox integration). @@ -39,7 +39,7 @@ uv run pytest -m integration User Question → Orchestrator ↓ Search Loop: - 1. Query PubMed, ClinicalTrials.gov, bioRxiv + 1. Query PubMed, ClinicalTrials.gov, Europe PMC 2. Gather evidence 3. Judge quality ("Do we have enough?") 4. 
If NO → Refine query, search more @@ -53,7 +53,7 @@ Research Report with Citations - `src/orchestrator.py` - Main agent loop - `src/tools/pubmed.py` - PubMed E-utilities search - `src/tools/clinicaltrials.py` - ClinicalTrials.gov API -- `src/tools/biorxiv.py` - bioRxiv/medRxiv preprint search +- `src/tools/europepmc.py` - Europe PMC search - `src/tools/code_execution.py` - Modal sandbox execution - `src/tools/search_handler.py` - Scatter-gather orchestration - `src/services/embeddings.py` - Semantic search & deduplication (ChromaDB) @@ -82,7 +82,7 @@ Settings via pydantic-settings from `.env`: ## Exception Hierarchy ```text -DeepCriticalError (base) +DeepBonerError (base) ├── SearchError │ └── RateLimitError ├── JudgeError diff --git a/CLAUDE.md b/CLAUDE.md index 9978674c6b20cb6b96a4780e4feb85296dff3d75..7e6570cca7a7c5d143db99786655efd9bbd6e94b 100644 --- a/CLAUDE.md +++ b/CLAUDE.md @@ -4,7 +4,7 @@ This file provides guidance to Claude Code (claude.ai/code) when working with co ## Project Overview -DeepCritical is an AI-native drug repurposing research agent for a HuggingFace hackathon. It uses a search-and-judge loop to autonomously search biomedical databases (PubMed, ClinicalTrials.gov, bioRxiv) and synthesize evidence for queries like "What existing drugs might help treat long COVID fatigue?". +DeepBoner is an AI-native sexual health research agent. It uses a search-and-judge loop to autonomously search biomedical databases (PubMed, ClinicalTrials.gov, Europe PMC) and synthesize evidence for queries like "What drugs improve female libido post-menopause?" or "Evidence for testosterone therapy in women with HSDD?". **Current Status:** Phases 1-13 COMPLETE (Foundation through Modal sandbox integration). @@ -39,7 +39,7 @@ uv run pytest -m integration User Question → Orchestrator ↓ Search Loop: - 1. Query PubMed, ClinicalTrials.gov, bioRxiv + 1. Query PubMed, ClinicalTrials.gov, Europe PMC 2. Gather evidence 3. Judge quality ("Do we have enough?") 4. 
If NO → Refine query, search more @@ -53,7 +53,7 @@ Research Report with Citations - `src/orchestrator.py` - Main agent loop - `src/tools/pubmed.py` - PubMed E-utilities search - `src/tools/clinicaltrials.py` - ClinicalTrials.gov API -- `src/tools/biorxiv.py` - bioRxiv/medRxiv preprint search +- `src/tools/europepmc.py` - Europe PMC search - `src/tools/code_execution.py` - Modal sandbox execution - `src/tools/search_handler.py` - Scatter-gather orchestration - `src/services/embeddings.py` - Semantic search & deduplication (ChromaDB) @@ -82,7 +82,7 @@ Settings via pydantic-settings from `.env`: ## Exception Hierarchy ```text -DeepCriticalError (base) +DeepBonerError (base) ├── SearchError │ └── RateLimitError ├── JudgeError diff --git a/Dockerfile b/Dockerfile index 9d6fc14dce9d1bdbc102a1479304490324313167..e32960ace2c91e8f8faf004729b5cb4950349933 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,4 +1,4 @@ -# Dockerfile for DeepCritical +# Dockerfile for DeepBoner FROM python:3.11-slim # Set working directory diff --git a/GEMINI.md b/GEMINI.md index d22bdc9758c074cc3c759bbf126c0c80f924e6bd..1424b37e18b6ad99304dac3dc140ccffceb19581 100644 --- a/GEMINI.md +++ b/GEMINI.md @@ -1,9 +1,9 @@ -# DeepCritical Context +# DeepBoner Context ## Project Overview -**DeepCritical** is an AI-native Medical Drug Repurposing Research Agent. -**Goal:** To accelerate the discovery of new uses for existing drugs by intelligently searching biomedical literature (PubMed, ClinicalTrials.gov, bioRxiv), evaluating evidence, and hypothesizing potential applications. +**DeepBoner** is an AI-native Sexual Health Research Agent. +**Goal:** To accelerate research into sexual health, wellness, and reproductive medicine by intelligently searching biomedical literature (PubMed, ClinicalTrials.gov, Europe PMC), evaluating evidence, and synthesizing findings. 
**Architecture:** The project follows a **Vertical Slice Architecture** (Search -> Judge -> Orchestrator) and adheres to **Strict TDD** (Test-Driven Development). @@ -11,7 +11,7 @@ The project follows a **Vertical Slice Architecture** (Search -> Judge -> Orches **Current Status:** - **Phases 1-9:** COMPLETE. Foundation, Search, Judge, UI, Orchestrator, Embeddings, Hypothesis, Report, Cleanup. -- **Phases 10-11:** COMPLETE. ClinicalTrials.gov and bioRxiv integration. +- **Phases 10-11:** COMPLETE. ClinicalTrials.gov and Europe PMC integration. - **Phase 12:** COMPLETE. MCP Server integration (Gradio MCP at `/gradio_api/mcp/`). - **Phase 13:** COMPLETE. Modal sandbox for statistical analysis. @@ -41,7 +41,7 @@ The project follows a **Vertical Slice Architecture** (Search -> Judge -> Orches - `src/`: Source code - `utils/`: Shared utilities (`config.py`, `exceptions.py`, `models.py`) - - `tools/`: Search tools (`pubmed.py`, `clinicaltrials.py`, `biorxiv.py`, `code_execution.py`) + - `tools/`: Search tools (`pubmed.py`, `clinicaltrials.py`, `europepmc.py`, `code_execution.py`) - `services/`: Services (`embeddings.py`, `statistical_analyzer.py`) - `agents/`: Magentic multi-agent mode agents - `agent_factory/`: Agent definitions (judges, prompts) @@ -58,7 +58,7 @@ The project follows a **Vertical Slice Architecture** (Search -> Judge -> Orches - `src/orchestrator.py` - Main agent loop - `src/tools/pubmed.py` - PubMed E-utilities search - `src/tools/clinicaltrials.py` - ClinicalTrials.gov API -- `src/tools/biorxiv.py` - bioRxiv/medRxiv preprint search +- `src/tools/europepmc.py` - Europe PMC search - `src/tools/code_execution.py` - Modal sandbox execution - `src/services/statistical_analyzer.py` - Statistical analysis via Modal - `src/mcp_tools.py` - MCP tool wrappers diff --git a/README.md b/README.md index dd4f0e3822f9ed4143414983003e7e12f71d76aa..a4b8ec060613b26fe87481f63061125994708451 100644 --- a/README.md +++ b/README.md @@ -1,7 +1,7 @@ --- -title: DeepCritical 
-emoji: 🧬 -colorFrom: blue +title: DeepBoner +emoji: 🍆 +colorFrom: pink colorTo: purple sdk: gradio sdk_version: "6.0.1" @@ -10,26 +10,37 @@ app_file: src/app.py pinned: false license: mit tags: - - mcp-in-action-track-enterprise + - sexual-health + - reproductive-medicine + - hormone-therapy + - wellness-research - mcp-hackathon - - drug-repurposing - - biomedical-ai - pydantic-ai - llamaindex - modal --- -# DeepCritical +# DeepBoner 🍆 -AI-Powered Drug Repurposing Research Agent +AI-Native Sexual Health Research Agent + +Deep research for sexual wellness, ED treatments, hormone therapy, libido, and reproductive health - for all genders. ## Features -- **Multi-Source Search**: PubMed, ClinicalTrials.gov, bioRxiv/medRxiv +- **Multi-Source Search**: PubMed, ClinicalTrials.gov, Europe PMC - **MCP Integration**: Use our tools from Claude Desktop or any MCP client - **Modal Sandbox**: Secure execution of AI-generated statistical code - **LlamaIndex RAG**: Semantic search and evidence synthesis +## Example Queries + +- "What drugs improve female libido post-menopause?" +- "Clinical trials for erectile dysfunction alternatives to PDE5 inhibitors?" +- "Evidence for testosterone therapy in women with HSDD?" +- "Drug interactions with sildenafil?" +- "What's the latest research on flibanserin efficacy?" + ## Quick Start ### 1. Environment Setup @@ -62,7 +73,7 @@ Add this to your `claude_desktop_config.json`: ```json { "mcpServers": { - "deepcritical": { + "deepboner": { "url": "http://localhost:7860/gradio_api/mcp/" } } @@ -72,7 +83,7 @@ Add this to your `claude_desktop_config.json`: **Available Tools**: - `search_pubmed`: Search peer-reviewed biomedical literature. - `search_clinical_trials`: Search ClinicalTrials.gov. -- `search_biorxiv`: Search bioRxiv/medRxiv preprints. +- `search_europepmc`: Search Europe PMC preprints and papers. - `search_all`: Search all sources simultaneously. - `analyze_hypothesis`: Secure statistical analysis using Modal sandboxes. 
@@ -92,16 +103,16 @@ make check ## Architecture -DeepCritical uses a Vertical Slice Architecture: +DeepBoner uses a Vertical Slice Architecture: -1. **Search Slice**: Retrieving evidence from PubMed, ClinicalTrials.gov, and bioRxiv. +1. **Search Slice**: Retrieving evidence from PubMed, ClinicalTrials.gov, and Europe PMC. 2. **Judge Slice**: Evaluating evidence quality using LLMs. 3. **Orchestrator Slice**: Managing the research loop and UI. Built with: - **PydanticAI**: For robust agent interactions. - **Gradio**: For the streaming user interface. -- **PubMed, ClinicalTrials.gov, bioRxiv**: For biomedical data. +- **PubMed, ClinicalTrials.gov, Europe PMC**: For biomedical data. - **MCP**: For universal tool access. - **Modal**: For secure code execution. @@ -110,8 +121,7 @@ Built with: - The-Obstacle-Is-The-Way - MarioAderman - EmployeeNo427 -- Josephrp *(provided initial template)* ## Links -- [GitHub Repository](https://github.com/The-Obstacle-Is-The-Way/DeepCritical-1) \ No newline at end of file +- [GitHub Repository](https://github.com/The-Obstacle-Is-The-Way/DeepBoner) diff --git a/docs/architecture/design-patterns.md b/docs/architecture/design-patterns.md index 3fff9a0ce1dc7be118c9b328ee06c43f3445c3a6..5a29f2f53610b87e2edd5d1a9b7dfaf6b630575b 100644 --- a/docs/architecture/design-patterns.md +++ b/docs/architecture/design-patterns.md @@ -726,7 +726,7 @@ If evidence is weak, say so clearly.""" **Architecture**: ``` ┌─────────────────────────────────────────────────┐ -│ DeepCritical Agent │ +│ DeepBoner Agent │ │ (uses tools directly OR via MCP) │ └─────────────────────────────────────────────────┘ │ @@ -811,7 +811,7 @@ uvx fastmcp run src/mcp_servers/pubmed_server.py "pubmed": { "command": "python", "args": ["-m", "src.mcp_servers.pubmed_server"], - "cwd": "/path/to/deepcritical" + "cwd": "/path/to/deepboner" } } } @@ -865,7 +865,7 @@ def research_with_streaming(question: str) -> Generator[str, None, None]: # Gradio 5 UI with 
gr.Blocks(theme=gr.themes.Soft()) as demo: - gr.Markdown("# 🔬 DeepCritical: Drug Repurposing Research Agent") + gr.Markdown("# 🔬 DeepBoner: Drug Repurposing Research Agent") gr.Markdown("Ask a question about potential drug repurposing opportunities.") with gr.Row(): diff --git a/docs/architecture/overview.md b/docs/architecture/overview.md index 59467f2896848a2f8e5a9a503e5713bdd5e0d977..a4c706da617586e5f7d0e0cbbdac574a80a1213d 100644 --- a/docs/architecture/overview.md +++ b/docs/architecture/overview.md @@ -1,11 +1,11 @@ -# DeepCritical: Medical Drug Repurposing Research Agent +# DeepBoner: Medical Drug Repurposing Research Agent ## Project Overview --- ## Executive Summary -**DeepCritical** is a deep research agent designed to accelerate medical drug repurposing research by autonomously searching, analyzing, and synthesizing evidence from multiple biomedical databases. +**DeepBoner** is a deep research agent designed to accelerate medical drug repurposing research by autonomously searching, analyzing, and synthesizing evidence from multiple biomedical databases. ### The Problem We Solve @@ -16,7 +16,7 @@ Drug repurposing - finding new therapeutic uses for existing FDA-approved drugs - Assess safety profiles - Synthesize evidence into actionable insights -**DeepCritical automates this process from hours to minutes.** +**DeepBoner automates this process from hours to minutes.** ### What Is Drug Repurposing? diff --git a/docs/brainstorming/00_ROADMAP_SUMMARY.md b/docs/brainstorming/00_ROADMAP_SUMMARY.md index a67ae6741e446c774485534d2d6a2278d9b44686..13d1fe56b5dd1fe6609a0da765936a9304b50ca4 100644 --- a/docs/brainstorming/00_ROADMAP_SUMMARY.md +++ b/docs/brainstorming/00_ROADMAP_SUMMARY.md @@ -1,4 +1,4 @@ -# DeepCritical Data Sources: Roadmap Summary +# DeepBoner Data Sources: Roadmap Summary **Created**: 2024-11-27 **Purpose**: Future maintainability and hackathon continuation @@ -131,7 +131,7 @@ Keep current architecture working, add OpenAlex incrementally. ``` 2. 
**Copy OpenAlex tool from reference repo** - - File: `reference_repos/DeepCritical/DeepResearch/src/tools/openalex_tools.py` + - File: `reference_repos/DeepBoner/DeepResearch/src/tools/openalex_tools.py` - Adapt to our `SearchTool` base class 3. **Enable NCBI API Key** @@ -189,6 +189,6 @@ If you're picking this up after the hackathon: 1. **Start with OpenAlex** - biggest bang for buck 2. **Add rate limiting** - prevents API blocks 3. **Don't bother with bioRxiv** - use Europe PMC instead -4. **Reference repo is gold** - `reference_repos/DeepCritical/` has working implementations +4. **Reference repo is gold** - `reference_repos/DeepBoner/` has working implementations Good luck! 🚀 diff --git a/docs/brainstorming/01_PUBMED_IMPROVEMENTS.md b/docs/brainstorming/01_PUBMED_IMPROVEMENTS.md index 6142e17b227eccca82eba26235de9d1e1f4f03b6..cc096b97cfef3dfe519ceb42e67955bc178400e2 100644 --- a/docs/brainstorming/01_PUBMED_IMPROVEMENTS.md +++ b/docs/brainstorming/01_PUBMED_IMPROVEMENTS.md @@ -24,9 +24,9 @@ --- -## Reference Implementation (DeepCritical Reference Repo) +## Reference Implementation (DeepBoner Reference Repo) -The reference repo at `reference_repos/DeepCritical/DeepResearch/src/tools/bioinformatics_tools.py` has a more sophisticated implementation: +The reference repo at `reference_repos/DeepBoner/DeepResearch/src/tools/bioinformatics_tools.py` has a more sophisticated implementation: ### Features We're Missing diff --git a/docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md b/docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md index dfec6cb16ac9d0539b43153e8c12fab206bb3009..93f4a885dbec1762b0397a481a8cb3ca5eac90c2 100644 --- a/docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md +++ b/docs/brainstorming/03_EUROPEPMC_IMPROVEMENTS.md @@ -182,7 +182,7 @@ Europe PMC is more generous than NCBI: # Recommend: 10-20 requests/second max # Use email in User-Agent for polite pool headers = { - "User-Agent": "DeepCritical/1.0 (mailto:your@email.com)" + "User-Agent": "DeepBoner/1.0 
(mailto:your@email.com)" } ``` diff --git a/docs/brainstorming/04_OPENALEX_INTEGRATION.md b/docs/brainstorming/04_OPENALEX_INTEGRATION.md index 3a191e4ed7945003128e15ef866ddfc9a2873568..8f1aeaf84a9f28f3cc34c9e8e8523fc254ee268e 100644 --- a/docs/brainstorming/04_OPENALEX_INTEGRATION.md +++ b/docs/brainstorming/04_OPENALEX_INTEGRATION.md @@ -2,7 +2,7 @@ **Status**: NOT Implemented (Candidate for Addition) **Priority**: HIGH - Could Replace Multiple Tools -**Reference**: Already implemented in `reference_repos/DeepCritical` +**Reference**: Already implemented in `reference_repos/DeepBoner` --- @@ -20,7 +20,7 @@ OpenAlex is a **fully open** index of the global research system: --- -## Why OpenAlex for DeepCritical? +## Why OpenAlex for DeepBoner? ### Current Architecture @@ -60,7 +60,7 @@ Orchestrator (enrich with CT.gov for trials) ## Reference Implementation -From `reference_repos/DeepCritical/DeepResearch/src/tools/openalex_tools.py`: +From `reference_repos/DeepBoner/DeepResearch/src/tools/openalex_tools.py`: ```python class OpenAlexFetchTool(ToolRunner): @@ -212,7 +212,7 @@ class OpenAlexTool(SearchTool): "filter": "type:article,is_oa:true", "sort": "cited_by_count:desc", "per_page": max_results, - "mailto": "deepcritical@example.com", # Polite pool + "mailto": "deepboner@example.com", # Polite pool }, ) data = resp.json() diff --git a/docs/brainstorming/implementation/15_PHASE_OPENALEX.md b/docs/brainstorming/implementation/15_PHASE_OPENALEX.md index 9fb3afcc752cb37d22bd6c31a3412b4cb002df30..007b50a28e7a61557b2f54fabb927d26c2dd053b 100644 --- a/docs/brainstorming/implementation/15_PHASE_OPENALEX.md +++ b/docs/brainstorming/implementation/15_PHASE_OPENALEX.md @@ -305,7 +305,7 @@ class OpenAlexTool: Args: email: Optional email for polite pool (faster responses) """ - self.email = email or "deepcritical@example.com" + self.email = email or "deepboner@example.com" @property def name(self) -> str: diff --git 
a/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md b/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md index 77c443ae9f605904d9c55de3a729e4c06ac3f226..228a1cec78eb022508386bd13bc13cfa949f832b 100644 --- a/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md +++ b/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md @@ -167,7 +167,7 @@ The refactor branch (`feat/pubmed-fulltext`) has some valuable improvements: ## 9. Questions to Answer Before Proceeding 1. **For the hackathon**: Do we need full multi-agent orchestration, or is single-agent sufficient? -2. **For DeepCritical mainline**: Is the plan to use Microsoft Agent Framework for orchestration? +2. **For DeepBoner mainline**: Is the plan to use Microsoft Agent Framework for orchestration? 3. **Timeline**: How much time do we have to get this right? --- diff --git a/docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md b/docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md index 9f25b1f52a79193a28d4d5f9029cdfece1928be5..1239cb298d6c96b6e10d854ab8e8a06a6285a607 100644 --- a/docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md +++ b/docs/brainstorming/magentic-pydantic/REVIEW_PROMPT_FOR_SENIOR_AGENT.md @@ -6,7 +6,7 @@ Copy and paste everything below this line to a fresh Claude/AI session: ## Context -I am a junior developer working on a HuggingFace hackathon project called DeepCritical. We made a significant architectural mistake and are now trying to course-correct. I need you to act as a **senior staff engineer** and critically review our proposed solution. +I am a junior developer working on a HuggingFace hackathon project called DeepBoner. We made a significant architectural mistake and are now trying to course-correct. I need you to act as a **senior staff engineer** and critically review our proposed solution. 
## The Situation @@ -62,28 +62,28 @@ Please perform a **deep, critical review** of: Please read these files in order: -1. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md` -2. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md` -3. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md` -4. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md` +1. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/docs/brainstorming/magentic-pydantic/00_SITUATION_AND_PLAN.md` +2. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/docs/brainstorming/magentic-pydantic/01_ARCHITECTURE_SPEC.md` +3. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/docs/brainstorming/magentic-pydantic/02_IMPLEMENTATION_PHASES.md` +4. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/docs/brainstorming/magentic-pydantic/03_IMMEDIATE_ACTIONS.md` And the architecture diagram: -5. `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/assets/magentic-pydantic.png` +5. 
`/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/assets/magentic-pydantic.png` ## Reference Repositories to Consult We have local clones of the source-of-truth repositories: -- **Original DeepCritical:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/DeepCritical/` -- **Microsoft Agent Framework:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/agent-framework/` -- **Microsoft AutoGen:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/reference_repos/autogen-microsoft/` +- **Original DeepBoner:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/reference_repos/DeepBoner/` +- **Microsoft Agent Framework:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/reference_repos/agent-framework/` +- **Microsoft AutoGen:** `/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/reference_repos/autogen-microsoft/` Please cross-reference our hackathon fork against these to verify architectural alignment. ## Codebase to Analyze Our hackathon fork is at: -`/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepCritical-1/` +`/Users/ray/Desktop/CLARITY-DIGITAL-TWIN/DeepBoner-1/` Key files to examine: - `src/agents/` - Agent framework integration diff --git a/docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md b/docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md index 7197b1ec4ef09ea29b98cc447994264e6b4b0f54..a7d7c01574dd3c6ce679fa5a42c2a5551fe4ed51 100644 --- a/docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md +++ b/docs/bugs/P1_GRADIO_SETTINGS_CLEANUP.md @@ -55,7 +55,7 @@ def create_demo(): def create_demo(): return gr.ChatInterface( # <--- FIX: Top-level component ..., - title="🧬 DeepCritical", + title="🧬 DeepBoner", description="*AI-Powered Drug Repurposing Agent...*\n\n---\n**MCP Server Active**...", additional_inputs_accordion=gr.Accordion(label="⚙️ Settings", open=False) ) @@ -69,7 +69,7 @@ def create_demo(): 2. **Check**: Open `http://localhost:7860` 3. **Verify**: * Settings accordion starts **COLLAPSED**. 
- * Header title ("DeepCritical") is visible. + * Header title ("DeepBoner") is visible. * Footer text ("MCP Server Active") is visible in the description area. * Chat functionality works (Magentic/Simple modes). diff --git a/docs/development/testing.md b/docs/development/testing.md index 47c8c32d6a96ebfc01ed9c54627e6287b5c0e722..3bcb62c1f9a4bdc2af1360c5adda48ffb5f4e8f1 100644 --- a/docs/development/testing.md +++ b/docs/development/testing.md @@ -1,5 +1,5 @@ # Testing Strategy -## ensuring DeepCritical is Ironclad +## ensuring DeepBoner is Ironclad --- diff --git a/docs/guides/deployment.md b/docs/guides/deployment.md index 35fe7e49ab2ea9f80ff502506cb22f37ce17608a..d0c9911ca58b50f084aed0060965d57a731d4140 100644 --- a/docs/guides/deployment.md +++ b/docs/guides/deployment.md @@ -1,11 +1,11 @@ # Deployment Guide -## Launching DeepCritical: Gradio, MCP, & Modal +## Launching DeepBoner: Gradio, MCP, & Modal --- ## Overview -DeepCritical is designed for a multi-platform deployment strategy to maximize hackathon impact: +DeepBoner is designed for a multi-platform deployment strategy to maximize hackathon impact: 1. **HuggingFace Spaces**: Host the Gradio UI (User Interface). 2. **MCP Server**: Expose research tools to Claude Desktop/Agents. 
@@ -69,10 +69,10 @@ def predict(message, history): ```json { "mcpServers": { - "deepcritical": { + "deepboner": { "command": "uv", "args": ["run", "fastmcp", "run", "src/mcp_servers/pubmed_server.py"], - "cwd": "/absolute/path/to/DeepCritical" + "cwd": "/absolute/path/to/DeepBoner" } } } @@ -111,7 +111,7 @@ Instead of calling Anthropic API, we call a Modal function: # src/llm/modal_client.py import modal -stub = modal.Stub("deepcritical-inference") +stub = modal.Stub("deepboner-inference") @stub.function(gpu="A100") def generate_text(prompt: str): diff --git a/docs/implementation/01_phase_foundation.md b/docs/implementation/01_phase_foundation.md index 2b44c4c0629e32444493f9ec60cf5ab0bfd22796..215fb121e16cb0f52e602efc4cd15ef65e739217 100644 --- a/docs/implementation/01_phase_foundation.md +++ b/docs/implementation/01_phase_foundation.md @@ -23,7 +23,7 @@ uv --version # Should be >= 0.4.0 ```bash # From project root -uv init --name deepcritical +uv init --name deepboner uv python install 3.11 # Pin Python version ``` @@ -35,9 +35,9 @@ uv python install 3.11 # Pin Python version ```toml [project] -name = "deepcritical" +name = "deepboner" version = "0.1.0" -description = "AI-Native Drug Repurposing Research Agent" +description = "AI-Native Sexual Health Research Agent" readme = "README.md" requires-python = ">=3.11" dependencies = [ @@ -401,25 +401,25 @@ settings = get_settings() ### `src/utils/exceptions.py` ```python -"""Custom exceptions for DeepCritical.""" +"""Custom exceptions for DeepBoner.""" -class DeepCriticalError(Exception): - """Base exception for all DeepCritical errors.""" +class DeepBonerError(Exception): + """Base exception for all DeepBoner errors.""" pass -class SearchError(DeepCriticalError): +class SearchError(DeepBonerError): """Raised when a search operation fails.""" pass -class JudgeError(DeepCriticalError): +class JudgeError(DeepBonerError): """Raised when the judge fails to assess evidence.""" pass -class 
ConfigurationError(DeepCriticalError): +class ConfigurationError(DeepBonerError): """Raised when configuration is invalid.""" pass @@ -558,7 +558,7 @@ uv run pre-commit install ## 10. Implementation Checklist - [ ] Install `uv` and verify version -- [ ] Run `uv init --name deepcritical` +- [ ] Run `uv init --name deepboner` - [ ] Create `pyproject.toml` (copy from above) - [ ] Create directory structure (run mkdir commands) - [ ] Create `.env.example` and `.env` diff --git a/docs/implementation/04_phase_ui.md b/docs/implementation/04_phase_ui.md index 90767d7515a40a9958287e9a171f6adf2bb702b6..1cc9035a16f48205ef5248e35825ce7bf72de9f5 100644 --- a/docs/implementation/04_phase_ui.md +++ b/docs/implementation/04_phase_ui.md @@ -401,7 +401,7 @@ Found {len(evidence)} sources. Consider refining your query for more specific re Using Gradio 5 generator pattern for real-time streaming. ```python -"""Gradio UI for DeepCritical agent.""" +"""Gradio UI for DeepBoner agent.""" import asyncio import gradio as gr from typing import AsyncGenerator @@ -557,11 +557,11 @@ def create_demo() -> gr.Blocks: Configured Gradio Blocks interface """ with gr.Blocks( - title="DeepCritical - Drug Repurposing Research Agent", + title="DeepBoner - Drug Repurposing Research Agent", theme=gr.themes.Soft(), ) as demo: gr.Markdown(""" - # 🧬 DeepCritical + # 🧬 DeepBoner ## AI-Powered Drug Repurposing Research Agent Ask questions about potential drug repurposing opportunities. @@ -935,7 +935,7 @@ class TestAgentEvent: ## 6. 
Dockerfile ```dockerfile -# Dockerfile for DeepCritical +# Dockerfile for DeepBoner FROM python:3.11-slim # Set working directory @@ -975,7 +975,7 @@ Create `README.md` header for HuggingFace Spaces: ```markdown --- -title: DeepCritical +title: DeepBoner emoji: 🧬 colorFrom: blue colorTo: purple @@ -986,7 +986,7 @@ pinned: false license: mit --- -# DeepCritical +# DeepBoner AI-Powered Drug Repurposing Research Agent ``` @@ -1088,7 +1088,7 @@ After deployment to HuggingFace Spaces: ## Project Complete! 🎉 -When Phase 4 is done, the DeepCritical MVP is complete: +When Phase 4 is done, the DeepBoner MVP is complete: - **Phase 1**: Foundation (uv, pytest, config) ✅ - **Phase 2**: Search Slice (PubMed, DuckDuckGo) ✅ diff --git a/docs/implementation/10_phase_clinicaltrials.md b/docs/implementation/10_phase_clinicaltrials.md index 382b5fb631fc10b029404133375b01ed5375bde0..c891f5e66d491eb867dcff888d4cb6094d458c1e 100644 --- a/docs/implementation/10_phase_clinicaltrials.md +++ b/docs/implementation/10_phase_clinicaltrials.md @@ -185,7 +185,7 @@ class ClinicalTrialsTool: requests.get, self.BASE_URL, params=params, - headers={"User-Agent": "DeepCritical-Research-Agent/1.0"}, + headers={"User-Agent": "DeepBoner-Research-Agent/1.0"}, timeout=30, ) response.raise_for_status() @@ -434,4 +434,4 @@ source .env && uv run python examples/search_demo/run_search.py "metformin alzhe | No phase info | Phase I/II/III evidence strength | **Demo pitch addition**: -> "DeepCritical searches PubMed for peer-reviewed evidence AND ClinicalTrials.gov for 400,000+ clinical trials." +> "DeepBoner searches PubMed for peer-reviewed evidence AND ClinicalTrials.gov for 400,000+ clinical trials." 
diff --git a/docs/implementation/11_phase_biorxiv.md b/docs/implementation/11_phase_biorxiv.md index 4e17d3c8c16c7e0bd9ec6b28086141337e98b40c..1ef5d55cb72084d29c6a457c478880f3c3273189 100644 --- a/docs/implementation/11_phase_biorxiv.md +++ b/docs/implementation/11_phase_biorxiv.md @@ -531,7 +531,7 @@ source .env && uv run python examples/search_demo/run_search.py "metformin diabe | Miss cutting-edge | Catch breakthroughs early | **Demo pitch (final)**: -> "DeepCritical searches PubMed for peer-reviewed evidence, ClinicalTrials.gov for 400,000+ clinical trials, and bioRxiv/medRxiv for cutting-edge preprints - then uses LLMs to generate mechanistic hypotheses and synthesize findings into publication-quality reports." +> "DeepBoner searches PubMed for peer-reviewed evidence, ClinicalTrials.gov for 400,000+ clinical trials, and bioRxiv/medRxiv for cutting-edge preprints - then uses LLMs to generate mechanistic hypotheses and synthesize findings into publication-quality reports." --- diff --git a/docs/implementation/12_phase_mcp_server.md b/docs/implementation/12_phase_mcp_server.md index 64bc5559e3e4986eb362382627ea8cd7c753a2e2..849daee97f0aff88a4a79057f8f2c00a76ce2231 100644 --- a/docs/implementation/12_phase_mcp_server.md +++ b/docs/implementation/12_phase_mcp_server.md @@ -1,6 +1,6 @@ # Phase 12 Implementation Spec: MCP Server Integration -**Goal**: Expose DeepCritical search tools as MCP servers for Track 2 compliance. +**Goal**: Expose DeepBoner search tools as MCP servers for Track 2 compliance. **Philosophy**: "MCP is the bridge between tools and LLMs." **Prerequisite**: Phase 11 complete (all search tools working) **Priority**: P0 - REQUIRED FOR HACKATHON TRACK 2 @@ -121,7 +121,7 @@ https://[space-id].hf.space/gradio_api/mcp/ ### 4.1 MCP Tool Wrappers (`src/mcp_tools.py`) ```python -"""MCP tool wrappers for DeepCritical search tools. +"""MCP tool wrappers for DeepBoner search tools. These functions expose our search tools via MCP protocol. 
Each function follows the MCP tool contract: @@ -130,15 +130,15 @@ Each function follows the MCP tool contract: - Formatted string returns """ -from src.tools.biorxiv import BioRxivTool from src.tools.clinicaltrials import ClinicalTrialsTool +from src.tools.europepmc import EuropePMCTool from src.tools.pubmed import PubMedTool # Singleton instances (avoid recreating on each call) _pubmed = PubMedTool() _trials = ClinicalTrialsTool() -_biorxiv = BioRxivTool() +_europepmc = EuropePMCTool() async def search_pubmed(query: str, max_results: int = 10) -> str: @@ -202,10 +202,10 @@ async def search_clinical_trials(query: str, max_results: int = 10) -> str: return "\n".join(formatted) -async def search_biorxiv(query: str, max_results: int = 10) -> str: - """Search bioRxiv/medRxiv for preprint research. +async def search_europepmc(query: str, max_results: int = 10) -> str: + """Search Europe PMC for preprint and open access research. - Searches bioRxiv and medRxiv preprint servers for cutting-edge research. + Searches Europe PMC for preprints and open access papers. Note: Preprints are NOT peer-reviewed but contain the latest findings. Args: @@ -217,10 +217,10 @@ async def search_biorxiv(query: str, max_results: int = 10) -> str: """ max_results = max(1, min(50, max_results)) - results = await _biorxiv.search(query, max_results) + results = await _europepmc.search(query, max_results) if not results: - return f"No bioRxiv/medRxiv preprints found for: {query}" + return f"No Europe PMC results found for: {query}" formatted = [f"## Preprint Results for: {query}\n"] for i, evidence in enumerate(results, 1): @@ -236,7 +236,7 @@ async def search_biorxiv(query: str, max_results: int = 10) -> str: async def search_all_sources(query: str, max_per_source: int = 5) -> str: """Search all biomedical sources simultaneously. - Performs parallel search across PubMed, ClinicalTrials.gov, and bioRxiv. + Performs parallel search across PubMed, ClinicalTrials.gov, and Europe PMC. 
This is the most comprehensive search option for drug repurposing research. Args: @@ -253,10 +253,10 @@ async def search_all_sources(query: str, max_per_source: int = 5) -> str: # Run all searches in parallel pubmed_task = search_pubmed(query, max_per_source) trials_task = search_clinical_trials(query, max_per_source) - biorxiv_task = search_biorxiv(query, max_per_source) + europepmc_task = search_europepmc(query, max_per_source) - pubmed_results, trials_results, biorxiv_results = await asyncio.gather( - pubmed_task, trials_task, biorxiv_task, return_exceptions=True + pubmed_results, trials_results, europepmc_results = await asyncio.gather( + pubmed_task, trials_task, europepmc_task, return_exceptions=True ) formatted = [f"# Comprehensive Search: {query}\n"] @@ -272,10 +272,10 @@ async def search_all_sources(query: str, max_per_source: int = 5) -> str: else: formatted.append(f"## Clinical Trials\n*Error: {trials_results}*\n") - if isinstance(biorxiv_results, str): - formatted.append(biorxiv_results) + if isinstance(europepmc_results, str): + formatted.append(europepmc_results) else: - formatted.append(f"## Preprints\n*Error: {biorxiv_results}*\n") + formatted.append(f"## Preprints\n*Error: {europepmc_results}*\n") return "\n---\n".join(formatted) ``` @@ -283,7 +283,7 @@ async def search_all_sources(query: str, max_per_source: int = 5) -> str: ### 4.2 Update Gradio App (`src/app.py`) ```python -"""Gradio UI for DeepCritical agent with MCP server support.""" +"""Gradio UI for DeepBoner agent with MCP server support.""" import os from collections.abc import AsyncGenerator @@ -294,12 +294,12 @@ import gradio as gr from src.agent_factory.judges import JudgeHandler, MockJudgeHandler from src.mcp_tools import ( search_all_sources, - search_biorxiv, + search_europepmc, search_clinical_trials, search_pubmed, ) from src.orchestrator_factory import create_orchestrator -from src.tools.biorxiv import BioRxivTool +from src.tools.europepmc import EuropePMCTool from 
src.tools.clinicaltrials import ClinicalTrialsTool from src.tools.pubmed import PubMedTool from src.tools.search_handler import SearchHandler @@ -317,15 +317,15 @@ def create_demo() -> Any: Configured Gradio Blocks interface with MCP server enabled """ with gr.Blocks( - title="DeepCritical - Drug Repurposing Research Agent", + title="DeepBoner - Drug Repurposing Research Agent", theme=gr.themes.Soft(), ) as demo: gr.Markdown(""" - # DeepCritical + # DeepBoner ## AI-Powered Drug Repurposing Research Agent Ask questions about potential drug repurposing opportunities. - The agent searches PubMed, ClinicalTrials.gov, and bioRxiv/medRxiv preprints. + The agent searches PubMed, ClinicalTrials.gov, and Europe PMC preprints. **Example questions:** - "What drugs could be repurposed for Alzheimer's disease?" @@ -381,13 +381,13 @@ def create_demo() -> Any: with gr.Tab("Preprints"): gr.Interface( - fn=search_biorxiv, + fn=search_europepmc, inputs=[ gr.Textbox(label="Query", placeholder="long covid treatment"), gr.Slider(1, 50, value=10, step=1, label="Max Results"), ], outputs=gr.Markdown(label="Results"), - api_name="search_biorxiv", + api_name="search_europepmc", ) with gr.Tab("Search All"): @@ -406,7 +406,7 @@ def create_demo() -> Any: **Note**: This is a research tool and should not be used for medical decisions. Always consult healthcare professionals for medical advice. 
- Built with PydanticAI + PubMed, ClinicalTrials.gov & bioRxiv + Built with PydanticAI + PubMed, ClinicalTrials.gov & Europe PMC **MCP Server**: Available at `/gradio_api/mcp/` for Claude Desktop integration """) @@ -444,7 +444,7 @@ import pytest from src.mcp_tools import ( search_all_sources, - search_biorxiv, + search_europepmc, search_clinical_trials, search_pubmed, ) @@ -525,18 +525,18 @@ class TestSearchClinicalTrials: assert "Clinical Trials" in result -class TestSearchBiorxiv: - """Tests for search_biorxiv MCP tool.""" +class TestSearchEuropePMC: + """Tests for search_europepmc MCP tool.""" @pytest.mark.asyncio async def test_returns_formatted_string(self, mock_evidence: Evidence) -> None: """Should return formatted markdown string.""" - mock_evidence.citation.source = "biorxiv" # type: ignore + mock_evidence.citation.source = "europepmc" # type: ignore - with patch("src.mcp_tools._biorxiv") as mock_tool: + with patch("src.mcp_tools._europepmc") as mock_tool: mock_tool.search = AsyncMock(return_value=[mock_evidence]) - result = await search_biorxiv("preprint search", 10) + result = await search_europepmc("preprint search", 10) assert isinstance(result, str) assert "Preprint Results" in result @@ -550,11 +550,11 @@ class TestSearchAllSources: """Should combine results from all sources.""" with patch("src.mcp_tools.search_pubmed", new_callable=AsyncMock) as mock_pubmed, \ patch("src.mcp_tools.search_clinical_trials", new_callable=AsyncMock) as mock_trials, \ - patch("src.mcp_tools.search_biorxiv", new_callable=AsyncMock) as mock_biorxiv: + patch("src.mcp_tools.search_europepmc", new_callable=AsyncMock) as mock_europepmc: mock_pubmed.return_value = "## PubMed Results" mock_trials.return_value = "## Clinical Trials" - mock_biorxiv.return_value = "## Preprints" + mock_europepmc.return_value = "## Preprints" result = await search_all_sources("metformin", 5) @@ -568,11 +568,11 @@ class TestSearchAllSources: """Should handle partial failures gracefully.""" with 
patch("src.mcp_tools.search_pubmed", new_callable=AsyncMock) as mock_pubmed, \ patch("src.mcp_tools.search_clinical_trials", new_callable=AsyncMock) as mock_trials, \ - patch("src.mcp_tools.search_biorxiv", new_callable=AsyncMock) as mock_biorxiv: + patch("src.mcp_tools.search_europepmc", new_callable=AsyncMock) as mock_europepmc: mock_pubmed.return_value = "## PubMed Results" mock_trials.side_effect = Exception("API Error") - mock_biorxiv.return_value = "## Preprints" + mock_europepmc.return_value = "## Preprints" result = await search_all_sources("metformin", 5) @@ -599,10 +599,10 @@ class TestMCPDocstrings: assert search_clinical_trials.__doc__ is not None assert "Args:" in search_clinical_trials.__doc__ - def test_search_biorxiv_has_args_section(self) -> None: + def test_search_europepmc_has_args_section(self) -> None: """Docstring must have Args section for MCP schema generation.""" - assert search_biorxiv.__doc__ is not None - assert "Args:" in search_biorxiv.__doc__ + assert search_europepmc.__doc__ is not None + assert "Args:" in search_europepmc.__doc__ def test_search_all_sources_has_args_section(self) -> None: """Docstring must have Args section for MCP schema generation.""" @@ -672,7 +672,7 @@ class TestMCPServerIntegration: // %APPDATA%\Claude\claude_desktop_config.json (Windows) { "mcpServers": { - "deepcritical": { + "deepboner": { "url": "http://localhost:7860/gradio_api/mcp/" } } @@ -684,8 +684,8 @@ class TestMCPServerIntegration: ```json { "mcpServers": { - "deepcritical": { - "url": "https://MCP-1st-Birthday-deepcritical.hf.space/gradio_api/mcp/" + "deepboner": { + "url": "https://your-space.hf.space/gradio_api/mcp/" } } } @@ -696,7 +696,7 @@ class TestMCPServerIntegration: ```json { "mcpServers": { - "deepcritical": { + "deepboner": { "url": "https://your-space.hf.space/gradio_api/mcp/", "headers": { "Authorization": "Bearer hf_xxxxxxxxxxxxx" @@ -761,7 +761,7 @@ Phase 12 is **COMPLETE** when: ``` 2. 
**Show Claude Desktop using our tools**: - - Open Claude Desktop with DeepCritical MCP configured + - Open Claude Desktop with DeepBoner MCP configured - Ask: "Search PubMed for metformin Alzheimer's" - Show real results appearing - Ask: "Now search clinical trials for the same" @@ -817,14 +817,14 @@ Phase 12 is **COMPLETE** when: │ Gradio MCP Server │ │ /gradio_api/mcp/ │ │ ┌──────────────┐ ┌──────────────┐ ┌──────────────┐ ┌─────────┐ │ -│ │search_pubmed │ │search_trials │ │search_biorxiv│ │search_ │ │ +│ │search_pubmed │ │search_trials │ │search_epmc │ │search_ │ │ │ │ │ │ │ │ │ │all │ │ │ └──────┬───────┘ └──────┬───────┘ └──────┬───────┘ └────┬────┘ │ └─────────┼────────────────┼────────────────┼──────────────┼──────┘ │ │ │ │ ▼ ▼ ▼ ▼ ┌──────────┐ ┌──────────┐ ┌──────────┐ (calls all) - │PubMedTool│ │Trials │ │BioRxiv │ + │PubMedTool│ │Trials │ │EuropePMC │ │ │ │Tool │ │Tool │ └──────────┘ └──────────┘ └──────────┘ ``` diff --git a/docs/implementation/13_phase_modal_integration.md b/docs/implementation/13_phase_modal_integration.md index edb0f7c628f3b7cf1687ab0ad24c188e6ecbfff8..95224dba82b877831bb5ef94b821f767a08aa246 100644 --- a/docs/implementation/13_phase_modal_integration.md +++ b/docs/implementation/13_phase_modal_integration.md @@ -872,7 +872,7 @@ async def main() -> None: sys.exit(1) print(f"\n{'=' * 60}") - print("DeepCritical Modal Analysis Demo") + print("DeepBoner Modal Analysis Demo") print(f"Query: {args.query}") print(f"{'=' * 60}\n") diff --git a/docs/implementation/14_phase_demo_submission.md b/docs/implementation/14_phase_demo_submission.md index 3dee9bc235fbe58e5aea4e0b48135bf4b08d4da5..78cc55336b60d1db0a1756cd6fffbb5e4ddeb169 100644 --- a/docs/implementation/14_phase_demo_submission.md +++ b/docs/implementation/14_phase_demo_submission.md @@ -71,7 +71,7 @@ tags: [Show Gradio UI] -"DeepCritical is an AI-powered drug repurposing research agent. +"DeepBoner is an AI-powered drug repurposing research agent. 
It searches peer-reviewed literature, clinical trials, and cutting-edge preprints to find new uses for existing drugs." @@ -83,7 +83,7 @@ to find new uses for existing drugs." [Type query: "Can metformin treat Alzheimer's disease?"] -"When I ask about metformin for Alzheimer's, DeepCritical: +"When I ask about metformin for Alzheimer's, DeepBoner: 1. Searches PubMed for peer-reviewed papers 2. Queries ClinicalTrials.gov for active trials 3. Scans bioRxiv for the latest preprints" @@ -101,10 +101,10 @@ synthesize findings into a structured research report." [Switch to Claude Desktop] -"What makes DeepCritical unique is full MCP integration. +"What makes DeepBoner unique is full MCP integration. These same tools are available to any MCP client." -[Show Claude Desktop with DeepCritical tools] +[Show Claude Desktop with DeepBoner tools] "I can ask Claude: 'Search PubMed for aspirin cancer prevention'" @@ -140,7 +140,7 @@ returning verdicts like SUPPORTED, REFUTED, or INCONCLUSIVE." [Return to Gradio UI] -"DeepCritical brings together: +"DeepBoner brings together: - Three biomedical data sources - MCP protocol for universal tool access - Modal sandboxes for safe code execution @@ -164,7 +164,7 @@ and let us know what you think." ```markdown --- -title: DeepCritical +title: DeepBoner emoji: 🧬 colorFrom: blue colorTo: purple @@ -183,7 +183,7 @@ tags: - modal --- -# DeepCritical +# DeepBoner AI-Powered Drug Repurposing Research Agent @@ -198,7 +198,7 @@ AI-Powered Drug Repurposing Research Agent Connect to our MCP server at: ``` -https://MCP-1st-Birthday-deepcritical.hf.space/gradio_api/mcp/ +https://your-space.hf.space/gradio_api/mcp/ ``` Available tools: @@ -214,7 +214,7 @@ Available tools: ## Links -- [GitHub Repository](https://github.com/The-Obstacle-Is-The-Way/DeepCritical-1) +- [GitHub Repository](https://github.com/The-Obstacle-Is-The-Way/DeepBoner-1) - [Demo Video](link-to-video) ``` @@ -237,7 +237,7 @@ MODAL_TOKEN_SECRET=... 
### Twitter/X Template ``` -🧬 Excited to submit DeepCritical to MCP's 1st Birthday Hackathon! +🧬 Excited to submit DeepBoner to MCP's 1st Birthday Hackathon! An AI agent that: ✅ Searches PubMed, ClinicalTrials.gov & bioRxiv @@ -254,10 +254,10 @@ Demo: [Video link] ### LinkedIn Template ``` -Thrilled to share DeepCritical - our submission to MCP's 1st Birthday Hackathon! +Thrilled to share DeepBoner - our submission to MCP's 1st Birthday Hackathon! 🔬 What it does: -DeepCritical is an AI-powered drug repurposing research agent that searches +DeepBoner is an AI-powered drug repurposing research agent that searches peer-reviewed literature, clinical trials, and preprints to find new uses for existing drugs. diff --git a/docs/implementation/roadmap.md b/docs/implementation/roadmap.md index 1f4862e9ee898881d04dbecd8c27b8bc4848fd61..75d4fd2e3ebadcc143fc04c5471ca404917d3fd9 100644 --- a/docs/implementation/roadmap.md +++ b/docs/implementation/roadmap.md @@ -1,8 +1,8 @@ -# Implementation Roadmap: DeepCritical (Vertical Slices) +# Implementation Roadmap: DeepBoner (Vertical Slices) **Philosophy:** AI-Native Engineering, Vertical Slice Architecture, TDD, Modern Tooling (2025). -This roadmap defines the execution strategy to deliver **DeepCritical** effectively. We reject "overplanning" in favor of **ironclad, testable vertical slices**. Each phase delivers a fully functional slice of end-to-end value. +This roadmap defines the execution strategy to deliver **DeepBoner** effectively. We reject "overplanning" in favor of **ironclad, testable vertical slices**. Each phase delivers a fully functional slice of end-to-end value. --- @@ -114,7 +114,7 @@ tests/ - [ ] Implement `src/orchestrator.py` (Connects Search + Judge loops). - [ ] Build `src/app.py` (Gradio with Streaming). -- **Deliverable**: Working DeepCritical Agent on HuggingFace. +- **Deliverable**: Working DeepBoner Agent on HuggingFace. 
--- diff --git a/docs/index.md b/docs/index.md index 400ddfa44d974f61407c1754bfe57e5d6dfedace..09328b45971df1d559f43347133caa7b4eb0e568 100644 --- a/docs/index.md +++ b/docs/index.md @@ -1,4 +1,4 @@ -# DeepCritical Documentation +# DeepBoner Documentation ## Medical Drug Repurposing Research Agent diff --git a/docs/to_do/DEEP_RESEARCH_ROADMAP.md b/docs/to_do/DEEP_RESEARCH_ROADMAP.md new file mode 100644 index 0000000000000000000000000000000000000000..2214228f2c9f8ee3546b0f3bfe887b6bb172e547 --- /dev/null +++ b/docs/to_do/DEEP_RESEARCH_ROADMAP.md @@ -0,0 +1,337 @@ +# Deep Research Roadmap + +> How to properly add GPT-Researcher-style deep research to DeepBoner +> using the EXISTING Magentic + Pydantic AI architecture. + +## Current State + +We already have: + +| Feature | Location | Status | +|---------|----------|--------| +| Multi-agent orchestration | `orchestrator_magentic.py` | Working | +| SearchAgent, JudgeAgent, HypothesisAgent, ReportAgent | `agents/magentic_agents.py` | Working | +| HuggingFace free tier | `agent_factory/judges.py` (HFInferenceJudgeHandler) | Working | +| Budget constraints | MagenticOrchestrator (max_round_count, max_stall_count) | Built-in | +| Simple mode (linear) | `orchestrator.py` | Working | + +## What Deep Research Adds + +GPT-Researcher style "deep research" means: + +1. **Query Analysis** - Detect if query needs simple lookup vs comprehensive report +2. **Section Planning** - Break complex query into 3-7 parallel research sections +3. **Parallel Research** - Run multiple research loops simultaneously +4. **Long-form Writing** - Synthesize sections into cohesive report +5. **RAG** - Semantic search over accumulated evidence + +## Implementation Plan (TDD, Vertical Slices) + +### Phase 1: Input Parser (Est. 50-100 lines) + +**Goal**: Detect research mode from query. 
+ +```python +# src/agents/input_parser.py + +class ParsedQuery(BaseModel): + original_query: str + improved_query: str + research_mode: Literal["iterative", "deep"] + key_entities: list[str] + +async def parse_query(query: str) -> ParsedQuery: + """ + Detect if query needs deep research. + + Deep indicators: + - "comprehensive", "report", "overview", "analysis" + - Multiple topics/drugs mentioned + - Requests for sections/structure + + Iterative indicators: + - Single focused question + - "what is", "how does", "find" + """ +``` + +**Test first**: +```python +async def test_parse_query_detects_deep_mode(): + result = await parse_query("Write a comprehensive report on Alzheimer's treatments") + assert result.research_mode == "deep" + +async def test_parse_query_detects_iterative_mode(): + result = await parse_query("What is the mechanism of metformin?") + assert result.research_mode == "iterative" +``` + +**Wire in**: +```python +# In app.py or orchestrator_factory.py +parsed = await parse_query(user_query) +if parsed.research_mode == "deep": + orchestrator = create_deep_orchestrator() +else: + orchestrator = create_orchestrator() # existing +``` + +--- + +### Phase 2: Section Planner (Est. 80-120 lines) + +**Goal**: Create report outline for deep research. + +```python +# src/agents/planner.py + +class ReportSection(BaseModel): + title: str + query: str # Search query for this section + description: str + +class ReportPlan(BaseModel): + title: str + sections: list[ReportSection] + +# Use existing ChatAgent pattern from magentic_agents.py +def create_planner_agent(chat_client: OpenAIChatClient | None = None) -> ChatAgent: + return ChatAgent( + name="PlannerAgent", + description="Creates structured report outlines", + instructions="""Given a research query, create a report plan with 3-7 sections. + Each section should have: + - A clear title + - A focused search query + - Brief description of what to cover + + Example for "Alzheimer's drug repurposing": + 1. 
Current Treatment Landscape + 2. Mechanism-Based Candidates (targeting amyloid, tau, inflammation) + 3. Clinical Trial Evidence + 4. Safety Considerations + 5. Emerging Research Directions + """, + chat_client=chat_client, + ) +``` + +**Test first**: +```python +async def test_planner_creates_sections(): + plan = await planner.create_plan("Comprehensive Alzheimer's drug repurposing report") + assert len(plan.sections) >= 3 + assert all(s.query for s in plan.sections) +``` + +**Wire in**: Used by Phase 3. + +--- + +### Phase 3: Parallel Research Flow (Est. 100-150 lines) + +**Goal**: Run multiple MagenticOrchestrator instances in parallel. + +```python +# src/orchestrator_deep.py + +class DeepResearchOrchestrator: + """ + Runs parallel research loops using EXISTING MagenticOrchestrator. + + NOT a new orchestration system - just a wrapper that: + 1. Plans sections + 2. Runs existing orchestrator per section (in parallel) + 3. Aggregates results + """ + + def __init__(self, max_parallel: int = 5): + self.planner = create_planner_agent() + self.max_parallel = max_parallel + + async def run(self, query: str) -> AsyncGenerator[AgentEvent, None]: + # 1. Create plan + plan = await self.planner.create_plan(query) + yield AgentEvent(type="planning", message=f"Created {len(plan.sections)} section plan") + + # 2. Run parallel research (reuse existing orchestrator!) + from src.orchestrator_magentic import MagenticOrchestrator + + async def research_section(section: ReportSection) -> str: + orchestrator = MagenticOrchestrator(max_rounds=5) # Fewer rounds per section + result = "" + async for event in orchestrator.run(section.query): + if event.type == "complete": + result = event.message + return result + + # Run in parallel with semaphore + semaphore = asyncio.Semaphore(self.max_parallel) + async def bounded_research(section): + async with semaphore: + return await research_section(section) + + results = await asyncio.gather(*[ + bounded_research(s) for s in plan.sections + ]) + + # 3. 
Aggregate + yield AgentEvent( + type="complete", + message=self._aggregate_sections(plan, results) + ) +``` + +**Key insight**: We're NOT replacing MagenticOrchestrator. We're running multiple instances of it. + +**Test first**: +```python +@pytest.mark.integration +async def test_deep_orchestrator_runs_parallel(): + orchestrator = DeepResearchOrchestrator(max_parallel=2) + events = [e async for e in orchestrator.run("Comprehensive Alzheimer's report")] + assert any(e.type == "planning" for e in events) + assert any(e.type == "complete" for e in events) +``` + +--- + +### Phase 4: RAG Integration (Est. 100-150 lines) + +**Goal**: Semantic search over accumulated evidence. + +> **Note**: We already have `src/services/embeddings.py` (EmbeddingService) which provides +> ChromaDB + sentence-transformers with `add_evidence()` and `search_similar()` methods. +> The code below is illustrative - in practice, extend EmbeddingService or use it directly. +> See also: `src/services/llamaindex_rag.py` for OpenAI-based RAG (different use case). + +```python +# src/services/rag.py (illustrative - use EmbeddingService instead) + +class RAGService: + """ + Simple RAG using ChromaDB + sentence-transformers. + No LlamaIndex dependency - keep it lightweight. 
+ """ + + def __init__(self): + import chromadb + from sentence_transformers import SentenceTransformer + + self.client = chromadb.Client() + self.collection = self.client.get_or_create_collection("evidence") + self.encoder = SentenceTransformer("all-MiniLM-L6-v2") + + def add_evidence(self, evidence: list[Evidence]) -> int: + """Add evidence to vector store, return count added.""" + # Dedupe by URL + existing = set(self.collection.get()["ids"]) + new_evidence = [e for e in evidence if e.citation.url not in existing] + + if not new_evidence: + return 0 + + self.collection.add( + ids=[e.citation.url for e in new_evidence], + documents=[e.content for e in new_evidence], + metadatas=[{"title": e.citation.title, "source": e.citation.source} for e in new_evidence], + ) + return len(new_evidence) + + def search(self, query: str, n_results: int = 5) -> list[Evidence]: + """Semantic search for relevant evidence.""" + results = self.collection.query(query_texts=[query], n_results=n_results) + # Convert back to Evidence objects + ... +``` + +**Wire in as tool**: +```python +# Add to SearchAgent's tools +def rag_search(query: str, n_results: int = 5) -> str: + """Search previously collected evidence for relevant information.""" + service = get_rag_service() + results = service.search(query, n_results) + return format_evidence(results) + +# In magentic_agents.py +ChatAgent( + tools=[search_pubmed, search_clinical_trials, search_preprints, rag_search], # ADD RAG +) +``` + +--- + +### Phase 5: Long Writer (Est. 80-100 lines) + +**Goal**: Write longer reports section-by-section. + +```python +# Extend existing ReportAgent or create LongWriterAgent + +def create_long_writer_agent() -> ChatAgent: + return ChatAgent( + name="LongWriterAgent", + description="Writes detailed report sections with proper citations", + instructions="""Write a detailed section for a research report. 
+ + You will receive: + - Section title + - Relevant evidence/findings + - What previous sections covered (to avoid repetition) + + Output: + - 500-1000 words per section + - Proper citations [1], [2], etc. + - Smooth transitions + - No repetition of earlier content + """, + tools=[get_bibliography, rag_search], + ) +``` + +--- + +## What NOT To Build + +These are REDUNDANT with existing Magentic system: + +| Component | Why Skip | +|-----------|----------| +| GraphOrchestrator | MagenticBuilder already handles agent coordination | +| BudgetTracker | MagenticBuilder has max_round_count, max_stall_count | +| WorkflowManager | asyncio.gather() + Semaphore is simpler | +| StateMachine | contextvars already used in agents/state.py | +| New agent primitives | ChatAgent pattern already works | + +## Implementation Order + +``` +Week 1: Phase 1 (InputParser) - Ship it working +Week 2: Phase 2 (Planner) - Ship it working +Week 3: Phase 3 (Parallel Flow) - Ship it working +Week 4: Phase 4 (RAG) - Ship it working +Week 5: Phase 5 (LongWriter) - Ship it working +``` + +Each phase: +1. Write tests first +2. Implement minimal code +3. Wire into app.py +4. Manual test +5. PR with <200 lines +6. Ship + +## References + +- GPT-Researcher: https://github.com/assafelovic/gpt-researcher +- LangGraph patterns: https://python.langchain.com/docs/langgraph +- Your existing Magentic setup: `src/orchestrator_magentic.py` + +## Why This Approach + +1. **Builds on existing working code** - Don't replace, extend +2. **Each phase ships value** - User sees improvement after each PR +3. **Tests prove it works** - Not "trust me it imports" +4. **Minimal new abstractions** - Reuse ChatAgent, MagenticOrchestrator +5. 
**~500 total lines** vs 7,000 lines of parallel infrastructure diff --git a/docs/to_do/REFERENCE_GRADDIO_DEMO_ANALYSIS.md b/docs/to_do/REFERENCE_GRADDIO_DEMO_ANALYSIS.md new file mode 100644 index 0000000000000000000000000000000000000000..924c357ece4114925a942336e761d38521cbfc14 --- /dev/null +++ b/docs/to_do/REFERENCE_GRADDIO_DEMO_ANALYSIS.md @@ -0,0 +1,229 @@ +# Reference: GradioDemo Analysis + +> Analysis of code from https://github.com/DeepBoner/GradioDemo +> Purpose: Extract good ideas, understand patterns, avoid mistakes + +## Overview + +| Metric | Value | +|--------|-------| +| Total lines added | ~7,000 | +| New Python files | +20 | +| Test pass rate | 80% (62 errors due to missing mocks) | +| Integration status | **NOT WIRED IN** | + +## Component Catalog + +### REDUNDANT (Already have equivalent) + +| Component | Lines | What We Have Instead | +|-----------|-------|---------------------| +| `orchestrator/graph_orchestrator.py` | 974 | MagenticBuilder | +| `middleware/budget_tracker.py` | 391 | MagenticBuilder max_round_count | +| `middleware/state_machine.py` | 130 | agents/state.py with contextvars | +| `middleware/workflow_manager.py` | 300 | asyncio.gather() | +| `orchestrator/research_flow.py` (IterativeResearchFlow) | 500 | MagenticOrchestrator | +| HuggingFace integration | various | HFInferenceJudgeHandler | + +### POTENTIALLY USEFUL (Ideas to cherry-pick) + +#### 1. InputParser (`agents/input_parser.py` - 179 lines) + +**Idea**: Detect research mode from query text. + +```python +# Key logic (simplified) +research_mode: Literal["iterative", "deep"] = "iterative" +if any(keyword in query.lower() for keyword in [ + "comprehensive", "report", "sections", "analyze", "analysis", "overview", "market" +]): + research_mode = "deep" +``` + +**Good pattern**: Heuristic fallback when LLM fails. +**Our implementation**: See Phase 1 in DEEP_RESEARCH_ROADMAP.md + +#### 2. 
PlannerAgent (`orchestrator/planner_agent.py` - 184 lines) + +**Idea**: LLM creates section outline for report. + +```python +class ReportPlan(BaseModel): + title: str + sections: list[ReportSection] + estimated_time_minutes: int + +class ReportSection(BaseModel): + title: str + query: str + description: str + priority: int +``` + +**Good pattern**: Structured output with Pydantic models. +**Our implementation**: See Phase 2 in DEEP_RESEARCH_ROADMAP.md + +#### 3. DeepResearchFlow (`orchestrator/research_flow.py` - 500 lines) + +**Idea**: Run parallel research loops per section. + +```python +# Their pattern (simplified) +async def run_parallel_loops(sections: list[ReportSection]): + tasks = [run_single_loop(s) for s in sections] + results = await asyncio.gather(*tasks, return_exceptions=True) +``` + +**Problem**: They built new IterativeResearchFlow instead of reusing MagenticOrchestrator. +**Our implementation**: Just run multiple MagenticOrchestrator instances. + +#### 4. LlamaIndex RAG (`services/llamaindex_rag.py` - 454 lines) + +**Idea**: Semantic search over collected evidence. + +```python +# Their approach +class LlamaIndexRAGService: + def __init__(self): + # ChromaDB + LlamaIndex + HuggingFace embeddings + self.vector_store = ChromaVectorStore(...) + self.index = VectorStoreIndex(...) + + def retrieve(self, query: str, top_k: int = 5) -> list[dict]: + retriever = VectorIndexRetriever(index=self.index, similarity_top_k=top_k) + return retriever.retrieve(query) +``` + +**Good**: Full-featured RAG with multiple embedding providers. +**Simpler alternative**: Direct ChromaDB + sentence-transformers (no LlamaIndex). +**Our implementation**: See Phase 4 in DEEP_RESEARCH_ROADMAP.md + +#### 5. LongWriterAgent (`agents/long_writer.py` - ~300 lines) + +**Idea**: Write reports section-by-section to handle length. 
+ +```python +class SectionOutput(BaseModel): + section_content: str + references: list[str] + next_section_context: str # What to avoid repeating + +async def write_next_section( + section_title: str, + findings: str, + previous_sections: str, # Avoid repetition +) -> SectionOutput: +``` + +**Good pattern**: Passing context to avoid repetition. +**Our implementation**: See Phase 5 in DEEP_RESEARCH_ROADMAP.md + +#### 6. ProofreaderAgent (`agents/proofreader.py` - ~200 lines) + +**Idea**: Final cleanup pass on report. + +```python +# Tasks: +# 1. Remove duplicate information +# 2. Fix citation numbering +# 3. Add executive summary +# 4. Ensure consistent formatting +``` + +**Good pattern**: Separate concerns - writer writes, proofreader polishes. +**Our implementation**: Optional Phase 6 if needed. + +### Graph Architecture (Educational Reference) + +The graph system is well-designed in theory: + +```python +# Node types +class AgentNode(GraphNode): + agent: Any # Pydantic AI agent + input_transformer: Callable # Transform input + output_transformer: Callable # Transform output + +class DecisionNode(GraphNode): + decision_function: Callable[[Any], str] # Returns next node ID + options: list[str] + +class ParallelNode(GraphNode): + parallel_nodes: list[str] # Run these in parallel + aggregator: Callable # Combine results + +# Graph structure +class ResearchGraph: + nodes: dict[str, GraphNode] + edges: dict[str, list[GraphEdge]] + entry_node: str + exit_nodes: list[str] +``` + +**Why we don't need it**: MagenticBuilder already provides: +- Agent coordination via manager +- Conditional routing (manager decides) +- Multiple participants + +This is essentially reimplementing what `agent-framework` already does. + +## Key Lessons + +### What Went Wrong + +1. **Parallel architecture** - Built new system instead of extending existing +2. **Horizontal sprawl** - All infrastructure, nothing wired in +3. **Test mocking** - Tests don't mock API clients properly +4. 
**No manual testing** - Code never ran end-to-end + +### What To Learn From + +1. **Pydantic models for structured output** - Good pattern +2. **Heuristic fallbacks** - When LLM fails, have a fallback +3. **Section-by-section writing** - For long reports +4. **RAG for evidence retrieval** - Useful for large evidence sets + +### The 7,000 Line vs 500 Line Comparison + +**Their approach**: +- New GraphOrchestrator (974 lines) +- New ResearchFlow (999 lines) +- New BudgetTracker (391 lines) +- New StateMachine (130 lines) +- New WorkflowManager (300 lines) +- New agents (InputParser, Writer, LongWriter, Proofreader, etc.) +- Total: ~7,000 lines, not integrated + +**Our approach**: +- InputParser (50-100 lines) - extends existing +- PlannerAgent (80-120 lines) - uses ChatAgent pattern +- DeepOrchestrator (100-150 lines) - wraps MagenticOrchestrator +- RAGService (100-150 lines) - simple ChromaDB +- LongWriter (80-100 lines) - extends ReportAgent +- Total: ~500 lines, each phase ships working + +## File Locations (for reference) + +``` +reference_repos/GradioDemo/src/ +├── orchestrator/ +│ ├── graph_orchestrator.py # 974 lines - graph execution +│ ├── research_flow.py # 999 lines - iterative/deep flows +│ └── planner_agent.py # 184 lines - section planning +├── agents/ +│ ├── input_parser.py # 179 lines - query analysis +│ ├── writer.py # 210 lines - report writing +│ ├── long_writer.py # ~300 lines - section writing +│ ├── proofreader.py # ~200 lines - cleanup +│ └── knowledge_gap.py # gap detection +├── middleware/ +│ ├── budget_tracker.py # 391 lines - token/time tracking +│ ├── state_machine.py # 130 lines - workflow state +│ └── workflow_manager.py # 300 lines - parallel loop mgmt +├── services/ +│ └── llamaindex_rag.py # 454 lines - RAG service +├── tools/ +│ └── rag_tool.py # 191 lines - RAG as search tool +└── agent_factory/ + └── graph_builder.py # ~400 lines - graph construction +``` diff --git a/docs/workflow-diagrams.md b/docs/workflow-diagrams.md index 
509a0985e70e07fc1829eecaa7fae34ff74462c2..e6a7083ed6135b043175b603dd8f382a1825d758 100644 --- a/docs/workflow-diagrams.md +++ b/docs/workflow-diagrams.md @@ -1,4 +1,4 @@ -# DeepCritical Workflow - Simplified Magentic Architecture +# DeepBoner Workflow - Simplified Magentic Architecture > **Architecture Pattern**: Microsoft Magentic Orchestration > **Design Philosophy**: Simple, dynamic, manager-driven coordination @@ -475,7 +475,7 @@ stateDiagram-v2 ```mermaid graph TD - App[Gradio App
DeepCritical Research Agent] + App[Gradio App
DeepBoner Research Agent] App --> Input[Input Section] App --> Status[Status Section] @@ -514,7 +514,7 @@ graph TD ```mermaid graph LR - User[👤 Researcher
Asks research questions] -->|Submits query| DC[DeepCritical
Magentic Workflow] + User[👤 Researcher
Asks research questions] -->|Submits query| DC[DeepBoner
Magentic Workflow] DC -->|Literature search| PubMed[PubMed API
Medical papers] DC -->|Preprint search| ArXiv[arXiv API
Scientific preprints] @@ -549,7 +549,7 @@ graph LR ```mermaid gantt - title DeepCritical Magentic Workflow - Typical Execution + title DeepBoner Magentic Workflow - Typical Execution dateFormat mm:ss axisFormat %M:%S diff --git a/examples/README.md b/examples/README.md index 856e74fbc3f15a6080bb12b0c6501309392c484b..c6fd280ec993fc0729e391e16207ab4cf2e9cbf1 100644 --- a/examples/README.md +++ b/examples/README.md @@ -1,4 +1,4 @@ -# DeepCritical Examples +# DeepBoner Examples **NO MOCKS. NO FAKE DATA. REAL SCIENCE.** @@ -181,4 +181,4 @@ Mocks belong in `tests/unit/`, not in demos. When you run these examples, you se - Real scientific hypotheses - Real research reports -This is what DeepCritical actually does. No fake data. No canned responses. +This is what DeepBoner actually does. No fake data. No canned responses. diff --git a/examples/embeddings_demo/run_embeddings.py b/examples/embeddings_demo/run_embeddings.py index 26ba4d374326a8dcdf272ac552527b5d77171529..ea218cca93015df83a57993894336a735ac879b1 100644 --- a/examples/embeddings_demo/run_embeddings.py +++ b/examples/embeddings_demo/run_embeddings.py @@ -35,7 +35,7 @@ def create_fresh_service(name_suffix: str = "") -> EmbeddingService: async def demo_real_pipeline() -> None: """Run the demo using REAL PubMed data.""" print("\n" + "=" * 60) - print("DeepCritical Embeddings Demo (REAL DATA)") + print("DeepBoner Embeddings Demo (REAL DATA)") print("=" * 60) # 1. Fetch Real Data diff --git a/examples/full_stack_demo/run_full.py b/examples/full_stack_demo/run_full.py index 2464084cd802c55285cebc4f54cf7c4832f5ba4e..55d65b321e2504cc745fb5efa2fe7979632101cb 100644 --- a/examples/full_stack_demo/run_full.py +++ b/examples/full_stack_demo/run_full.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Demo: Full Stack DeepCritical Agent (Phases 1-8). +Demo: Full Stack DeepBoner Agent (Phases 1-8). 
This script demonstrates the COMPLETE REAL drug repurposing research pipeline: - Phase 2: REAL Search (PubMed + ClinicalTrials + Europe PMC) @@ -104,7 +104,7 @@ async def _handle_judge_step( async def run_full_demo(query: str, max_iterations: int) -> None: """Run the REAL full stack pipeline.""" - print_header("DeepCritical Full Stack Demo (REAL)") + print_header("DeepBoner Full Stack Demo (REAL)") print(f"Query: {query}") print(f"Max iterations: {max_iterations}") print("Mode: REAL (All live API calls - no mocks)\n") @@ -172,7 +172,7 @@ async def run_full_demo(query: str, max_iterations: int) -> None: async def main() -> None: """Entry point.""" parser = argparse.ArgumentParser( - description="DeepCritical Full Stack Demo - REAL, No Mocks", + description="DeepBoner Full Stack Demo - REAL, No Mocks", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" This demo runs the COMPLETE pipeline with REAL API calls: @@ -222,7 +222,7 @@ Examples: await run_full_demo(args.query, args.iterations) print("\n" + "=" * 70) - print(" DeepCritical Full Stack Demo Complete!") + print(" DeepBoner Full Stack Demo Complete!") print(" ") print(" Everything you just saw was REAL:") print(" - Real PubMed + ClinicalTrials + Europe PMC searches") diff --git a/examples/hypothesis_demo/run_hypothesis.py b/examples/hypothesis_demo/run_hypothesis.py index 65a24224a0d8a32ced8f25de702e152ecba13590..3e1b38bdaf0596133f9e1debd7a9f1342b1500cd 100644 --- a/examples/hypothesis_demo/run_hypothesis.py +++ b/examples/hypothesis_demo/run_hypothesis.py @@ -31,7 +31,7 @@ async def run_hypothesis_demo(query: str) -> None: """Run the REAL hypothesis generation pipeline.""" try: print(f"\n{'=' * 60}") - print("DeepCritical Hypothesis Agent Demo (Phase 7)") + print("DeepBoner Hypothesis Agent Demo (Phase 7)") print(f"Query: {query}") print("Mode: REAL (Live API calls)") print(f"{'=' * 60}\n") diff --git a/examples/modal_demo/run_analysis.py b/examples/modal_demo/run_analysis.py index 
82bbe7ff0fcdedb4b871d4479924db2108affcd8..c8e54b195875ff761bb93b25b4eeaa194584b861 100644 --- a/examples/modal_demo/run_analysis.py +++ b/examples/modal_demo/run_analysis.py @@ -32,7 +32,7 @@ async def main() -> None: sys.exit(1) print(f"\n{'=' * 60}") - print("DeepCritical Modal Analysis Demo") + print("DeepBoner Modal Analysis Demo") print(f"Query: {args.query}") print(f"{'=' * 60}\n") diff --git a/examples/orchestrator_demo/run_agent.py b/examples/orchestrator_demo/run_agent.py index 44e569b0bf66a882e51ddebfd46e462c7d575365..83ca2243e0e23a7e3196e7ff1dd4342e11580098 100644 --- a/examples/orchestrator_demo/run_agent.py +++ b/examples/orchestrator_demo/run_agent.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Demo: DeepCritical Agent Loop (Search + Judge + Orchestrator). +Demo: DeepBoner Agent Loop (Search + Judge + Orchestrator). This script demonstrates the REAL Phase 4 orchestration: - REAL Iterative Search (PubMed + ClinicalTrials + Europe PMC) @@ -36,7 +36,7 @@ MAX_ITERATIONS = 10 async def main() -> None: """Run the REAL agent demo.""" parser = argparse.ArgumentParser( - description="DeepCritical Agent Demo - REAL, No Mocks", + description="DeepBoner Agent Demo - REAL, No Mocks", formatter_class=argparse.RawDescriptionHelpFormatter, epilog=""" This demo runs the REAL search-judge-synthesize loop: @@ -72,7 +72,7 @@ Examples: sys.exit(1) print(f"\n{'=' * 60}") - print("DeepCritical Agent Demo (REAL)") + print("DeepBoner Agent Demo (REAL)") print(f"Query: {args.query}") print(f"Max Iterations: {args.iterations}") print("Mode: REAL (All live API calls)") diff --git a/examples/orchestrator_demo/run_magentic.py b/examples/orchestrator_demo/run_magentic.py index fe74450d9a19d40c706d32ffe97d452a2aa4f36b..4a49231c73794636f5bccf00ac4819c93a8f4173 100644 --- a/examples/orchestrator_demo/run_magentic.py +++ b/examples/orchestrator_demo/run_magentic.py @@ -1,6 +1,6 @@ #!/usr/bin/env python3 """ -Demo: Magentic-One Orchestrator for DeepCritical. 
+Demo: Magentic-One Orchestrator for DeepBoner. This script demonstrates Phase 5 functionality: - Multi-Agent Coordination (Searcher + Judge + Manager) @@ -27,7 +27,7 @@ from src.utils.models import OrchestratorConfig async def main() -> None: """Run the magentic agent demo.""" - parser = argparse.ArgumentParser(description="Run DeepCritical Magentic Agent") + parser = argparse.ArgumentParser(description="Run DeepBoner Magentic Agent") parser.add_argument("query", help="Research query (e.g., 'metformin cancer')") parser.add_argument("--iterations", type=int, default=10, help="Max rounds") args = parser.parse_args() @@ -40,7 +40,7 @@ async def main() -> None: sys.exit(1) print(f"\n{'=' * 60}") - print("DeepCritical Magentic Agent Demo") + print("DeepBoner Magentic Agent Demo") print(f"Query: {args.query}") print("Mode: MAGENTIC (Multi-Agent)") print(f"{'=' * 60}\n") diff --git a/examples/search_demo/run_search.py b/examples/search_demo/run_search.py index 05f46d37bbef26d5674f050488c0f8b16f822ef1..132841ab76c4f4c532999895a574e86dc452608f 100644 --- a/examples/search_demo/run_search.py +++ b/examples/search_demo/run_search.py @@ -30,7 +30,7 @@ from src.tools.search_handler import SearchHandler async def main(query: str) -> None: """Run search demo with the given query.""" print(f"\n{'=' * 60}") - print("DeepCritical Search Demo") + print("DeepBoner Search Demo") print(f"Query: {query}") print(f"{'=' * 60}\n") diff --git a/main.py b/main.py index e6f8c40fba36aa58c23a2d2728040d4024f61081..2efa88bf122098762dbe884da8e25ab0d440deb7 100644 --- a/main.py +++ b/main.py @@ -1,5 +1,5 @@ def main(): - print("Hello from deepcritical!") + print("Hello from deepboner!") if __name__ == "__main__": diff --git a/pyproject.toml b/pyproject.toml index 22d112b09e791b72764e99ebeaf4ac1e3e1b3f57..f86a0373a1d072f98825862050370b2a6ed1e738 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,7 +1,7 @@ [project] -name = "deepcritical" +name = "deepboner" version = "0.1.0" -description = 
"AI-Native Drug Repurposing Research Agent" +description = "AI-Native Sexual Health Research Agent" readme = "README.md" requires-python = ">=3.11" dependencies = [ @@ -126,6 +126,18 @@ markers = [ "integration: Integration tests (real APIs)", "slow: Slow tests", ] +# Filter warnings from unittest.mock introspecting Pydantic models. +# This is a known upstream issue: https://github.com/pydantic/pydantic/issues/9927 +# When autospec=True, mock.py accesses deprecated Pydantic attributes during introspection. +# We filter these specifically because it's NOT our code triggering deprecations. +filterwarnings = [ + # Pydantic 2.0 deprecations triggered by mock introspection + "ignore:The `__fields__` attribute is deprecated:pydantic.warnings.PydanticDeprecatedSince20", + "ignore:The `__fields_set__` attribute is deprecated:pydantic.warnings.PydanticDeprecatedSince20", + # Pydantic 2.11 deprecations triggered by mock introspection + "ignore:Accessing the 'model_computed_fields' attribute on the instance is deprecated:pydantic.warnings.PydanticDeprecatedSince211", + "ignore:Accessing the 'model_fields' attribute on the instance is deprecated:pydantic.warnings.PydanticDeprecatedSince211", +] # ============== COVERAGE CONFIG ============== [tool.coverage.run] diff --git a/src/agent_factory/judges.py b/src/agent_factory/judges.py index 859a905477f21ab25237c6152e7b5e44291a29de..442f8f2dd5bf2aee9363ff39dbe1c19aa8e55524 100644 --- a/src/agent_factory/judges.py +++ b/src/agent_factory/judges.py @@ -9,7 +9,7 @@ from huggingface_hub import InferenceClient from pydantic_ai import Agent from pydantic_ai.models.anthropic import AnthropicModel from pydantic_ai.models.huggingface import HuggingFaceModel -from pydantic_ai.models.openai import OpenAIModel +from pydantic_ai.models.openai import OpenAIChatModel from pydantic_ai.providers.anthropic import AnthropicProvider from pydantic_ai.providers.huggingface import HuggingFaceProvider from pydantic_ai.providers.openai import 
OpenAIProvider @@ -48,7 +48,7 @@ def get_model() -> Any: logger.warning("Unknown LLM provider, defaulting to OpenAI", provider=llm_provider) openai_provider = OpenAIProvider(api_key=settings.openai_api_key) - return OpenAIModel(settings.openai_model, provider=openai_provider) + return OpenAIChatModel(settings.openai_model, provider=openai_provider) class JudgeHandler: diff --git a/src/app.py b/src/app.py index 5d986fdb5dd3e3b7e1554f27a5dd96695208a937..32f4b3b8984d279d83b93e6581638a689ea7c078 100644 --- a/src/app.py +++ b/src/app.py @@ -1,4 +1,4 @@ -"""Gradio UI for DeepCritical agent with MCP server support.""" +"""Gradio UI for DeepBoner agent with MCP server support.""" import os from collections.abc import AsyncGenerator @@ -197,29 +197,31 @@ def create_demo() -> gr.ChatInterface: # 1. Unwrapped ChatInterface (Fixes Accordion Bug) demo = gr.ChatInterface( fn=research_agent, - title="🧬 DeepCritical", + title="🍆 DeepBoner", description=( - "*AI-Powered Drug Repurposing Agent — searches PubMed, " + "*AI-Powered Sexual Health Research Agent — searches PubMed, " "ClinicalTrials.gov & Europe PMC*\n\n" + "Deep research for sexual wellness, ED treatments, hormone therapy, " + "libido, and reproductive health - for all genders.\n\n" "---\n" "*Research tool only — not for medical advice.* \n" "**MCP Server Active**: Connect Claude Desktop to `/gradio_api/mcp/`" ), examples=[ [ - "What drugs could be repurposed for Alzheimer's disease?", + "What drugs improve female libido post-menopause?", "simple", "", "openai", ], [ - "Is metformin effective for treating cancer?", + "Clinical trials for erectile dysfunction alternatives to PDE5 inhibitors?", "simple", "", "openai", ], [ - "What medications show promise for Long COVID treatment?", + "Evidence for testosterone therapy in women with HSDD?", "simple", "", "openai", diff --git a/src/mcp_tools.py b/src/mcp_tools.py index 43a390b4965573c52810b76b93f9433e9e0c0c0e..763462265eceb0a8e67958ee017a8b500b7f9d2c 100644 --- 
a/src/mcp_tools.py +++ b/src/mcp_tools.py @@ -1,4 +1,4 @@ -"""MCP tool wrappers for DeepCritical search tools. +"""MCP tool wrappers for DeepBoner search tools. These functions expose our search tools via MCP protocol. Each function follows the MCP tool contract: diff --git a/src/services/__init__.py b/src/services/__init__.py index 2565bd4058ec7aa68217db6a74ee885729cf948b..c7aeb9e097aa4825ba1a7cae1d2bd84b0c5d53a2 100644 --- a/src/services/__init__.py +++ b/src/services/__init__.py @@ -1 +1 @@ -"""Services for DeepCritical.""" +"""Services for DeepBoner.""" diff --git a/src/services/llamaindex_rag.py b/src/services/llamaindex_rag.py index e881a3d74793e5fe2264f5423d9706eb77fab784..64b77dc53fb5cbf2d3fcb6b105147d6e47734a74 100644 --- a/src/services/llamaindex_rag.py +++ b/src/services/llamaindex_rag.py @@ -1,6 +1,10 @@ """LlamaIndex RAG service for evidence retrieval and indexing. Requires optional dependencies: uv sync --extra modal + +Migration Note (v1.0 rebrand): + Default collection_name changed from "deepcritical_evidence" to "deepboner_evidence". + To preserve existing data, explicitly pass collection_name="deepcritical_evidence". """ from typing import Any @@ -25,7 +29,7 @@ class LlamaIndexRAGService: def __init__( self, - collection_name: str = "deepcritical_evidence", + collection_name: str = "deepboner_evidence", persist_dir: str | None = None, embedding_model: str | None = None, similarity_top_k: int = 5, @@ -34,7 +38,8 @@ class LlamaIndexRAGService: Initialize LlamaIndex RAG service. 
Args: - collection_name: Name of the ChromaDB collection + collection_name: Name of the ChromaDB collection (default changed from + "deepcritical_evidence" to "deepboner_evidence" in v1.0 rebrand) persist_dir: Directory to persist ChromaDB data embedding_model: OpenAI embedding model (defaults to settings.openai_embedding_model) similarity_top_k: Number of top results to retrieve @@ -248,7 +253,7 @@ class LlamaIndexRAGService: def get_rag_service( - collection_name: str = "deepcritical_evidence", + collection_name: str = "deepboner_evidence", **kwargs: Any, ) -> LlamaIndexRAGService: """ diff --git a/src/tools/clinicaltrials.py b/src/tools/clinicaltrials.py index e1c323bd80c0230b02641160e7296738346c7fad..8bf857736aaee8c7317a338e1d9d853799be61ba 100644 --- a/src/tools/clinicaltrials.py +++ b/src/tools/clinicaltrials.py @@ -75,7 +75,7 @@ class ClinicalTrialsTool: requests.get, self.BASE_URL, params=params, - headers={"User-Agent": "DeepCritical-Research-Agent/1.0"}, + headers={"User-Agent": "DeepBoner-Research-Agent/1.0"}, timeout=30, ) response.raise_for_status() diff --git a/src/tools/code_execution.py b/src/tools/code_execution.py index da22fa9b1f864b0ab0e3778706c150ae60c714fd..5a0978effd0c303f169f98db9604b3a4692d1d5c 100644 --- a/src/tools/code_execution.py +++ b/src/tools/code_execution.py @@ -109,10 +109,10 @@ class ModalCodeExecutor: try: # Create or lookup Modal app - app = modal.App.lookup("deepcritical-code-execution", create_if_missing=True) + app = modal.App.lookup("deepboner-code-execution", create_if_missing=True) # Define scientific computing image with common libraries - scientific_image = modal.Image.debian_slim(python_version="3.11").uv_pip_install( + scientific_image = modal.Image.debian_slim(python_version="3.11").pip_install( *get_sandbox_library_list() ) diff --git a/src/utils/exceptions.py b/src/utils/exceptions.py index ecdb1183d8c3e6e0a0bc198bdcfdad1dcce1f447..c4215d9f3978adc28c7a9e2f49472613217eaab7 100644 --- a/src/utils/exceptions.py +++ 
b/src/utils/exceptions.py @@ -1,25 +1,25 @@ -"""Custom exceptions for DeepCritical.""" +"""Custom exceptions for DeepBoner.""" -class DeepCriticalError(Exception): - """Base exception for all DeepCritical errors.""" +class DeepBonerError(Exception): + """Base exception for all DeepBoner errors.""" pass -class SearchError(DeepCriticalError): +class SearchError(DeepBonerError): """Raised when a search operation fails.""" pass -class JudgeError(DeepCriticalError): +class JudgeError(DeepBonerError): """Raised when the judge fails to assess evidence.""" pass -class ConfigurationError(DeepCriticalError): +class ConfigurationError(DeepBonerError): """Raised when configuration is invalid.""" pass @@ -29,3 +29,7 @@ class RateLimitError(SearchError): """Raised when we hit API rate limits.""" pass + + +# Backwards compatibility alias +DeepCriticalError = DeepBonerError diff --git a/tests/unit/agent_factory/test_judges_factory.py b/tests/unit/agent_factory/test_judges_factory.py index 8c5af6b16dc3dd12a6f1db757580c23b492e255a..5fd715c10d88810458462a7d3711566d868cac97 100644 --- a/tests/unit/agent_factory/test_judges_factory.py +++ b/tests/unit/agent_factory/test_judges_factory.py @@ -10,7 +10,7 @@ from pydantic_ai.models.anthropic import AnthropicModel # We expect this import to exist after we implement it, or we mock it if it's not there yet # For TDD, we assume we will use the library class from pydantic_ai.models.huggingface import HuggingFaceModel -from pydantic_ai.models.openai import OpenAIModel +from pydantic_ai.models.openai import OpenAIChatModel from src.agent_factory.judges import get_model @@ -28,7 +28,7 @@ def test_get_model_openai(mock_settings): mock_settings.openai_model = "gpt-5.1" model = get_model() - assert isinstance(model, OpenAIModel) + assert isinstance(model, OpenAIChatModel) assert model.model_name == "gpt-5.1" @@ -61,4 +61,4 @@ def test_get_model_default_fallback(mock_settings): mock_settings.openai_model = "gpt-5.1" model = get_model() - assert 
isinstance(model, OpenAIModel) + assert isinstance(model, OpenAIChatModel) diff --git a/tests/unit/agents/test_hypothesis_agent.py b/tests/unit/agents/test_hypothesis_agent.py index 17118a979a2e92337b3aa7b5e66bbe02de7fab6b..53280b7fa1f26fb2c185d1aea26be595ca4d08db 100644 --- a/tests/unit/agents/test_hypothesis_agent.py +++ b/tests/unit/agents/test_hypothesis_agent.py @@ -3,10 +3,19 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from agent_framework import AgentRunResponse -from src.agents.hypothesis_agent import HypothesisAgent -from src.utils.models import Citation, Evidence, HypothesisAssessment, MechanismHypothesis +# Skip all tests if agent_framework not installed (optional dep) +pytest.importorskip("agent_framework") + +from agent_framework import AgentRunResponse # noqa: E402 + +from src.agents.hypothesis_agent import HypothesisAgent # noqa: E402 +from src.utils.models import ( # noqa: E402 + Citation, + Evidence, + HypothesisAssessment, + MechanismHypothesis, +) @pytest.fixture diff --git a/tests/unit/agents/test_report_agent.py b/tests/unit/agents/test_report_agent.py index db5774615ebdc900735e977133d5c1d80f811b53..b648f2441d07063f31976198fdf4de06888122c9 100644 --- a/tests/unit/agents/test_report_agent.py +++ b/tests/unit/agents/test_report_agent.py @@ -5,8 +5,11 @@ from unittest.mock import AsyncMock, MagicMock, patch import pytest -from src.agents.report_agent import ReportAgent -from src.utils.models import ( +# Skip all tests if agent_framework not installed (optional dep) +pytest.importorskip("agent_framework") + +from src.agents.report_agent import ReportAgent # noqa: E402 +from src.utils.models import ( # noqa: E402 Citation, Evidence, MechanismHypothesis, diff --git a/tests/unit/test_magentic_fix.py b/tests/unit/test_magentic_fix.py index 93c1dc4d12b7460845ee3793f27394cb0d30dbb1..d1e61af0fd3041400af89047f5d048523298ba21 100644 --- a/tests/unit/test_magentic_fix.py +++ b/tests/unit/test_magentic_fix.py @@ -3,9 +3,13 @@ from 
unittest.mock import MagicMock, patch import pytest -from agent_framework import MagenticFinalResultEvent -from src.orchestrator_magentic import MagenticOrchestrator +# Skip all tests if agent_framework not installed (optional dep) +pytest.importorskip("agent_framework") + +from agent_framework import MagenticFinalResultEvent # noqa: E402 + +from src.orchestrator_magentic import MagenticOrchestrator # noqa: E402 class MockChatMessage: diff --git a/tests/unit/utils/test_exceptions.py b/tests/unit/utils/test_exceptions.py index d53305e007318cd1859065580e3e1d4a419e8551..69c61ed0510ad721e5e058276576596cb94bc7d7 100644 --- a/tests/unit/utils/test_exceptions.py +++ b/tests/unit/utils/test_exceptions.py @@ -1,8 +1,12 @@ """Unit tests for custom exceptions.""" +import pytest + +pytestmark = pytest.mark.unit + from src.utils.exceptions import ( ConfigurationError, - DeepCriticalError, + DeepBonerError, JudgeError, RateLimitError, SearchError, @@ -12,22 +16,22 @@ from src.utils.exceptions import ( class TestExceptions: """Tests for exception hierarchy.""" - def test_search_error_is_deepcritical_error(self): - assert issubclass(SearchError, DeepCriticalError) + def test_search_error_is_deepboner_error(self): + assert issubclass(SearchError, DeepBonerError) def test_rate_limit_error_is_search_error(self): assert issubclass(RateLimitError, SearchError) - def test_judge_error_is_deepcritical_error(self): - assert issubclass(JudgeError, DeepCriticalError) + def test_judge_error_is_deepboner_error(self): + assert issubclass(JudgeError, DeepBonerError) - def test_configuration_error_is_deepcritical_error(self): - assert issubclass(ConfigurationError, DeepCriticalError) + def test_configuration_error_is_deepboner_error(self): + assert issubclass(ConfigurationError, DeepBonerError) def test_subclass_caught_as_base(self): - """Verify subclasses can be caught via DeepCriticalError.""" + """Verify subclasses can be caught via DeepBonerError.""" try: raise RateLimitError("rate limited") 
- except DeepCriticalError as exc: + except DeepBonerError as exc: assert isinstance(exc, RateLimitError) - assert isinstance(exc, DeepCriticalError) + assert isinstance(exc, DeepBonerError) diff --git a/uv.lock b/uv.lock index 9196007e7c92e933f2a14d07e2b4bde9c6a0d489..e91e2d4dbf43d3db60aa1a1af2ca48f61d3a4fde 100644 --- a/uv.lock +++ b/uv.lock @@ -1057,7 +1057,7 @@ wheels = [ ] [[package]] -name = "deepcritical" +name = "deepboner" version = "0.1.0" source = { editable = "." } dependencies = [