| """ClinicalTrials.gov search tool using API v2.""" |
|
|
| import asyncio |
| from typing import Any, ClassVar |
|
|
| import requests |
| from tenacity import retry, stop_after_attempt, wait_exponential |
|
|
| from src.utils.exceptions import SearchError |
| from src.utils.models import Citation, Evidence |
|
|
|
|
class ClinicalTrialsTool:
    """Search tool for ClinicalTrials.gov (API v2).

    Note: Uses `requests` library instead of `httpx` because ClinicalTrials.gov's
    WAF blocks httpx's TLS fingerprint. The `requests` library is not blocked.
    See: https://clinicaltrials.gov/data-api/api
    """

    BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

    # Fields requested from the API; joined with commas into the `fields` param.
    FIELDS: ClassVar[list[str]] = [
        "NCTId",
        "BriefTitle",
        "Phase",
        "OverallStatus",
        "Condition",
        "InterventionName",
        "StartDate",
        "BriefSummary",
    ]

    # Comma-separated overall statuses sent as `filter.overallStatus`.
    STATUS_FILTER = "COMPLETED,ACTIVE_NOT_RECRUITING,RECRUITING,ENROLLING_BY_INVITATION"

    # Study type appended to every query as an `AREA[StudyType]` clause.
    STUDY_TYPE_FILTER = "INTERVENTIONAL"

    # Internal limits used when building requests and Evidence text.
    _MAX_PAGE_SIZE = 100
    _SUMMARY_LIMIT = 500
    _CONTENT_LIMIT = 2000
    _TITLE_LIMIT = 500
    _MAX_LISTED_ITEMS = 3

    @property
    def name(self) -> str:
        """Return the identifier of this tool (``"clinicaltrials"``)."""
        return "clinicaltrials"

    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        reraise=True,
    )
    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search ClinicalTrials.gov for interventional studies.

        The call is retried (up to 3 attempts, exponential backoff between
        1 and 10 seconds); the last exception is re-raised if all attempts fail.

        Args:
            query: Search query (e.g., "metformin alzheimer")
            max_results: Maximum results to return (max 100). A value of
                zero or less yields an empty list without contacting the API.

        Returns:
            List of Evidence objects from clinical trials

        Raises:
            SearchError: If the HTTP request fails or the API returns an
                error status.
        """
        # A non-positive limit can never yield results; a negative one would
        # also send an invalid pageSize and mis-slice the result list below.
        if max_results <= 0:
            return []

        # Restrict the search to the configured study type via an AREA clause.
        final_query = f"{query} AND AREA[StudyType]{self.STUDY_TYPE_FILTER}"

        params: dict[str, Any] = {
            "query.term": final_query,
            "pageSize": min(max_results, self._MAX_PAGE_SIZE),
            "fields": ",".join(self.FIELDS),
            "filter.overallStatus": self.STATUS_FILTER,
        }

        try:
            # `requests` is blocking, so run it in a worker thread to keep the
            # event loop responsive.
            response = await asyncio.to_thread(
                requests.get,
                self.BASE_URL,
                params=params,
                headers={"User-Agent": "DeepBoner-Research-Agent/1.0"},
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
        except requests.HTTPError as e:
            raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
        except requests.RequestException as e:
            raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e

        studies = data.get("studies", [])
        return [self._study_to_evidence(study) for study in studies[:max_results]]

    def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
        """Convert a clinical trial study to Evidence.

        Args:
            study: One element of the API response's ``studies`` list.
                Missing or null modules/fields fall back to placeholder text.

        Returns:
            Evidence whose content combines the (possibly truncated) brief
            summary with phase, status, conditions and interventions, and
            whose citation links to the study page on clinicaltrials.gov.
        """
        # `or {}` also covers keys that are present but explicitly null.
        protocol = study.get("protocolSection") or {}
        id_module = protocol.get("identificationModule") or {}
        status_module = protocol.get("statusModule") or {}
        desc_module = protocol.get("descriptionModule") or {}
        design_module = protocol.get("designModule") or {}
        conditions_module = protocol.get("conditionsModule") or {}
        arms_module = protocol.get("armsInterventionsModule") or {}

        nct_id = id_module.get("nctId") or "Unknown"
        title = id_module.get("briefTitle") or "Untitled Study"
        status = status_module.get("overallStatus") or "Unknown"
        start_date = (status_module.get("startDateStruct") or {}).get("date") or "Unknown"

        # `phases` is a list; report the first entry when there is one.
        phases = design_module.get("phases") or []
        phase = phases[0] if phases else "Not Applicable"

        # List at most the first few conditions.
        conditions = conditions_module.get("conditions") or []
        conditions_str = ", ".join(conditions[: self._MAX_LISTED_ITEMS]) if conditions else "Unknown"

        # List at most the first few interventions, skipping unnamed ones so
        # the joined text never contains empty entries.
        interventions = arms_module.get("interventions") or []
        intervention_names = [
            item["name"]
            for item in interventions[: self._MAX_LISTED_ITEMS]
            if item.get("name")
        ]
        interventions_str = ", ".join(intervention_names) if intervention_names else "Unknown"

        # Only mark the summary with an ellipsis when text was actually cut.
        summary = desc_module.get("briefSummary") or "No summary available."
        if len(summary) > self._SUMMARY_LIMIT:
            summary = f"{summary[: self._SUMMARY_LIMIT]}..."

        content = (
            f"{summary} "
            f"Trial Phase: {phase}. "
            f"Status: {status}. "
            f"Conditions: {conditions_str}. "
            f"Interventions: {interventions_str}."
        )

        return Evidence(
            content=content[: self._CONTENT_LIMIT],
            citation=Citation(
                source="clinicaltrials",
                title=title[: self._TITLE_LIMIT],
                url=f"https://clinicaltrials.gov/study/{nct_id}",
                date=start_date,
                authors=[],
            ),
            relevance=0.85,
        )
|
|