|
|
"""ClinicalTrials.gov search tool using API v2.""" |
|
|
|
|
|
import asyncio |
|
|
from typing import Any, ClassVar |
|
|
|
|
|
import requests |
|
|
from tenacity import retry, stop_after_attempt, wait_exponential |
|
|
|
|
|
from src.utils.exceptions import SearchError |
|
|
from src.utils.models import Citation, Evidence |
|
|
|
|
|
|
|
|
class ClinicalTrialsTool:
    """Search tool for ClinicalTrials.gov (API v2 ``/studies`` endpoint).

    Note: Uses the `requests` library instead of `httpx` because
    ClinicalTrials.gov's WAF blocks httpx's TLS fingerprint. The `requests`
    library is not blocked.

    See: https://clinicaltrials.gov/data-api/api
    """

    BASE_URL = "https://clinicaltrials.gov/api/v2/studies"

    # Field names requested from the API; sent comma-joined as the ``fields``
    # query parameter.
    FIELDS: ClassVar[list[str]] = [
        "NCTId",
        "BriefTitle",
        "Phase",
        "OverallStatus",
        "Condition",
        "InterventionName",
        "StartDate",
        "BriefSummary",
    ]

    # Comma-separated statuses sent as the ``filter.overallStatus`` parameter.
    STATUS_FILTER = "COMPLETED,ACTIVE_NOT_RECRUITING,RECRUITING,ENROLLING_BY_INVITATION"

    # Study type every search is restricted to, via an ``AREA[StudyType]``
    # clause appended to the query term.
    STUDY_TYPE_FILTER = "INTERVENTIONAL"

    # Upper bound applied to the ``pageSize`` request parameter.
    _MAX_PAGE_SIZE = 100

    # Truncation limits applied when building an Evidence object.
    _SUMMARY_MAX_CHARS = 500
    _CONTENT_MAX_CHARS = 2000
    _TITLE_MAX_CHARS = 500

    @property
    def name(self) -> str:
        """Return the identifier of this tool (``"clinicaltrials"``)."""
        return "clinicaltrials"

    # NOTE(review): no ``retry=`` predicate is given, so every exception raised
    # by ``search`` triggers a retry -- including SearchError built from a 4xx
    # response. Confirm whether client errors should really be retried.
    @retry(
        stop=stop_after_attempt(3),
        wait=wait_exponential(multiplier=1, min=1, max=10),
        reraise=True,
    )
    async def search(self, query: str, max_results: int = 10) -> list[Evidence]:
        """Search ClinicalTrials.gov for interventional studies.

        The call is attempted up to three times with exponential back-off;
        the last error is re-raised if all attempts fail.

        Args:
            query: Search query (e.g., "metformin alzheimer")
            max_results: Maximum results to return (max 100). A value of zero
                or less yields an empty list without contacting the API.

        Returns:
            List of Evidence objects from clinical trials

        Raises:
            SearchError: If the HTTP request fails or the API answers with an
                error status.
        """
        # A non-positive limit can never produce results; bail out before it
        # reaches ``pageSize`` or turns ``studies[:max_results]`` into a
        # negative slice.
        if max_results <= 0:
            return []

        # Parenthesise the caller's expression so the study-type restriction
        # applies to the whole query (e.g. "a OR b") regardless of how the
        # search syntax ranks AND against OR.
        final_query = f"({query}) AND AREA[StudyType]{self.STUDY_TYPE_FILTER}"

        params: dict[str, Any] = {
            "query.term": final_query,
            "pageSize": min(max_results, self._MAX_PAGE_SIZE),
            "fields": ",".join(self.FIELDS),
            "filter.overallStatus": self.STATUS_FILTER,
        }

        try:
            # ``requests`` is blocking, so run it in a worker thread to keep
            # the event loop free.
            response = await asyncio.to_thread(
                requests.get,
                self.BASE_URL,
                params=params,
                headers={"User-Agent": "DeepBoner-Research-Agent/1.0"},
                timeout=30,
            )
            response.raise_for_status()
            data = response.json()
        except requests.HTTPError as e:
            raise SearchError(f"ClinicalTrials.gov API error: {e}") from e
        except requests.RequestException as e:
            raise SearchError(f"ClinicalTrials.gov request failed: {e}") from e

        studies = data.get("studies", [])
        return [self._study_to_evidence(study) for study in studies[:max_results]]

    def _study_to_evidence(self, study: dict[str, Any]) -> Evidence:
        """Convert a clinical trial study to Evidence.

        Args:
            study: One entry of the ``studies`` list from the API response.
                Missing sections or fields fall back to placeholder values.

        Returns:
            Evidence whose content combines the (possibly truncated) summary
            with phase, status, conditions and interventions, and whose
            citation links to the study page on clinicaltrials.gov.
        """
        protocol = study.get("protocolSection", {})
        id_module = protocol.get("identificationModule", {})
        status_module = protocol.get("statusModule", {})
        desc_module = protocol.get("descriptionModule", {})
        design_module = protocol.get("designModule", {})
        conditions_module = protocol.get("conditionsModule", {})
        arms_module = protocol.get("armsInterventionsModule", {})

        nct_id = id_module.get("nctId", "Unknown")
        title = id_module.get("briefTitle", "Untitled Study")
        status = status_module.get("overallStatus", "Unknown")
        start_date = status_module.get("startDateStruct", {}).get("date", "Unknown")

        # Only the first listed phase is reported.
        phases = design_module.get("phases", [])
        phase = phases[0] if phases else "Not Applicable"

        # At most three conditions are listed.
        conditions = conditions_module.get("conditions", [])
        conditions_str = ", ".join(conditions[:3]) if conditions else "Unknown"

        # At most three interventions are considered; entries without a name
        # are dropped so they do not leave stray separators in the output.
        interventions = arms_module.get("interventions", [])
        intervention_names = [
            intervention_name
            for intervention_name in (i.get("name", "") for i in interventions[:3])
            if intervention_name
        ]
        interventions_str = ", ".join(intervention_names) if intervention_names else "Unknown"

        # Append an ellipsis only when the summary was actually cut short.
        summary = desc_module.get("briefSummary", "No summary available.")
        if len(summary) > self._SUMMARY_MAX_CHARS:
            summary_text = f"{summary[:self._SUMMARY_MAX_CHARS]}..."
        else:
            summary_text = summary

        content = (
            f"{summary_text} "
            f"Trial Phase: {phase}. "
            f"Status: {status}. "
            f"Conditions: {conditions_str}. "
            f"Interventions: {interventions_str}."
        )

        return Evidence(
            content=content[: self._CONTENT_MAX_CHARS],
            citation=Citation(
                source="clinicaltrials",
                title=title[: self._TITLE_MAX_CHARS],
                url=f"https://clinicaltrials.gov/study/{nct_id}",
                date=start_date,
                authors=[],
            ),
            # Every trial is given the same fixed relevance score.
            relevance=0.85,
        )
|
|
|