|
|
|
|
|
from typing import List, Dict, Any, Tuple
|
|
|
|
|
|
from ctgov_client import (
|
|
|
build_terms,
|
|
|
fetch_all_terms,
|
|
|
score_trial,
|
|
|
extract_row,
|
|
|
)
|
|
|
|
|
|
# Recruitment statuses passed to fetch_all_terms when querying ClinicalTrials.gov.
STATUSES: List[str] = ["RECRUITING", "NOT_YET_RECRUITING"]
|
|
|
|
|
|
|
|
|
def _normalize_key(row: Dict[str, Any]) -> str:
    """Build the de-duplication key for a trial row.

    The NCT identifier is preferred. It is upper-cased so that ``nct0123`` and
    ``NCT0123`` identify the same trial. Rows without an identifier fall back
    to the title, compared case-insensitively with every run of whitespace
    collapsed to a single space.

    Args:
        row: Trial row; only the ``nct`` and ``title`` keys are read. Missing
            or ``None`` values are treated as empty strings, and non-string
            values are converted with ``str()``.

    Returns:
        ``"NCT:<id>"`` when an identifier is present, otherwise
        ``"TITLE:<normalized title>"`` (``"TITLE:"`` when the title is empty
        as well).
    """
    nct = str(row.get("nct") or "").strip().upper()
    if nct:
        return f"NCT:{nct}"
    # split()/join trims the ends and collapses tabs, newlines and repeated
    # spaces, so cosmetic differences do not defeat the duplicate check.
    title = " ".join(str(row.get("title") or "").lower().split())
    return f"TITLE:{title}"
|
|
|
|
|
|
|
|
|
def _location_field(location: Dict[str, Any], *keys: str) -> str:
    """Return the first non-blank value stored under ``keys``, stripped ("" if none)."""
    for key in keys:
        text = str(location.get(key) or "").strip()
        if text:
            return text
    return ""


def _uk_locations(study: Dict[str, Any]) -> List[Dict[str, Any]]:
    """Return the study's location entries whose country contains "united kingdom"."""
    protocol = study.get("protocolSection") or {}
    locations = (protocol.get("contactsLocationsModule") or {}).get("locations") or []
    return [
        loc
        for loc in locations
        if "united kingdom" in _location_field(loc, "locationCountry", "country").lower()
    ]


def _format_site(location: Dict[str, Any]) -> str:
    """Join a location's facility, city and country with ", ", omitting missing parts."""
    parts = (
        _location_field(location, "locationFacility", "facility"),
        _location_field(location, "locationCity", "city"),
        _location_field(location, "locationCountry", "country"),
    )
    return ", ".join(part for part in parts if part)


def fetch_uk_trials(
    diagnosis: str,
    keywords: str,
    intake: Dict[str, Any],
    include_ctgov: bool = True,
) -> Tuple[List[Dict[str, Any]], int, int]:
    """Fetch UK trials across the selected sources.

    Currently implemented: ClinicalTrials.gov, keeping only studies that list
    at least one location in the United Kingdom.

    Args:
        diagnosis: Passed to ``build_terms`` together with ``keywords`` to
            produce the search terms.
        keywords: See ``diagnosis``.
        intake: Passed unchanged to ``score_trial`` for every retained study.
        include_ctgov: When false, ClinicalTrials.gov is not queried and the
            result is ``([], 0, 0)``.

    Returns:
        A ``(rows, total_raw, skipped)`` tuple:

        * ``rows``: de-duplicated rows sorted by descending ``score``. Each
          row is what ``extract_row`` returns (documented by the original
          author as title, nct, status, phases, conditions) plus the ``site``,
          ``score``, ``reasons`` and ``url`` keys set here.
        * ``total_raw``: number of studies fetched before the UK filter.
        * ``skipped``: number of studies dropped because processing them
          raised an exception.

    NOTE(review): location fields are read under the ``location*`` names
    first, then under ``facility`` / ``city`` / ``country``. The second set
    appears to be what the ClinicalTrials.gov v2 API returns - confirm
    against the payload produced by ``ctgov_client``.
    """
    # Function-scope import so this block does not depend on the module's
    # import section.
    import logging

    logger = logging.getLogger(__name__)

    terms = build_terms(diagnosis, keywords)
    rows: List[Dict[str, Any]] = []
    skipped = 0
    total_raw = 0

    if include_ctgov:
        studies = fetch_all_terms(terms, STATUSES, page_size=100, max_pages=5)
        total_raw += len(studies)
        for study in studies:
            # Best effort: one malformed study must not abort the whole
            # search, so any failure is counted and the loop moves on.
            try:
                uk_sites = _uk_locations(study)
                if not uk_sites:
                    continue
                score, reasons = score_trial(study, intake)
                row = extract_row(study)
                # Only the first UK location is shown as the site.
                row["site"] = _format_site(uk_sites[0])
                row["score"] = score
                row["reasons"] = "; ".join(reasons)
                row["url"] = (
                    f"https://clinicaltrials.gov/study/{row['nct']}"
                    if row.get("nct")
                    else ""
                )
                rows.append(row)
            except Exception:
                skipped += 1
                logger.debug("Skipping study that could not be processed", exc_info=True)

    # Keep the first occurrence of every trial.
    seen = set()
    deduped: List[Dict[str, Any]] = []
    for row in rows:
        key = _normalize_key(row)
        if key in seen:
            continue
        seen.add(key)
        deduped.append(row)

    # Highest score first; a missing or None score sorts as 0. The sort is
    # stable, so equally scored trials keep their fetch order.
    deduped.sort(key=lambda r: r.get("score") or 0, reverse=True)
    return deduped, total_raw, skipped
|
|
|
|
|
|
|