|
|
|
|
|
import re
|
|
|
import requests
|
|
|
from typing import Any, Dict, List, Tuple
|
|
|
|
|
|
# Search vocabulary per diagnosis label. Each key maps to the synonyms and
# abbreviations that build_terms() sends as queries and that score_trial()
# looks for in a trial's conditions and title.
DEFAULT_DIAG_TERMS = {
    "Glioblastoma": [
        "glioblastoma",
        "GBM",
        "glioblastoma multiforme",
    ],
    "Diffuse midline glioma": [
        "diffuse midline glioma",
        "DMG",
        "H3 K27M",
    ],
    "Anaplastic astrocytoma": [
        "anaplastic astrocytoma",
        "grade 3 astrocytoma",
    ],
    "Astrocytoma": [
        "astrocytoma",
        "grade 2 astrocytoma",
        "grade 4 astrocytoma",
    ],
    "Oligodendroglioma": [
        "oligodendroglioma",
        "1p19q codeleted",
    ],
    "Meningioma": ["meningioma"],
    "Medulloblastoma": ["medulloblastoma"],
    "Ependymoma": ["ependymoma"],
    "Spinal cord tumor": [
        "spinal cord tumor",
        "spinal cord neoplasm",
    ],
}
|
|
|
|
|
|
# Studies endpoint of the ClinicalTrials.gov v2 API; ctgov_search_one() issues
# its GET requests against this URL.
API_BASE = "https://clinicaltrials.gov/api/v2/studies"

# Headers merged into the HTTP session in ctgov_search_one() so requests carry
# an identifying User-Agent.
UA = {
    "User-Agent": "BrainTrialsFinder-Desktop/1.0 (+https://clinicaltrials.gov)",
}
|
|
|
|
|
|
|
|
|
def build_terms(diagnosis: str, keywords: str) -> List[str]:
    """Build the list of search terms for a diagnosis plus free-text keywords.

    Args:
        diagnosis: Diagnosis label. A label present in ``DEFAULT_DIAG_TERMS``
            contributes its synonym list; any other value falls back to
            generic brain / spinal cord / CNS tumor terms.
        keywords: Comma-separated extra terms. ``None`` or a blank string
            contributes nothing; surrounding whitespace is stripped.

    Returns:
        Diagnosis terms followed by the extra keywords, in first-seen order.
        Case-insensitive duplicates are dropped (first spelling wins) because
        each returned term is searched separately by ``fetch_all_terms``, so
        a repeated term would only repeat the same query.
    """
    if diagnosis in DEFAULT_DIAG_TERMS:
        base = DEFAULT_DIAG_TERMS[diagnosis]
    else:
        base = ["brain tumor", "spinal cord tumor", "CNS tumor"]
    extra = (chunk.strip() for chunk in (keywords or "").split(","))

    terms: List[str] = []
    seen = set()
    for term in (*base, *extra):
        folded = term.casefold()
        # Skip blanks (e.g. from "a,,b") and anything already queued.
        if term and folded not in seen:
            seen.add(folded)
            terms.append(term)
    return terms
|
|
|
|
|
|
|
|
|
def ctgov_search_one(
    term: str,
    statuses: List[str],
    page_size: int = 100,
    max_pages: int = 5,
) -> List[Dict[str, Any]]:
    """Fetch the studies matching one search term, following pagination.

    Args:
        term: Text sent as the ``query.term`` parameter.
        statuses: Overall-status values, sent comma-joined as
            ``filter.overallStatus``. When empty or ``None`` the filter is
            omitted instead of being sent as an empty value.
        page_size: Value of the ``pageSize`` parameter.
        max_pages: Upper bound on the number of pages requested; ``None`` or
            ``0`` performs no request at all.

    Returns:
        The concatenated ``studies`` entries of every page fetched.

    Raises:
        requests.HTTPError: If a response carries an error status. Other
            ``requests`` errors (connection, timeout) propagate unchanged.
    """
    base_params: Dict[str, Any] = {"query.term": term, "pageSize": page_size}
    status_filter = ",".join(statuses or [])
    if status_filter:
        base_params["filter.overallStatus"] = status_filter

    all_studies: List[Dict[str, Any]] = []
    page_token = None
    pages_fetched = 0
    max_iters = max_pages or 0

    # The context manager closes the session (and its pooled connections)
    # even when a request raises part-way through pagination.
    with requests.Session() as session:
        session.headers.update(UA)
        while pages_fetched < max_iters:
            params = dict(base_params)
            if page_token:
                params["pageToken"] = page_token

            r = session.get(API_BASE, params=params, timeout=30)
            r.raise_for_status()
            data = r.json()

            studies = data.get("studies", [])
            if not studies:
                break
            all_studies.extend(studies)

            # A missing token means this was the last page.
            page_token = data.get("nextPageToken")
            if not page_token:
                break
            pages_fetched += 1

    return all_studies
|
|
|
|
|
|
|
|
|
def fetch_all_terms(terms: List[str], statuses: List[str], page_size=100, max_pages=5) -> List[Dict[str, Any]]:
    """Search every term and merge the results, keeping each study once.

    Studies are keyed by their NCT id (``protocolSection.identificationModule
    .nctId``); a study without one is keyed by its object identity, so it is
    always kept. The first occurrence of a study wins. A term whose search
    raises ``requests.HTTPError`` is skipped; other exceptions propagate.

    Returns:
        The unique studies in first-seen order.
    """
    unique: Dict[str, Dict[str, Any]] = {}
    for term in terms:
        try:
            found = ctgov_search_one(term, statuses, page_size=page_size, max_pages=max_pages)
        except requests.HTTPError:
            # Best effort: one failing term must not discard the others.
            continue
        for study in found:
            section = study.get("protocolSection") or {}
            ident = section.get("identificationModule") or {}
            key = ident.get("nctId") or id(study)
            unique.setdefault(key, study)
    return list(unique.values())
|
|
|
|
|
|
|
|
|
def mentions(txt: str, term: str) -> bool:
    """Return True when *term* occurs in *txt* as a whole word or phrase.

    Matching is case-insensitive and *term* is taken literally (regex
    metacharacters are escaped). The match must not be directly preceded or
    followed by a word character, so ``"astrocytoma"`` does not match inside
    ``"astrocytomas"``.

    Args:
        txt: Text to search; ``None`` or an empty string never matches.
        term: Literal word or phrase; an empty term never matches.
    """
    if not txt or not term:
        return False
    # Lookarounds instead of \b: \b needs a word character on one side, so a
    # term that starts or ends with punctuation (e.g. "IDH1+") could never
    # match at a space or at the end of the text.
    pattern = rf"(?<!\w){re.escape(term)}(?!\w)"
    return re.search(pattern, txt, re.I) is not None
|
|
|
|
|
|
|
|
|
def as_text(obj: Any) -> str:
    """Flatten a JSON-like value into plain text.

    Rules, applied recursively:
      * ``None`` becomes ``""``.
      * A dict holding ``"textblock"``, ``"textBlock"`` or ``"value"`` (checked
        in that order) yields the text of that entry, or ``""`` when the entry
        is falsy.
      * Any other dict yields its non-``None`` values joined by spaces.
      * A list yields its items joined by ``"; "``.
      * Everything else is passed through ``str()``.

    Nested dicts and lists found as dict values are flattened by the same
    rules, so the result holds their text, not their Python ``repr``.
    """
    if obj is None:
        return ""
    if isinstance(obj, dict):
        for key in ("textblock", "textBlock", "value"):
            if key in obj:
                payload = obj[key]
                return as_text(payload) if payload else ""
        return " ".join(as_text(v) for v in obj.values() if v is not None)
    if isinstance(obj, list):
        return "; ".join(as_text(item) for item in obj)
    return str(obj)
|
|
|
|
|
|
|
|
|
def parse_age_to_int(v: Any):
    """Parse an age value into whole years.

    Args:
        v: ``None``, a number, a dict whose ``"value"`` entry holds the age,
            or text containing a number with an optional unit word
            (e.g. ``"18 Years"``, ``"6 Months"``).

    Returns:
        The age in whole years, rounded down, or ``None`` when *v* is ``None``
        or contains no digits. Numbers and text without a recognised unit are
        taken to be years. Months, weeks, days, hours and minutes are
        converted, so ``"6 Months"`` yields ``0``, not ``6``.
    """
    if v is None:
        return None
    if isinstance(v, dict):
        return parse_age_to_int(v.get("value"))
    if isinstance(v, (int, float)):
        return int(v)

    # First number in the text (a decimal part is ignored) plus the word
    # that follows it, if any.
    m = re.search(r"(\d+)(?:\.\d+)?\s*([A-Za-z]+)?", str(v))
    if not m:
        return None
    amount = int(m.group(1))
    unit = (m.group(2) or "").lower()

    # Units per year; prefix matching covers singular and plural forms.
    units_per_year = (
        ("month", 12),
        ("week", 52),
        ("day", 365),
        ("hour", 8760),
        ("minute", 525600),
    )
    for prefix, per_year in units_per_year:
        if unit.startswith(prefix):
            return amount // per_year
    return amount
|
|
|
|
|
|
|
|
|
def ensure_list(v: Any):
    """Coerce *v* to a list.

    ``None`` gives a new empty list, a list is returned as the same object,
    and any other value is wrapped in a one-element list.
    """
    if isinstance(v, list):
        return v
    return [] if v is None else [v]
|
|
|
|
|
|
|
|
|
def score_trial(t: Dict[str, Any], intake: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Heuristically score how well a trial fits a patient intake.

    Args:
        t: Study record; only its ``protocolSection`` is read (eligibility,
            design, conditions and identification modules). Missing or null
            modules are treated as empty.
        intake: Patient answers read by key: ``age``, ``kps``, ``prior_bev``,
            ``setting``, ``keywords`` (comma-separated) and ``diagnosis``.
            ``None`` is treated as an empty intake.

    Returns:
        ``(score, reasons)``: the score clamped to 0..100 and the list of
        human-readable notes collected while scoring.

    Scoring:
        +30 diagnosis term found in a condition or in the brief title
        +8 phase 2, +12 phase 3
        -30 age below the minimum, -30 age above the maximum
        -15 criteria mention "ECOG 0-1" and KPS is unknown or below 80
        -10 criteria mention "Karnofsky" and KPS is unknown or below 70
        -25 prior bevacizumab and criteria mention "no prior bevacizumab"
        +8 setting matches (recurrent / newly diagnosed or adjuvant title)
        +3 per intake keyword found in the title or the criteria
    """
    intake = intake or {}
    age_local = intake.get("age")
    kps_local = intake.get("kps")
    prior_bev_local = bool(intake.get("prior_bev", False))
    setting_local = intake.get("setting") or ""
    keywords_local = intake.get("keywords") or ""
    diagnosis_local = intake.get("diagnosis") or ""

    # Terms that count as a diagnosis match.
    if diagnosis_local in DEFAULT_DIAG_TERMS:
        diag_terms = DEFAULT_DIAG_TERMS[diagnosis_local]
    elif diagnosis_local and diagnosis_local != "Other":
        diag_terms = [diagnosis_local]
    else:
        diag_terms = ["brain tumor", "CNS tumor", "spinal cord tumor"]

    ps = (t or {}).get("protocolSection") or {}

    # Eligibility: free-text criteria plus the age bounds.
    elig = ps.get("eligibilityModule")
    crit = ""
    min_age = None
    max_age = None
    if isinstance(elig, dict):
        # Fall back to the whole module when no criteria field is present.
        crit_raw = elig.get("eligibilityCriteria") or elig.get("criteria") or elig
        crit = as_text(crit_raw)
        min_age = parse_age_to_int(elig.get("minimumAge"))
        max_age = parse_age_to_int(elig.get("maximumAge"))
    elif isinstance(elig, str):
        crit = as_text(elig)

    # `or {}` also covers modules that are present but null, which a
    # `.get(key, {})` default would not.
    design = ps.get("designModule") or {}
    conditions_module = ps.get("conditionsModule") or {}
    ident = ps.get("identificationModule") or {}

    phases_up = [str(p).upper() for p in ensure_list(design.get("phases"))]
    conds_list = ensure_list(conditions_module.get("conditions"))
    title = ident.get("briefTitle") or ""

    s = 0
    reasons: List[str] = []

    if any(mentions(text, term) for text in [*conds_list, title] for term in diag_terms):
        s += 30
        reasons.append(f"Matches diagnosis: {diagnosis_local or 'neuro-oncology'}.")

    if any("PHASE 2" in p or "PHASE2" in p for p in phases_up):
        s += 8
    if any("PHASE 3" in p or "PHASE3" in p for p in phases_up):
        s += 12

    try:
        if min_age is not None and age_local is not None and age_local < min_age:
            reasons.append(f"Age below minimum ({min_age}).")
            s -= 30
        if max_age is not None and age_local is not None and age_local > max_age:
            reasons.append(f"Age above maximum ({max_age}).")
            s -= 30
    except TypeError:
        # A non-numeric intake age cannot be compared; skip the age screen
        # rather than fail the whole score.
        pass

    if mentions(crit, "ECOG 0-1") and (kps_local is None or kps_local < 80):
        s -= 15
        reasons.append("Requires ECOG 0–1 (KPS ~≥80).")

    if mentions(crit, "Karnofsky") and (kps_local is None or kps_local < 70):
        s -= 10
        reasons.append("Requires KPS ≥70.")

    if prior_bev_local and mentions(crit, "no prior bevacizumab"):
        s -= 25
        reasons.append("Excludes prior bevacizumab.")

    if setting_local == "Recurrent" and mentions(crit, "recurrent"):
        s += 8

    if setting_local == "Newly diagnosed" and (
        mentions(crit, "newly diagnosed") or mentions(title, "adjuvant")
    ):
        s += 8

    for kw in (k.strip() for k in keywords_local.split(",")):
        if kw and (mentions(title, kw) or mentions(crit, kw)):
            s += 3

    return max(0, min(100, s)), reasons
|
|
|
|
|
|
def extract_row(study: dict) -> dict:
    """Return a flat row dict for the table/PDF. Safe against missing fields.

    Args:
        study: Study record; only its ``protocolSection`` is read. ``None``
            or missing / null modules yield empty strings.

    Returns:
        A dict of strings with the keys ``title`` (official title, else brief
        title), ``nct``, ``status`` (underscores replaced by spaces, title
        case), ``phases`` and ``conditions`` (comma-joined), ``sponsor``
        (lead sponsor name) and ``city_country`` (from the first listed
        location).
    """
    ps = (study or {}).get("protocolSection") or {}
    idm = ps.get("identificationModule") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    cdnm = ps.get("conditionsModule") or {}
    slm = ps.get("sponsorCollaboratorsModule") or {}
    clm = ps.get("contactsLocationsModule") or {}

    title = (idm.get("officialTitle") or idm.get("briefTitle") or "").strip()
    nct = (idm.get("nctId") or "").strip()

    status_raw = (scm.get("overallStatus") or "").strip()
    status = status_raw.replace("_", " ").title()

    # str() keeps the joins from failing on a non-string list item.
    phases = ", ".join(str(p) for p in ensure_list(dsm.get("phases")))
    conditions = ", ".join(str(c) for c in ensure_list(cdnm.get("conditions")))

    sponsor = ""
    lead = slm.get("leadSponsor") or {}
    if isinstance(lead, dict):
        sponsor = (lead.get("name") or "").strip()

    city_country = ""
    locs = ensure_list(clm.get("locations"))
    first = locs[0] if locs else None
    if isinstance(first, dict):
        # The v2 API names these fields "city" / "country"; the legacy
        # "locationCity" / "locationCountry" keys are kept as a fallback.
        city = (first.get("city") or first.get("locationCity") or "").strip()
        country = (first.get("country") or first.get("locationCountry") or "").strip()
        city_country = ", ".join(part for part in (city, country) if part)

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
        "city_country": city_country,
    }
|
|
|
|