# Shared client for ClinicalTrials.gov v2 API and scoring
import re
from typing import Any, Dict, List, Optional, Tuple

import requests

# Search/match synonyms for each diagnosis offered by the intake form.
DEFAULT_DIAG_TERMS: Dict[str, List[str]] = {
    "Glioblastoma": ["glioblastoma", "GBM", "glioblastoma multiforme"],
    "Diffuse midline glioma": ["diffuse midline glioma", "DMG", "H3 K27M"],
    "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
    "Astrocytoma": ["astrocytoma", "grade 2 astrocytoma", "grade 4 astrocytoma"],
    "Oligodendroglioma": ["oligodendroglioma", "1p19q codeleted"],
    "Meningioma": ["meningioma"],
    "Medulloblastoma": ["medulloblastoma"],
    "Ependymoma": ["ependymoma"],
    "Spinal cord tumor": ["spinal cord tumor", "spinal cord neoplasm"],
}

# Generic terms used when no diagnosis-specific synonym list applies.
_GENERIC_TERMS: List[str] = ["brain tumor", "spinal cord tumor", "CNS tumor"]

API_BASE = "https://clinicaltrials.gov/api/v2/studies"
UA = {"User-Agent": "BrainTrialsFinder-Desktop/1.0 (+https://clinicaltrials.gov)"}

# How many of each unit make up one year; used to convert age strings such as
# "6 Months" into whole years. Unknown or missing units are treated as years.
_AGE_UNITS_PER_YEAR: Dict[str, int] = {
    "year": 1,
    "month": 12,
    "week": 52,
    "day": 365,
    "hour": 365 * 24,
    "minute": 365 * 24 * 60,
}
_AGE_PATTERN = re.compile(r"(\d+)\s*([A-Za-z]+)?")


def _split_keywords(keywords: Optional[str]) -> List[str]:
    """Split a comma-separated keyword string into stripped, non-empty items."""
    return [k.strip() for k in (keywords or "").split(",") if k.strip()]


def _to_number(value: Any) -> Optional[float]:
    """Coerce an intake value (number or numeric string) to float, else None."""
    if value is None:
        return None
    try:
        return float(value)
    except (TypeError, ValueError):
        return None


def _clean(value: Any) -> str:
    """Return *value* as a stripped string; None and other falsy values give ""."""
    return str(value or "").strip()


def build_terms(diagnosis: str, keywords: str) -> List[str]:
    """Build the list of search terms for a diagnosis plus user keywords.

    Known diagnoses expand to their synonyms from ``DEFAULT_DIAG_TERMS``; any
    other value falls back to generic neuro-oncology terms. ``keywords`` is a
    comma-separated string whose non-empty items are appended in order.
    """
    if diagnosis in DEFAULT_DIAG_TERMS:
        terms = list(DEFAULT_DIAG_TERMS[diagnosis])
    else:
        terms = list(_GENERIC_TERMS)
    return terms + _split_keywords(keywords)


def ctgov_search_one(
    term: str,
    statuses: List[str],
    page_size: int = 100,
    max_pages: int = 5,
) -> List[Dict[str, Any]]:
    """Fetch up to ``max_pages`` pages of studies matching one search term.

    Args:
        term: Free-text value sent as ``query.term``.
        statuses: Overall-status values to filter on. When empty or None the
            status filter is omitted instead of being sent as an empty string.
        page_size: Number of studies requested per page.
        max_pages: Maximum number of pages to request; a falsy value requests
            nothing and returns an empty list.

    Returns:
        The raw study dicts from every page fetched, in response order.

    Raises:
        requests.HTTPError: If any page responds with an error status.
        requests.RequestException: For other network-level failures.
    """
    base_params: Dict[str, Any] = {"query.term": term}
    if statuses:
        base_params["filter.overallStatus"] = ",".join(statuses)
    base_params["pageSize"] = page_size

    all_studies: List[Dict[str, Any]] = []
    page_token = None
    # The context manager closes the session's pooled connections on exit,
    # including when a request raises.
    with requests.Session() as session:
        session.headers.update(UA)
        for _ in range(max_pages or 0):
            params = dict(base_params)
            if page_token:
                params["pageToken"] = page_token
            response = session.get(API_BASE, params=params, timeout=30)
            response.raise_for_status()
            data = response.json()
            studies = data.get("studies", [])
            if not studies:
                break
            all_studies.extend(studies)
            page_token = data.get("nextPageToken")
            if not page_token:
                break
    return all_studies


def fetch_all_terms(
    terms: List[str],
    statuses: List[str],
    page_size: int = 100,
    max_pages: int = 5,
) -> List[Dict[str, Any]]:
    """Search every term and return the union of studies, de-duplicated by NCT id.

    The first occurrence of each NCT id wins. Studies without an NCT id are
    keyed by object identity, so they are always kept. A term whose request
    fails with an HTTP error status is skipped; other request failures
    (connection errors, timeouts) are not caught here and propagate.
    """
    # Keys are NCT id strings, or id(study) ints for studies lacking an NCT id.
    dedup: Dict[Any, Dict[str, Any]] = {}
    for term in terms:
        try:
            studies = ctgov_search_one(
                term, statuses, page_size=page_size, max_pages=max_pages
            )
        except requests.HTTPError:
            continue
        for study in studies:
            ident = (study.get("protocolSection") or {}).get("identificationModule") or {}
            key = ident.get("nctId") or id(study)
            if key not in dedup:
                dedup[key] = study
    return list(dedup.values())


def mentions(txt: str, term: str) -> bool:
    """Return True if *term* occurs in *txt* as a whole token, ignoring case.

    The match must not be directly preceded or followed by a word character.
    Lookarounds are used instead of ``\\b`` so that terms which begin or end
    with punctuation (e.g. "IDH1+") can still match. An empty term never
    matches, and a None text is treated as empty.
    """
    if not term:
        return False
    text = txt if isinstance(txt, str) else as_text(txt)
    pattern = rf"(?<!\w){re.escape(term)}(?!\w)"
    return re.search(pattern, text, re.I) is not None


def as_text(obj: Any) -> str:
    """Flatten a possibly nested API value into plain text.

    * None gives "".
    * A dict gives the value under the first present key among ``textblock``,
      ``textBlock`` and ``value``; otherwise its non-None values, flattened
      recursively and joined with spaces.
    * A list gives its items, flattened recursively and joined with "; ".
    * Anything else gives ``str(obj)``.
    """
    if obj is None:
        return ""
    if isinstance(obj, dict):
        for key in ("textblock", "textBlock", "value"):
            if key in obj:
                return str(obj.get(key) or "")
        # Recurse so nested lists/dicts are rendered as text, not Python reprs.
        return " ".join(as_text(v) for v in obj.values() if v is not None)
    if isinstance(obj, list):
        return "; ".join(as_text(item) for item in obj)
    return str(obj)


def parse_age_to_int(v: Any) -> Optional[int]:
    """Parse an age value into whole years.

    Accepts a number, a string such as "18 Years" or "6 Months", or a dict
    carrying the age under ``"value"``. Sub-year units are converted to years
    and rounded down, so "6 Months" gives 0 rather than 6. A missing or
    unrecognised unit is treated as years. Returns None when no number is found.
    """
    if v is None:
        return None
    if isinstance(v, dict):
        return parse_age_to_int(v.get("value"))
    if isinstance(v, (int, float)):
        try:
            return int(v)
        except (ValueError, OverflowError):
            # NaN and infinity cannot be represented as an int.
            return None
    match = _AGE_PATTERN.search(str(v))
    if match is None:
        return None
    amount = int(match.group(1))
    unit = (match.group(2) or "year").lower().rstrip("s")
    return amount // _AGE_UNITS_PER_YEAR.get(unit, 1)


def ensure_list(v: Any) -> List[Any]:
    """Wrap *v* in a list: None gives [], a list is returned as-is, else [v]."""
    if v is None:
        return []
    if isinstance(v, list):
        return v
    return [v]


def score_trial(t: Dict[str, Any], intake: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score how well a study fits a patient intake.

    Args:
        t: A study dict; fields are read from its ``protocolSection``.
        intake: Patient details. Keys read: ``age``, ``kps``, ``prior_bev``,
            ``setting``, ``keywords`` (comma-separated) and ``diagnosis``.
            Numeric values may be given as numbers or numeric strings.

    Returns:
        ``(score, reasons)`` where ``score`` is clamped to 0..100 and
        ``reasons`` lists the human-readable factors that were recorded.
    """
    intake = intake or {}
    age = _to_number(intake.get("age"))
    kps = _to_number(intake.get("kps"))
    prior_bev = bool(intake.get("prior_bev", False))
    setting = intake.get("setting") or ""
    keywords = intake.get("keywords") or ""
    diagnosis = intake.get("diagnosis") or ""

    if diagnosis in DEFAULT_DIAG_TERMS:
        diag_terms = DEFAULT_DIAG_TERMS[diagnosis]
    elif diagnosis and diagnosis != "Other":
        # Free-text diagnosis: match on the text itself.
        diag_terms = [diagnosis]
    else:
        diag_terms = _GENERIC_TERMS

    ps = (t or {}).get("protocolSection") or {}
    elig = ps.get("eligibilityModule")
    crit = ""
    min_age = None
    max_age = None
    if isinstance(elig, dict):
        # Fall back to the whole module's text when no criteria field exists.
        crit_raw = elig.get("eligibilityCriteria") or elig.get("criteria") or elig
        crit = as_text(crit_raw)
        min_age = parse_age_to_int(elig.get("minimumAge"))
        max_age = parse_age_to_int(elig.get("maximumAge"))
    elif isinstance(elig, str):
        crit = elig

    # `or {}` guards against a module key that is present but null.
    phases_up = [
        str(p).upper() for p in ensure_list((ps.get("designModule") or {}).get("phases"))
    ]
    conditions = ensure_list((ps.get("conditionsModule") or {}).get("conditions"))
    title = (ps.get("identificationModule") or {}).get("briefTitle") or ""

    score = 0
    reasons: List[str] = []

    # Diagnosis match against listed conditions or the brief title.
    if any(mentions(text, term) for text in [*conditions, title] for term in diag_terms):
        score += 30
        reasons.append(f"Matches diagnosis: {diagnosis or 'neuro-oncology'}.")

    # Later-phase trials get a bonus.
    if any("PHASE 2" in p or "PHASE2" in p for p in phases_up):
        score += 8
    if any("PHASE 3" in p or "PHASE3" in p for p in phases_up):
        score += 12

    # Age limits; skipped when either side is unknown.
    if age is not None:
        if min_age is not None and age < min_age:
            reasons.append(f"Age below minimum ({min_age}).")
            score -= 30
        if max_age is not None and age > max_age:
            reasons.append(f"Age above maximum ({max_age}).")
            score -= 30

    # Performance status; an unknown KPS is treated as not meeting the bar.
    if mentions(crit, "ECOG 0-1") and (kps is None or kps < 80):
        score -= 15
        reasons.append("Requires ECOG 0–1 (KPS ~≥80).")
    if mentions(crit, "Karnofsky") and (kps is None or kps < 70):
        score -= 10
        reasons.append("Requires KPS ≥70.")

    if prior_bev and mentions(crit, "no prior bevacizumab"):
        score -= 25
        reasons.append("Excludes prior bevacizumab.")

    # Disease setting.
    if setting == "Recurrent" and mentions(crit, "recurrent"):
        score += 8
    if setting == "Newly diagnosed" and (
        mentions(crit, "newly diagnosed") or mentions(title, "adjuvant")
    ):
        score += 8

    # Small bonus per user keyword found in the title or criteria.
    for keyword in _split_keywords(keywords):
        if mentions(title, keyword) or mentions(crit, keyword):
            score += 3

    return max(0, min(100, score)), reasons


def extract_row(study: dict) -> dict:
    """Return a flat row dict for the table/PDF. Safe against missing fields.

    Keys: ``title``, ``nct``, ``status``, ``phases``, ``conditions``,
    ``sponsor`` and ``city_country``. Every value is a string, empty when the
    source field is absent. ``city_country`` describes the first listed
    location only.
    """
    ps = (study or {}).get("protocolSection") or {}
    idm = ps.get("identificationModule") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    cdnm = ps.get("conditionsModule") or {}
    slm = ps.get("sponsorCollaboratorsModule") or {}
    clm = ps.get("contactsLocationsModule") or {}

    title = _clean(idm.get("officialTitle") or idm.get("briefTitle"))
    nct = _clean(idm.get("nctId"))

    status_raw = _clean(scm.get("overallStatus"))
    # e.g., RECRUITING -> Recruiting
    status = status_raw.replace("_", " ").title() if status_raw else ""

    phases = ", ".join(str(p) for p in ensure_list(dsm.get("phases")))
    conditions = ", ".join(str(c) for c in ensure_list(cdnm.get("conditions")))

    lead = slm.get("leadSponsor") or {}
    sponsor = _clean(lead.get("name")) if isinstance(lead, dict) else ""

    city_country = ""
    locs = ensure_list(clm.get("locations"))
    first = locs[0] if locs else None
    if isinstance(first, dict):
        # The v2 API names these fields "city"/"country"; the legacy
        # "locationCity"/"locationCountry" names are kept as a fallback.
        city = _clean(first.get("city") or first.get("locationCity"))
        country = _clean(first.get("country") or first.get("locationCountry"))
        city_country = ", ".join(part for part in (city, country) if part)

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
        "city_country": city_country,
    }