# python
# GUI_CLinicalTrial.py — run with: streamlit run GUI_CLinicalTrial.py
"""Streamlit MVP that searches ClinicalTrials.gov (API v2) for brain tumor trials.

The sidebar collects a small patient intake (diagnosis, age, KPS, keywords).
Matching studies are fetched, scored with simple heuristics, and the top 50
are rendered together with their contacts and locations.
"""
import re
from typing import Optional

import requests
import streamlit as st

st.set_page_config(page_title="Brain Trials Finder", layout="wide")

# Overall statuses requested from the API and rewarded by the scorer.
STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]

# Search synonyms for each diagnosis offered in the sidebar.
DEFAULT_DIAG_TERMS = {
    "Glioblastoma": ["glioblastoma", "GBM", "grade 4 astrocytoma"],
    "Diffuse midline glioma": ["diffuse midline glioma", "DMG"],
    "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
}

# How many of each unit make up one year (approximate), used to normalise
# eligibility ages such as "6 Months" to years.
# NOTE(review): unit spellings assumed from typical registry values — confirm.
_AGE_UNITS_PER_YEAR = {
    "year": 1.0,
    "month": 12.0,
    "week": 52.0,
    "day": 365.0,
    "hour": 8760.0,
    "minute": 525600.0,
}
_AGE_RE = re.compile(r"(\d+(?:\.\d+)?)\s*([A-Za-z]*)")


def ensure_list(v):
    """Return *v* unchanged if it is a list, ``[]`` for ``None``, else ``[v]``."""
    if isinstance(v, list):
        return v
    if v is None:
        return []
    return [v]


def mentions(text: str, needle: str) -> bool:
    """Return True when *needle* occurs in *text*, ignoring case."""
    if not text:
        return False
    return needle.lower() in text.lower()


def _to_int(v):
    """Return *v* as an int, or the first integer found in its string form.

    Returns ``None`` when *v* is ``None``, contains no digits, or cannot be
    converted. Units are ignored; use ``_age_to_years`` for ages.
    """
    try:
        if v is None:
            return None
        if isinstance(v, (int, float)):
            return int(v)
        # Extract first integer from strings like "18 Years"
        m = re.search(r"(\d+)", str(v))
        return int(m.group(1)) if m else None
    except (TypeError, ValueError, OverflowError):
        # int(nan) raises ValueError and int(inf) raises OverflowError.
        return None


def _age_to_years(v) -> Optional[float]:
    """Convert an eligibility age such as ``"6 Months"`` to years.

    Bare numbers and unrecognised or missing units are treated as years.
    Returns ``None`` when *v* is ``None`` or contains no number.
    """
    if v is None:
        return None
    if isinstance(v, (int, float)):
        return float(v)
    m = _AGE_RE.search(str(v))
    if not m:
        return None
    unit = m.group(2).lower().rstrip("s")
    return float(m.group(1)) / _AGE_UNITS_PER_YEAR.get(unit, 1.0)


def build_terms(diagnosis: str, keywords: str):
    """Return de-duplicated search terms for *diagnosis* plus user *keywords*.

    *keywords* is a comma-separated string. Falls back to ``["brain tumor"]``
    when neither source yields a term.
    """
    base = DEFAULT_DIAG_TERMS.get(diagnosis, [])
    extra = [k.strip() for k in (keywords or "").split(",") if k.strip()]
    terms = list(dict.fromkeys([*base, *extra]))  # de-duplicate, preserve order
    return terms or ["brain tumor"]


def build_expr(diagnosis: str, keywords: str) -> str:
    """Build an ``OR`` query expression; multi-word terms are double-quoted."""
    parts = []
    for term in build_terms(diagnosis, keywords):
        # Embedded double quotes would produce a malformed phrase once the
        # term is wrapped in quotes below, so drop them.
        term = term.replace('"', "").strip()
        if not term:
            continue
        parts.append(f'"{term}"' if " " in term else term)
    # Simple OR query; v2 tokenizes internally
    return " OR ".join(parts)


@st.cache_data(ttl=3600)
def ctgov_search(expr: str, statuses, page_size: int = 100, max_pages: int = 5):
    """Return a list of study dicts from ClinicalTrials.gov v2.

    Follows ``nextPageToken`` for at most *max_pages* pages of *page_size*
    studies. Raises ``requests.HTTPError`` on a non-success response; other
    ``requests`` exceptions propagate unchanged.
    """
    url = "https://clinicaltrials.gov/api/v2/studies"
    all_studies = []
    token = None
    for _ in range(max_pages):
        params = {
            "query.term": expr,
            "pageSize": page_size,
            "filter.overallStatus": ",".join(statuses),
        }
        if token:
            params["pageToken"] = token
        r = requests.get(url, params=params, timeout=30)
        r.raise_for_status()
        data = r.json() or {}
        all_studies.extend(data.get("studies") or [])
        token = data.get("nextPageToken")
        if not token:
            break
    return all_studies


def _fmt_phase(p) -> str:
    """Pretty-print a phase code, e.g. ``"PHASE2"`` -> ``"Phase 2"``.

    ``"PHASE1_PHASE2"`` becomes ``"Phase 1/2"``, ``"EARLY_PHASE1"`` becomes
    ``"Early Phase 1"`` and ``"NA"`` becomes ``"N/A"``. Any other code is
    title-cased with underscores shown as spaces.
    """
    code = str(p or "").strip().upper()
    if not code:
        return ""
    if code == "NA":
        return "N/A"
    prefix = ""
    if code.startswith("EARLY_"):
        prefix = "Early "
        code = code[len("EARLY_"):]
    if code.startswith("PHASE"):
        pnum = code.replace("PHASE", "").replace("_", "/").strip()
        return f"{prefix}Phase {pnum}" if pnum else f"{prefix}Phase"
    return prefix + code.replace("_", " ").title()


def extract_row(study: dict) -> dict:
    """Flatten a study dict into the display fields used by the result cards.

    Returns a dict with keys ``title``, ``nct``, ``status``, ``phases``,
    ``conditions`` and ``sponsor``; missing values become empty strings.
    """
    ps = study.get("protocolSection") or {}
    idm = ps.get("identificationModule") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    cdnm = ps.get("conditionsModule") or {}
    slm = ps.get("sponsorCollaboratorsModule") or {}

    title = (idm.get("officialTitle") or idm.get("briefTitle") or "").strip()
    nct = (idm.get("nctId") or "").strip()
    status_raw = (scm.get("overallStatus") or "").strip()
    status = status_raw.replace("_", " ").title() if status_raw else ""

    phases = ", ".join(_fmt_phase(p) for p in ensure_list(dsm.get("phases")) if p)
    # str() guards the join against non-string entries in the JSON.
    conditions = ", ".join(str(c) for c in ensure_list(cdnm.get("conditions")) if c)

    sponsor = ""
    lead = slm.get("leadSponsor") or {}
    if isinstance(lead, dict):
        sponsor = (lead.get("name") or "").strip()

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
    }


def score_trial(study: dict, intake: dict):
    """Score *study* against the patient *intake*.

    Reads ``age``, ``kps`` and ``keywords`` from *intake*. Returns
    ``(score, reasons)`` where *reasons* lists the penalties applied.
    Higher scores rank first.
    """
    ps = study.get("protocolSection") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    elm = ps.get("eligibilityModule") or {}
    idm = ps.get("identificationModule") or {}

    s = 0
    reasons = []

    status = scm.get("overallStatus") or ""
    if status == "RECRUITING":
        s += 15
    elif status == "NOT_YET_RECRUITING":
        s += 8

    phases = [str(p).upper() for p in ensure_list(dsm.get("phases"))]
    if any("PHASE3" in p for p in phases):
        s += 12
    if any("PHASE2" in p for p in phases):
        s += 8

    # Age checks. Limits are normalised to years so that a limit such as
    # "6 Months" is not mistaken for six years.
    min_age_raw = elm.get("minimumAge")
    max_age_raw = elm.get("maximumAge")
    min_age = _age_to_years(min_age_raw)
    max_age = _age_to_years(max_age_raw)
    age = int(intake.get("age") or 0)
    if min_age is not None and age < min_age:
        reasons.append(f"Age below minimum ({min_age_raw}).")
        s -= 30
    if max_age is not None and age > max_age:
        reasons.append(f"Age above maximum ({max_age_raw}).")
        s -= 30

    # KPS heuristic from criteria text
    crit = elm.get("eligibilityCriteria") or ""
    kps = int(intake.get("kps") or 0)
    if mentions(crit, "Karnofsky") and kps < 70:
        s -= 10
        reasons.append("Requires KPS ≥70.")

    # Keyword bonus
    title = idm.get("briefTitle") or idm.get("officialTitle") or ""
    summary = (ps.get("descriptionModule", {}) or {}).get("briefSummary") or ""
    keywords = [k.strip() for k in (intake.get("keywords") or "").split(",") if k.strip()]
    blob = " ".join([title, summary])
    for kw in keywords:
        if mentions(blob, kw):
            s += 2

    return s, reasons


def _first_text(record: dict, *keys: str) -> str:
    """Return the first non-empty value among *keys* in *record* as stripped text."""
    for key in keys:
        value = record.get(key)
        if value:
            return str(value).strip()
    return ""


def _joined(record: dict, *keys: str) -> str:
    """Join the non-empty values of *keys* in *record* with ``" | "``."""
    return " | ".join(str(record.get(k)) for k in keys if record.get(k))


def _escape_link_text(text: str) -> str:
    """Escape square brackets so *text* is safe inside a Markdown link label."""
    return text.replace("[", "\\[").replace("]", "\\]")


def _render_contacts_and_locations(study: dict) -> None:
    """Write central contacts, overall officials and sites for *study*."""
    ps = study.get("protocolSection") or {}
    clm = ps.get("contactsLocationsModule") or {}

    centrals = ensure_list(clm.get("centralContacts"))
    if centrals:
        st.write("Central Contacts:")
        for c in centrals:
            st.write(" - " + _joined(c, "name", "role", "phone", "email"))

    officials = ensure_list(clm.get("overallOfficials"))
    if officials:
        st.write("Overall Officials:")
        for o in officials:
            st.write(" - " + _joined(o, "name", "role", "affiliation"))

    locs = ensure_list(clm.get("locations"))
    if locs:
        st.write("Locations:")
        for loc in locs:
            # Un-prefixed keys are tried first; the "location*" names the
            # script originally read are kept as a fallback.
            facility = _first_text(loc, "facility", "locationFacility")
            city = _first_text(loc, "city", "locationCity")
            state = _first_text(loc, "state", "locationState")
            country = _first_text(loc, "country", "locationCountry")
            status_l = _first_text(loc, "status")
            site_line = ", ".join(p for p in [facility, city, state, country] if p)
            if site_line:
                st.write(f" - {site_line}" + (f" (status: {status_l})" if status_l else ""))
            lcontacts = ensure_list(loc.get("contacts")) or ensure_list(loc.get("locationContacts"))
            for lc in lcontacts:
                contact_line = _joined(lc, "name", "role", "phone", "email")
                if contact_line:
                    st.write(" • " + contact_line)


# UI
st.title("Brain Cancer Trials Finder (MVP)")

with st.sidebar:
    diagnosis = st.selectbox(
        "Diagnosis",
        ["Glioblastoma", "Diffuse midline glioma", "Anaplastic astrocytoma", "Other"],
        index=0,
    )
    setting = st.selectbox("Setting", ["Newly diagnosed", "Recurrent"], index=1)
    age = st.number_input("Age", min_value=1, max_value=100, value=55)
    kps = st.slider("Karnofsky (KPS)", min_value=40, max_value=100, step=10, value=80)
    prior_bev = st.checkbox("Prior bevacizumab", value=False)
    keywords = st.text_input("Keywords (comma-separated)", value="immunotherapy,vaccine,device")
    do_search = st.button("Search", type="primary")

# Trigger search on first load too
# NOTE(review): on later reruns caused by other widgets do_search is False, so
# the results are not redrawn until Search is pressed — confirm this is intended.
if do_search or "did_first" not in st.session_state:
    st.session_state["did_first"] = True
    expr = build_expr(diagnosis, keywords)
    try:
        studies = ctgov_search(expr, STATUSES, page_size=100, max_pages=5)
    except (requests.RequestException, ValueError) as exc:
        # Network/HTTP failures and undecodable JSON: report and end this run
        # instead of surfacing a traceback.
        st.error(f"ClinicalTrials.gov search failed: {exc}")
        st.stop()

    intake = {
        "age": age,
        "kps": kps,
        "prior_bev": prior_bev,
        "setting": setting,
        "keywords": keywords,
        "diagnosis": diagnosis,
    }

    rows = []
    skipped = 0
    for sdict in studies:
        try:
            sc, reasons = score_trial(sdict, intake)
            row = extract_row(sdict)
            nct = row.get("nct") or ""
            url = f"https://clinicaltrials.gov/study/{nct}" if nct else ""
            rows.append(
                (
                    sc,
                    row.get("title", ""),
                    nct,
                    row.get("status", ""),
                    row.get("phases", ""),
                    row.get("conditions", ""),
                    row.get("sponsor", ""),
                    reasons,
                    url,
                    sdict,
                )
            )
        except Exception:
            # Best effort: one malformed study must not break the whole page,
            # but count it so the omission is visible to the user.
            skipped += 1
            continue

    rows = sorted(rows, key=lambda x: -x[0])[:50]
    st.caption(f"Found {len(studies)} studies; showing top {len(rows)} by score.")
    if skipped:
        st.caption(f"Skipped {skipped} studies that could not be processed.")

    for sc, title, nct, status, phases, conds, sponsor, reasons, url, study in rows:
        with st.container(border=True):
            if url:
                st.markdown(f"**[{_escape_link_text(title)}]({url})**")
            else:
                st.markdown(f"**{title}**")
            meta = f"NCT: {nct or '—'} · Sponsor: {sponsor or '—'} · Status: {status or '—'} · Phases: {phases or '—'} · Score: {sc}"
            st.write(meta)
            if conds:
                st.write(f"Conditions: {conds}")
            with st.expander("Contacts and Locations"):
                _render_contacts_and_locations(study)
            if reasons:
                with st.expander("Why this score?"):
                    for r in reasons:
                        st.write(f"- {r}")