|
|
|
|
|
|
|
|
import re
|
|
|
import requests
|
|
|
import streamlit as st
|
|
|
|
|
|
# Page-level Streamlit settings: browser tab title and a full-width layout.
st.set_page_config(layout="wide", page_title="Brain Trials Finder")

# Overall-status values passed to the search as the status filter.
STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]

# Search terms per selectable diagnosis. A diagnosis that is not a key here
# (e.g. "Other") contributes no default terms.
DEFAULT_DIAG_TERMS = {
    "Glioblastoma": [
        "glioblastoma",
        "GBM",
        "grade 4 astrocytoma",
    ],
    "Diffuse midline glioma": [
        "diffuse midline glioma",
        "DMG",
    ],
    "Anaplastic astrocytoma": [
        "anaplastic astrocytoma",
        "grade 3 astrocytoma",
    ],
}
|
|
|
|
|
|
|
|
|
def ensure_list(v):
    """Normalize *v* to a list.

    ``None`` becomes an empty list, a list is returned unchanged (same
    object, not a copy), and any other value is wrapped in a one-element
    list.
    """
    if v is None:
        return []
    return v if isinstance(v, list) else [v]
|
|
|
|
|
|
|
|
|
def mentions(text: str, needle: str) -> bool:
    """Return True when *needle* occurs in *text*, ignoring case.

    Args:
        text: Text to search; ``None`` or empty never matches.
        needle: Substring to look for; ``None`` or empty never matches.

    Returns:
        True if a caseless substring match is found, otherwise False.
    """
    # An empty needle is a substring of every string, so it would "match"
    # everything; treat a missing needle (or text) as no match instead.
    if not text or not needle:
        return False
    # casefold() is the Unicode-aware form of caseless comparison.
    return str(needle).casefold() in str(text).casefold()
|
|
|
|
|
|
|
|
|
def _to_int(v):
|
|
|
try:
|
|
|
if v is None:
|
|
|
return None
|
|
|
if isinstance(v, (int, float)):
|
|
|
return int(v)
|
|
|
|
|
|
m = re.search(r"(\d+)", str(v))
|
|
|
return int(m.group(1)) if m else None
|
|
|
except Exception:
|
|
|
return None
|
|
|
|
|
|
|
|
|
def build_terms(diagnosis: str, keywords: str):
    """Collect the search terms for a diagnosis plus free-text keywords.

    The default terms registered for *diagnosis* come first, followed by the
    comma-separated *keywords* (stripped, blanks dropped). Exact duplicates
    are removed while keeping the first occurrence. When nothing remains the
    single fallback term ``"brain tumor"`` is returned.
    """
    candidates = list(DEFAULT_DIAG_TERMS.get(diagnosis, []))
    for piece in (keywords or "").split(","):
        cleaned = piece.strip()
        if cleaned:
            candidates.append(cleaned)

    # Order-preserving de-duplication (comparison is case-sensitive).
    seen = set()
    unique = []
    for term in candidates:
        if term not in seen:
            seen.add(term)
            unique.append(term)

    if not unique:
        return ["brain tumor"]
    return unique
|
|
|
|
|
|
|
|
|
def build_expr(diagnosis: str, keywords: str) -> str:
    """Build the search expression: all terms joined with ``OR``.

    Terms containing a space are wrapped in double quotes; other terms are
    used as-is. No further escaping is applied to the terms.
    """
    pieces = []
    for term in build_terms(diagnosis, keywords):
        if " " in term:
            pieces.append(f'"{term}"')
        else:
            pieces.append(term)
    return " OR ".join(pieces)
|
|
|
|
|
|
|
|
|
@st.cache_data(ttl=3600)
def ctgov_search(expr: str, statuses, page_size: int = 100, max_pages: int = 5):
    """Return a list of study dicts from ClinicalTrials.gov v2.

    Args:
        expr: Search expression sent as ``query.term``.
        statuses: Iterable of overall-status codes to filter on. When empty
            or ``None`` the status filter is omitted from the request.
        page_size: Value sent as ``pageSize`` for every request.
        max_pages: Upper bound on the number of pages fetched.

    Returns:
        The concatenated ``studies`` arrays of all fetched pages.

    Raises:
        requests.HTTPError: If a page responds with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    url = "https://clinicaltrials.gov/api/v2/studies"

    base_params = {"query.term": expr, "pageSize": page_size}
    status_filter = ",".join(statuses or [])
    if status_filter:
        # Do not send a blank filter value when no statuses were requested.
        base_params["filter.overallStatus"] = status_filter

    all_studies = []
    token = None
    # One session for all pages so the HTTP connection can be reused.
    with requests.Session() as session:
        for _ in range(max_pages):
            params = dict(base_params)
            if token:
                params["pageToken"] = token
            r = session.get(url, params=params, timeout=30)
            r.raise_for_status()
            data = r.json() or {}
            all_studies.extend(data.get("studies") or [])
            # The response carries a token only while more pages remain.
            token = data.get("nextPageToken")
            if not token:
                break
    return all_studies
|
|
|
|
|
|
|
|
|
def _format_phase(p) -> str:
    """Turn a phase code such as ``PHASE2`` into a display label."""
    code = str(p or "").strip().upper()
    if not code:
        return ""
    if code == "NA":
        # Previously rendered as "Na" by str.title().
        return "N/A"
    if code.startswith("EARLY_PHASE"):
        # Previously rendered as "Early_Phase1".
        num = code[len("EARLY_PHASE"):].replace("_", " ").strip()
        return f"Early Phase {num}".strip()
    if code.startswith("PHASE"):
        # "PHASE1" -> "1"; a combined "PHASE1_PHASE2" -> "1/2".
        num = code.replace("PHASE", "").replace("_", "/").strip()
        return f"Phase {num}" if num else "Phase"
    return code.replace("_", " ").title()


def extract_row(study: dict) -> dict:
    """Flatten one study record into the display fields used by the UI.

    Args:
        study: A study dict; fields are read from its ``protocolSection``.
            Missing sections and fields are tolerated.

    Returns:
        A dict with the string values ``title``, ``nct``, ``status``,
        ``phases``, ``conditions`` and ``sponsor`` (empty string when the
        source field is absent).
    """

    def text(value) -> str:
        # Tolerate None and non-string scalars instead of raising on .strip().
        return str(value).strip() if value else ""

    ps = study.get("protocolSection") or {}
    idm = ps.get("identificationModule") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    cdnm = ps.get("conditionsModule") or {}
    slm = ps.get("sponsorCollaboratorsModule") or {}

    # The official title is preferred; the brief title is the fallback.
    title = text(idm.get("officialTitle") or idm.get("briefTitle"))
    nct = text(idm.get("nctId"))

    status_raw = text(scm.get("overallStatus"))
    status = status_raw.replace("_", " ").title() if status_raw else ""

    phase_labels = [_format_phase(p) for p in ensure_list(dsm.get("phases")) if p]
    phases = ", ".join(label for label in phase_labels if label)

    # Coerce each item to str so one odd entry cannot raise in join().
    conditions = ", ".join(
        text(c) for c in ensure_list(cdnm.get("conditions")) if c
    )

    lead = slm.get("leadSponsor") or {}
    sponsor = text(lead.get("name")) if isinstance(lead, dict) else ""

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
    }
|
|
|
|
|
|
|
|
|
# Conversion factors from an age unit (singular, lower-case) to years.
_AGE_UNIT_IN_YEARS = {
    "year": 1.0,
    "month": 1.0 / 12.0,
    "week": 7.0 / 365.25,
    "day": 1.0 / 365.25,
    "hour": 1.0 / (365.25 * 24),
    "minute": 1.0 / (365.25 * 24 * 60),
}


def _age_in_years(raw):
    """Convert an age such as ``"18 Years"`` or ``"6 Months"`` to years.

    Plain numbers are taken to be years already. Returns ``None`` when no
    number can be found (for example ``None`` or ``"N/A"``). An unrecognized
    or missing unit is treated as years.
    """
    if raw is None or isinstance(raw, bool):
        return None
    if isinstance(raw, (int, float)):
        return float(raw)
    found = re.search(r"(\d+(?:\.\d+)?)\s*([A-Za-z]*)", str(raw))
    if not found:
        return None
    unit = found.group(2).lower().rstrip("s")
    return float(found.group(1)) * _AGE_UNIT_IN_YEARS.get(unit, 1.0)


def score_trial(study: dict, intake: dict):
    """Score how well a study fits the patient intake.

    Scoring, as implemented below:
        * +15 for status RECRUITING, +8 for NOT_YET_RECRUITING.
        * +12 if any phase contains PHASE3, +8 if any contains PHASE2.
        * -30 for each violated age limit (minimum / maximum).
        * -10 if the eligibility text mentions "Karnofsky" and KPS < 70.
        * +2 for every intake keyword found in the title or brief summary.

    Args:
        study: A study dict; fields are read from its ``protocolSection``.
        intake: Patient data. The keys ``age``, ``kps`` and ``keywords`` are
            read. A missing age or KPS skips the corresponding check rather
            than being treated as 0.

    Returns:
        A ``(score, reasons)`` tuple; ``reasons`` lists the penalties that
        were applied, as human-readable strings.
    """
    ps = study.get("protocolSection") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    elm = ps.get("eligibilityModule") or {}
    idm = ps.get("identificationModule") or {}

    s = 0
    reasons = []

    status = scm.get("overallStatus") or ""
    if status == "RECRUITING":
        s += 15
    elif status == "NOT_YET_RECRUITING":
        s += 8

    # The two phase bonuses are independent, so a study listing both
    # phase 2 and phase 3 receives both.
    phase_codes = [str(p).upper() for p in ensure_list(dsm.get("phases"))]
    if any("PHASE3" in code for code in phase_codes):
        s += 12
    if any("PHASE2" in code for code in phase_codes):
        s += 8

    # Age limits carry a unit ("6 Months"); compare in years so that a limit
    # given in months is not mistaken for years.
    min_age_raw = elm.get("minimumAge")
    max_age_raw = elm.get("maximumAge")
    min_age = _age_in_years(min_age_raw)
    max_age = _age_in_years(max_age_raw)
    age = _age_in_years(intake.get("age"))
    if age is not None:
        if min_age is not None and age < min_age:
            reasons.append(f"Age below minimum ({min_age_raw}).")
            s -= 30
        if max_age is not None and age > max_age:
            reasons.append(f"Age above maximum ({max_age_raw}).")
            s -= 30

    # Heuristic: the threshold stated in the criteria text is not parsed;
    # any mention of Karnofsky is assumed to require a KPS of at least 70.
    crit = elm.get("eligibilityCriteria") or ""
    kps = _to_int(intake.get("kps"))
    if kps is not None and kps < 70 and mentions(crit, "Karnofsky"):
        s -= 10
        reasons.append("Requires KPS ≥70.")

    title = idm.get("briefTitle") or idm.get("officialTitle") or ""
    summary = (ps.get("descriptionModule") or {}).get("briefSummary") or ""
    blob = " ".join([title, summary])
    keywords = [
        k.strip() for k in (intake.get("keywords") or "").split(",") if k.strip()
    ]
    s += 2 * sum(1 for kw in keywords if mentions(blob, kw))

    return s, reasons
|
|
|
|
|
|
|
|
|
|
|
|
st.title("Brain Cancer Trials Finder (MVP)")

# Patient intake widgets, placed in the sidebar via st.sidebar object
# notation. The values are read by the search section further down.
diagnosis = st.sidebar.selectbox(
    "Diagnosis",
    ["Glioblastoma", "Diffuse midline glioma", "Anaplastic astrocytoma", "Other"],
    index=0,
)
setting = st.sidebar.selectbox(
    "Setting",
    ["Newly diagnosed", "Recurrent"],
    index=1,
)
age = st.sidebar.number_input("Age", min_value=1, max_value=100, value=55)
kps = st.sidebar.slider(
    "Karnofsky (KPS)",
    min_value=40,
    max_value=100,
    step=10,
    value=80,
)
prior_bev = st.sidebar.checkbox("Prior bevacizumab", value=False)
keywords = st.sidebar.text_input(
    "Keywords (comma-separated)",
    value="immunotherapy,vaccine,device",
)
# True only on the script run triggered by clicking the button.
do_search = st.sidebar.button("Search", type="primary")
|
|
|
|
|
|
|
|
|
def _first_text(record: dict, *keys: str) -> str:
    """Return the first non-empty value among *keys* in *record*, stripped."""
    for key in keys:
        value = record.get(key)
        if value:
            return str(value).strip()
    return ""


def _render_contacts_and_locations(study: dict) -> None:
    """Write a study's central contacts, officials and sites to the page."""
    ps = study.get("protocolSection") or {}
    clm = ps.get("contactsLocationsModule") or {}

    centrals = ensure_list(clm.get("centralContacts"))
    if centrals:
        st.write("Central Contacts:")
        for c in centrals:
            parts = [c.get("name"), c.get("role"), c.get("phone"), c.get("email")]
            st.write(" - " + " | ".join([p for p in parts if p]))

    officials = ensure_list(clm.get("overallOfficials"))
    if officials:
        st.write("Overall Officials:")
        for o in officials:
            parts = [o.get("name"), o.get("role"), o.get("affiliation")]
            st.write(" - " + " | ".join([p for p in parts if p]))

    locs = ensure_list(clm.get("locations"))
    if locs:
        st.write("Locations:")
        for loc in locs:
            # The v2 API names these fields facility/city/state/country; the
            # older location* names are kept as a fallback.
            facility = _first_text(loc, "facility", "locationFacility")
            city = _first_text(loc, "city", "locationCity")
            state = _first_text(loc, "state", "locationState")
            country = _first_text(loc, "country", "locationCountry")
            status_l = _first_text(loc, "status")
            site_line = ", ".join([p for p in [facility, city, state, country] if p])
            if site_line:
                st.write(f" - {site_line}" + (f" (status: {status_l})" if status_l else ""))
            lcontacts = ensure_list(loc.get("contacts")) or ensure_list(loc.get("locationContacts"))
            for lc in lcontacts:
                parts = [lc.get("name"), lc.get("role"), lc.get("phone"), lc.get("email")]
                parts = [p for p in parts if p]
                if parts:
                    st.write(" • " + " | ".join(parts))


# Run the search on the very first script run and whenever "Search" is clicked.
if do_search or "did_first" not in st.session_state:
    st.session_state["did_first"] = True

    expr = build_expr(diagnosis, keywords)
    try:
        studies = ctgov_search(expr, STATUSES, page_size=100, max_pages=5)
    except requests.RequestException as exc:
        # Show a readable message instead of a traceback, then end this run.
        st.error(f"Could not load studies from ClinicalTrials.gov: {exc}")
        st.stop()

    intake = {
        "age": age,
        "kps": kps,
        "prior_bev": prior_bev,
        "setting": setting,
        "keywords": keywords,
        "diagnosis": diagnosis,
    }

    rows = []
    skipped = 0
    for sdict in studies:
        try:
            sc, reasons = score_trial(sdict, intake)
            row = extract_row(sdict)
        except Exception:
            # Best effort: one malformed record must not take down the page,
            # but it is counted so the omission is visible to the user.
            skipped += 1
            continue
        nct = row.get("nct") or ""
        url = f"https://clinicaltrials.gov/study/{nct}" if nct else ""
        rows.append(
            (
                sc,
                row.get("title", ""),
                nct,
                row.get("status", ""),
                row.get("phases", ""),
                row.get("conditions", ""),
                row.get("sponsor", ""),
                reasons,
                url,
                sdict,
            )
        )

    # Highest score first; the sort is stable, so ties keep API order.
    rows = sorted(rows, key=lambda item: item[0], reverse=True)[:50]

    st.caption(f"Found {len(studies)} studies; showing top {len(rows)} by score.")
    if skipped:
        st.warning(f"{skipped} studies could not be processed and were skipped.")

    for sc, title, nct, status, phases, conds, sponsor, reasons, url, study in rows:
        with st.container(border=True):
            if url:
                st.markdown(f"**[{title}]({url})**")
            else:
                st.markdown(f"**{title}**")
            meta = f"NCT: {nct or '—'} · Sponsor: {sponsor or '—'} · Status: {status or '—'} · Phases: {phases or '—'} · Score: {sc}"
            st.write(meta)
            if conds:
                st.write(f"Conditions: {conds}")

            with st.expander("Contacts and Locations"):
                _render_contacts_and_locations(study)

            if reasons:
                with st.expander("Why this score?"):
                    for r in reasons:
                        st.write(f"- {r}")
|
|
|
|