Upload 12 files
Browse files- .gitattributes +2 -0
- .gitignore +21 -0
- Brain Cancer Trial Finder.exe +3 -0
- GUI_CLinicalTrial.py +289 -0
- GUI_CLinicalTrial.spec +44 -0
- Neuro_onc_clinicalTrial.py +121 -0
- README.md +69 -3
- ctgov_client.py +225 -0
- desktop_app.py +607 -0
- logo_precise.png +3 -0
- runtime.txt +1 -0
- streamlit_app.py +12 -0
- uk_sources.py +80 -0
.gitattributes
CHANGED
|
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
|
|
|
|
|
|
|
|
| 33 |
*.zip filter=lfs diff=lfs merge=lfs -text
|
| 34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
| 35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
| 36 |
+
Brain[[:space:]]Cancer[[:space:]]Trial[[:space:]]Finder.exe filter=lfs diff=lfs merge=lfs -text
|
| 37 |
+
logo_precise.png filter=lfs diff=lfs merge=lfs -text
|
.gitignore
ADDED
|
@@ -0,0 +1,21 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Python
|
| 2 |
+
__pycache__/
|
| 3 |
+
*.py[cod]
|
| 4 |
+
*.pyo
|
| 5 |
+
.venv/
|
| 6 |
+
.env
|
| 7 |
+
|
| 8 |
+
# Data, models, caches
|
| 9 |
+
models_*/
|
| 10 |
+
models*/
|
| 11 |
+
cache_dir/
|
| 12 |
+
*.joblib
|
| 13 |
+
*.pkl
|
| 14 |
+
|
| 15 |
+
# Logs
|
| 16 |
+
*.log
|
| 17 |
+
|
| 18 |
+
# IDE
|
| 19 |
+
.vscode/
|
| 20 |
+
.idea/
|
| 21 |
+
|
Brain Cancer Trial Finder.exe
ADDED
|
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
version https://git-lfs.github.com/spec/v1
|
| 2 |
+
oid sha256:977b04e5ded6299f555c6490cc29a88559dd6a1bdfaab2df2e405aa37a0219da
|
| 3 |
+
size 4759201
|
GUI_CLinicalTrial.py
ADDED
|
@@ -0,0 +1,289 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# python
|
| 2 |
+
# GUI_CLinicalTrial.py — run with: streamlit run GUI_CLinicalTrial.py
|
| 3 |
+
import re
|
| 4 |
+
import requests
|
| 5 |
+
import streamlit as st
|
| 6 |
+
|
| 7 |
+
st.set_page_config(page_title="Brain Trials Finder", layout="wide")
|
| 8 |
+
|
| 9 |
+
STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
|
| 10 |
+
|
| 11 |
+
DEFAULT_DIAG_TERMS = {
|
| 12 |
+
"Glioblastoma": ["glioblastoma", "GBM", "grade 4 astrocytoma"],
|
| 13 |
+
"Diffuse midline glioma": ["diffuse midline glioma", "DMG"],
|
| 14 |
+
"Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
|
| 15 |
+
}
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def ensure_list(v):
    """Coerce ``v`` to a list.

    ``None`` becomes an empty list, an existing list is returned as-is
    (same object), and any other value is wrapped in a one-element list.
    """
    if v is None:
        return []
    return v if isinstance(v, list) else [v]
|
| 24 |
+
|
| 25 |
+
|
| 26 |
+
def mentions(text: str, needle: str) -> bool:
    """Return True when ``needle`` occurs in ``text``, ignoring case.

    Missing or empty ``text`` never matches. This is a plain substring
    test, so an empty ``needle`` matches any non-empty text.
    """
    haystack = (text or "").lower()
    # Short-circuit keeps needle untouched when there is nothing to search.
    return bool(haystack) and needle.lower() in haystack
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def _to_int(v):
|
| 33 |
+
try:
|
| 34 |
+
if v is None:
|
| 35 |
+
return None
|
| 36 |
+
if isinstance(v, (int, float)):
|
| 37 |
+
return int(v)
|
| 38 |
+
# Extract first integer from strings like "18 Years"
|
| 39 |
+
m = re.search(r"(\d+)", str(v))
|
| 40 |
+
return int(m.group(1)) if m else None
|
| 41 |
+
except Exception:
|
| 42 |
+
return None
|
| 43 |
+
|
| 44 |
+
|
| 45 |
+
def build_terms(diagnosis: str, keywords: str):
    """Assemble the ordered, de-duplicated list of search terms.

    Preset terms for ``diagnosis`` come first, followed by the non-blank,
    whitespace-trimmed entries of the comma-separated ``keywords`` string.
    Falls back to ``["brain tumor"]`` when nothing is collected.
    """
    ordered = {}
    for preset in DEFAULT_DIAG_TERMS.get(diagnosis, []):
        ordered.setdefault(preset, None)
    for piece in (keywords or "").split(","):
        cleaned = piece.strip()
        if cleaned:
            ordered.setdefault(cleaned, None)
    # Dict keys keep first-seen order, which gives an order-preserving de-dupe.
    return list(ordered) if ordered else ["brain tumor"]
|
| 50 |
+
|
| 51 |
+
|
| 52 |
+
def build_expr(diagnosis: str, keywords: str) -> str:
    """Build an ``OR`` query expression from the diagnosis and keywords.

    Terms containing a space are wrapped in double quotes so they are
    searched as phrases; single-word terms are used as-is.
    """
    pieces = []
    for term in build_terms(diagnosis, keywords):
        if " " in term:
            pieces.append(f'"{term}"')
        else:
            pieces.append(term)
    return " OR ".join(pieces)
|
| 56 |
+
|
| 57 |
+
|
| 58 |
+
@st.cache_data(ttl=3600)
def ctgov_search(expr: str, statuses, page_size: int = 100, max_pages: int = 5):
    """Return a list of study dicts from ClinicalTrials.gov v2.

    Requests up to ``max_pages`` pages of ``page_size`` studies matching
    ``expr`` and the given overall statuses, following ``nextPageToken``
    until it is absent. Raises ``requests.HTTPError`` on a non-2xx response.
    """
    endpoint = "https://clinicaltrials.gov/api/v2/studies"
    collected = []
    next_token = None
    for _ in range(max_pages):
        query = {
            "query.term": expr,
            "pageSize": page_size,
            "filter.overallStatus": ",".join(statuses),
        }
        if next_token:
            query["pageToken"] = next_token
        response = requests.get(endpoint, params=query, timeout=30)
        response.raise_for_status()
        payload = response.json() or {}
        collected += payload.get("studies") or []
        next_token = payload.get("nextPageToken")
        if not next_token:
            # No further pages available.
            break
    return collected
|
| 81 |
+
|
| 82 |
+
|
| 83 |
+
def extract_row(study: dict) -> dict:
    """Flatten one ClinicalTrials.gov v2 study record into display fields.

    Returns a dict with the string keys ``title``, ``nct``, ``status``,
    ``phases``, ``conditions`` and ``sponsor``. Missing modules or fields
    yield empty strings rather than raising.
    """
    ps = study.get("protocolSection") or {}
    idm = ps.get("identificationModule") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    cdnm = ps.get("conditionsModule") or {}
    slm = ps.get("sponsorCollaboratorsModule") or {}

    title = (idm.get("officialTitle") or idm.get("briefTitle") or "").strip()
    nct = (idm.get("nctId") or "").strip()

    status_raw = (scm.get("overallStatus") or "").strip()
    status = status_raw.replace("_", " ").title() if status_raw else ""

    def fmt_phase(p) -> str:
        """Pretty-print a phase code, e.g. "PHASE2" -> "Phase 2"."""
        p = str(p or "").upper().strip()
        if p in ("NA", "N/A"):
            # "Not applicable" code; title-casing would give "Na".
            return "N/A"
        if p.startswith("EARLY_PHASE"):
            num = p[len("EARLY_PHASE"):].replace("_", "/").strip()
            return f"Early Phase {num}".strip()
        if p.startswith("PHASE"):
            # Combined legacy codes like "PHASE1_PHASE2" become "Phase 1/2".
            pnum = p.replace("PHASE", "").replace("_", "/").strip()
            return f"Phase {pnum}" if pnum else "Phase"
        return p.replace("_", " ").title() if p else ""

    phases = ", ".join(fmt_phase(p) for p in ensure_list(dsm.get("phases")) if p)

    # str() guards the join against non-string condition entries.
    conditions = ", ".join(
        str(c) for c in ensure_list(cdnm.get("conditions")) if c is not None
    )

    sponsor = ""
    lead = slm.get("leadSponsor") or {}
    if isinstance(lead, dict):
        sponsor = (lead.get("name") or "").strip()

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
    }
|
| 123 |
+
|
| 124 |
+
|
| 125 |
+
def score_trial(study: dict, intake: dict):
    """Score a study against the patient intake and explain penalties.

    Points are added for recruiting status, Phase 2/3 designs and keyword
    hits in the title or brief summary; points are removed when the patient
    age falls outside the study's age limits or when the criteria mention
    Karnofsky and the patient's KPS is below 70.

    Returns ``(score, reasons)`` where ``reasons`` lists the penalties applied.
    """
    protocol = study.get("protocolSection") or {}
    status_mod = protocol.get("statusModule") or {}
    design_mod = protocol.get("designModule") or {}
    elig_mod = protocol.get("eligibilityModule") or {}
    ident_mod = protocol.get("identificationModule") or {}

    total = 0
    notes = []

    # Recruitment status bonus.
    overall = status_mod.get("overallStatus") or ""
    if overall == "RECRUITING":
        total += 15
    elif overall == "NOT_YET_RECRUITING":
        total += 8

    # Phase bonuses (a study listing both phases earns both).
    phase_codes = [str(p).upper() for p in ensure_list(design_mod.get("phases"))]
    if any("PHASE3" in code for code in phase_codes):
        total += 12
    if any("PHASE2" in code for code in phase_codes):
        total += 8

    # Age checks
    min_age_raw = elig_mod.get("minimumAge")
    max_age_raw = elig_mod.get("maximumAge")
    lower = _to_int(min_age_raw)
    upper = _to_int(max_age_raw)
    patient_age = int(intake.get("age") or 0)
    if lower is not None and patient_age < lower:
        notes.append(f"Age below minimum ({min_age_raw}).")
        total -= 30
    if upper is not None and patient_age > upper:
        notes.append(f"Age above maximum ({max_age_raw}).")
        total -= 30

    # KPS heuristic from criteria text
    criteria_text = elig_mod.get("eligibilityCriteria") or ""
    patient_kps = int(intake.get("kps") or 0)
    if mentions(criteria_text, "Karnofsky") and patient_kps < 70:
        total -= 10
        notes.append("Requires KPS ≥70.")

    # Keyword bonus: +2 per keyword found in title or summary.
    heading = ident_mod.get("briefTitle") or ident_mod.get("officialTitle") or ""
    summary = (protocol.get("descriptionModule", {}) or {}).get("briefSummary") or ""
    wanted = [k.strip() for k in (intake.get("keywords") or "").split(",") if k.strip()]
    searchable = " ".join([heading, summary])
    total += 2 * sum(1 for kw in wanted if mentions(searchable, kw))

    return total, notes
|
| 177 |
+
|
| 178 |
+
|
| 179 |
+
# UI
|
| 180 |
+
def _first_text(record, *keys):
    """Return the first non-empty value found under ``keys``, stripped, else ""."""
    for key in keys:
        value = record.get(key)
        if value:
            return str(value).strip()
    return ""


# UI: sidebar intake form, then scored results rendered as bordered cards.
st.title("Brain Cancer Trials Finder (MVP)")

with st.sidebar:
    diagnosis = st.selectbox(
        "Diagnosis",
        ["Glioblastoma", "Diffuse midline glioma", "Anaplastic astrocytoma", "Other"],
        index=0,
    )
    setting = st.selectbox("Setting", ["Newly diagnosed", "Recurrent"], index=1)
    age = st.number_input("Age", min_value=1, max_value=100, value=55)
    kps = st.slider("Karnofsky (KPS)", min_value=40, max_value=100, step=10, value=80)
    prior_bev = st.checkbox("Prior bevacizumab", value=False)
    keywords = st.text_input("Keywords (comma-separated)", value="immunotherapy,vaccine,device")
    do_search = st.button("Search", type="primary")

# Trigger search on first load too
if do_search or "did_first" not in st.session_state:
    st.session_state["did_first"] = True
    expr = build_expr(diagnosis, keywords)
    studies = ctgov_search(expr, STATUSES, page_size=100, max_pages=5)

    intake = {
        "age": age,
        "kps": kps,
        "prior_bev": prior_bev,
        "setting": setting,
        "keywords": keywords,
        "diagnosis": diagnosis,
    }

    rows = []
    skipped = 0
    for sdict in studies:
        try:
            sc, reasons = score_trial(sdict, intake)
            row = extract_row(sdict)
            nct = row.get("nct") or ""
            url = f"https://clinicaltrials.gov/study/{nct}" if nct else ""
            rows.append(
                (
                    sc,
                    row.get("title", ""),
                    nct,
                    row.get("status", ""),
                    row.get("phases", ""),
                    row.get("conditions", ""),
                    row.get("sponsor", ""),
                    reasons,
                    url,
                    sdict,
                )
            )
        except Exception:
            # Best effort: one malformed record must not break the page,
            # but count it so the omission is visible to the user.
            skipped += 1
            continue

    rows = sorted(rows, key=lambda x: -x[0])[:50]

    st.caption(f"Found {len(studies)} studies; showing top {len(rows)} by score.")
    if skipped:
        st.caption(f"Skipped {skipped} studies that could not be processed.")

    for sc, title, nct, status, phases, conds, sponsor, reasons, url, study in rows:
        with st.container(border=True):
            if url:
                st.markdown(f"**[{title}]({url})**")
            else:
                st.markdown(f"**{title}**")
            meta = f"NCT: {nct or '—'} · Sponsor: {sponsor or '—'} · Status: {status or '—'} · Phases: {phases or '—'} · Score: {sc}"
            st.write(meta)
            if conds:
                st.write(f"Conditions: {conds}")

            with st.expander("Contacts and Locations"):
                ps = (study.get("protocolSection") or {})
                clm = (ps.get("contactsLocationsModule") or {})

                centrals = ensure_list(clm.get("centralContacts"))
                if centrals:
                    st.write("Central Contacts:")
                    for c in centrals:
                        parts = [c.get("name"), c.get("role"), c.get("phone"), c.get("email")]
                        st.write(" - " + " | ".join([p for p in parts if p]))

                officials = ensure_list(clm.get("overallOfficials"))
                if officials:
                    st.write("Overall Officials:")
                    for o in officials:
                        parts = [o.get("name"), o.get("role"), o.get("affiliation")]
                        st.write(" - " + " | ".join([p for p in parts if p]))

                locs = ensure_list(clm.get("locations"))
                if locs:
                    st.write("Locations:")
                    for L in locs:
                        # v2 location objects use facility/city/state/country;
                        # the legacy "location*" names are kept as fallbacks.
                        facility = _first_text(L, "facility", "locationFacility")
                        city = _first_text(L, "city", "locationCity")
                        state = _first_text(L, "state", "locationState")
                        country = _first_text(L, "country", "locationCountry")
                        status_l = (L.get("status") or "").strip()
                        site_line = ", ".join([p for p in [facility, city, state, country] if p])
                        if site_line:
                            st.write(f" - {site_line}" + (f" (status: {status_l})" if status_l else ""))
                        lcontacts = ensure_list(L.get("contacts")) or ensure_list(L.get("locationContacts"))
                        for lc in lcontacts:
                            parts = [lc.get("name"), lc.get("role"), lc.get("phone"), lc.get("email")]
                            parts = [p for p in parts if p]
                            if parts:
                                st.write(" • " + " | ".join(parts))

            if reasons:
                with st.expander("Why this score?"):
                    for r in reasons:
                        st.write(f"- {r}")
|
GUI_CLinicalTrial.spec
ADDED
|
@@ -0,0 +1,44 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# -*- mode: python ; coding: utf-8 -*-
|
| 2 |
+
|
| 3 |
+
|
| 4 |
+
# PyInstaller build specification for GUI_CLinicalTrial.py.
# Analysis, PYZ, EXE and COLLECT are names PyInstaller injects when it
# executes a .spec file; they are not imported here.

# Dependency analysis of the single entry script; no extra search paths,
# binaries, data files, hidden imports or hooks are configured.
a = Analysis(
    ['GUI_CLinicalTrial.py'],
    pathex=[],
    binaries=[],
    datas=[],
    hiddenimports=[],
    hookspath=[],
    hooksconfig={},
    runtime_hooks=[],
    excludes=[],
    noarchive=False,
    optimize=0,
)
# Archive of the pure-Python modules found by the analysis.
pyz = PYZ(a.pure)

# Executable built with exclude_binaries=True, so binaries are not embedded
# here; they are gathered by COLLECT below. console=True keeps a console.
exe = EXE(
    pyz,
    a.scripts,
    [],
    exclude_binaries=True,
    name='GUI_CLinicalTrial',
    debug=False,
    bootloader_ignore_signals=False,
    strip=False,
    upx=True,
    console=True,
    disable_windowed_traceback=False,
    argv_emulation=False,
    target_arch=None,
    codesign_identity=None,
    entitlements_file=None,
)
# Gathers the executable together with the analysed binaries and data files
# under the name 'GUI_CLinicalTrial'.
coll = COLLECT(
    exe,
    a.binaries,
    a.datas,
    strip=False,
    upx=True,
    upx_exclude=[],
    name='GUI_CLinicalTrial',
)
|
Neuro_onc_clinicalTrial.py
ADDED
|
@@ -0,0 +1,121 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
#!/usr/bin/env python3
|
| 2 |
+
import argparse
|
| 3 |
+
import csv
|
| 4 |
+
import json
|
| 5 |
+
from typing import List, Dict, Any
|
| 6 |
+
|
| 7 |
+
from ctgov_client import (
|
| 8 |
+
DEFAULT_DIAG_TERMS,
|
| 9 |
+
build_terms,
|
| 10 |
+
fetch_all_terms,
|
| 11 |
+
score_trial,
|
| 12 |
+
extract_row,
|
| 13 |
+
)
|
| 14 |
+
|
| 15 |
+
STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
|
| 16 |
+
|
| 17 |
+
|
| 18 |
+
def save_results(rows: List[Dict[str, Any]], csv_path: str, json_path: str):
    """Write the result rows to a CSV file and a JSON file.

    When ``rows`` is empty nothing is written and a notice is printed.
    The CSV uses a fixed column order and fills missing keys with "";
    keys outside that column set appear only in the JSON output.
    """
    if not rows:
        print("No studies found.")
        return

    # Stable header order for the CSV.
    columns = [
        "score",
        "title",
        "nct",
        "url",
        "status",
        "phases",
        "conditions",
        "site",
        "reasons",
    ]
    with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
        out = csv.DictWriter(csv_file, fieldnames=columns)
        out.writeheader()
        out.writerows({col: row.get(col, "") for col in columns} for row in rows)

    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(rows, json_file, indent=2, ensure_ascii=False)

    print(f"Wrote {len(rows)} studies to {csv_path} and {json_path}")
|
| 42 |
+
|
| 43 |
+
|
| 44 |
+
def main():
    """Command-line entry point: search, score, filter and save trials.

    Parses the patient and output options, fetches studies for every search
    term, optionally keeps only studies with a site in the requested
    country, scores each study, then writes the rows sorted by descending
    score to the CSV and JSON paths.
    """
    parser = argparse.ArgumentParser(
        description="Download actively recruiting neuro-oncology trials from ClinicalTrials.gov v2 API (robust client)"
    )
    parser.add_argument(
        "--diagnosis",
        default="Glioblastoma",
        choices=list(DEFAULT_DIAG_TERMS.keys()) + ["Other"],
        help="Primary diagnosis category to search for.",
    )
    parser.add_argument(
        "--keywords",
        default="",
        help="Extra keywords (comma-separated) to refine search.",
    )
    parser.add_argument("--age", type=int, default=55, help="Patient age (years)")
    parser.add_argument("--kps", type=int, default=80, help="Karnofsky Performance Status (40-100)")
    parser.add_argument("--prior-bev", action="store_true", help="Indicate prior bevacizumab exposure")
    parser.add_argument(
        "--setting",
        default="Recurrent",
        choices=["Newly diagnosed", "Recurrent"],
        help="Disease setting",
    )
    parser.add_argument("--country", default="", help="Filter: require location country containing this text (case-insensitive)")
    parser.add_argument("--require-country", action="store_true", help="If set, require at least one site in the given country text")
    parser.add_argument("--csv", default="neuro_onc_trials.csv", help="CSV output path")
    parser.add_argument("--json", default="neuro_onc_trials.json", help="JSON output path")
    parser.add_argument("--page-size", type=int, default=100, help="Results per page per term (max 1000)")
    parser.add_argument("--pages", type=int, default=5, help="Max pages to fetch per term")
    args = parser.parse_args()

    terms = build_terms(args.diagnosis, args.keywords)
    print("Searching ClinicalTrials.gov for:")
    print("  Diagnosis:", args.diagnosis)
    if args.keywords:
        print("  Extra keywords:", args.keywords)

    studies = fetch_all_terms(terms, STATUSES, page_size=args.page_size, max_pages=args.pages)

    wanted_country = args.country.lower()
    rows: List[Dict[str, Any]] = []
    skipped = 0
    for s in studies:
        try:
            ps = (s.get("protocolSection", {}) or {})
            locs = ((ps.get("contactsLocationsModule", {}) or {}).get("locations") or [])
            if args.country and args.require_country:
                # v2 location objects carry the country under "country";
                # "locationCountry" is kept as a fallback for older payloads.
                locs = [
                    L for L in locs
                    if wanted_country in str(L.get("country") or L.get("locationCountry") or "").lower()
                ]
            if args.require_country and not locs:
                continue
            sc, reasons = score_trial(
                s,
                dict(
                    age=args.age,
                    kps=args.kps,
                    prior_bev=args.prior_bev,
                    setting=args.setting,
                    keywords=args.keywords,
                    diagnosis=args.diagnosis,
                ),
            )
            base = extract_row(s)
            base["score"] = sc
            base["reasons"] = "; ".join(reasons)
            base["url"] = f"https://clinicaltrials.gov/study/{base['nct']}" if base.get("nct") else ""
            rows.append(base)
        except Exception:
            # Best effort: malformed records are counted and reported below.
            skipped += 1
            continue

    rows.sort(key=lambda x: -x.get("score", 0))
    print(f"Fetched {len(studies)} trials; showing {len(rows)} after filters. Skipped {skipped}.")

    save_results(rows, args.csv, args.json)
|
| 118 |
+
|
| 119 |
+
|
| 120 |
+
if __name__ == "__main__":
|
| 121 |
+
main()
|
README.md
CHANGED
|
@@ -1,3 +1,69 @@
|
|
| 1 |
-
|
| 2 |
-
|
| 3 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
Project: PRECISE-GBM - Model training & retraining helpers
|
| 2 |
+
|
| 3 |
+
Overview
|
| 4 |
+
|
| 5 |
+
This repository contains code to train models (Gaussian Mixture labelling + SVM and ensemble classifiers) and to persist all artifacts required to reproduce or retrain models on new data. It includes:
|
| 6 |
+
|
| 7 |
+
- `Scenario_heldout_final_PRECISE.py` — training pipeline producing `.joblib` models and metadata JSONs (selected features, best params, CV results).
|
| 8 |
+
- `retrain_helper.py` — CLI utility to rebuild pipelines, set best params and retrain using saved selected-features and params JSONs. Supports JSON/YAML config files and auto-detection of model type.
|
| 9 |
+
- `README_RETRAIN.md` — detailed retrain examples and a notebook cell.
|
| 10 |
+
|
| 11 |
+
This repo also includes helper files to make it ready for GitHub:
|
| 12 |
+
- `requirements.txt` — Python dependencies
|
| 13 |
+
- `.gitignore` — recommended ignores (models, caches, logs)
|
| 14 |
+
- `LICENSE` — MIT license
|
| 15 |
+
- GitHub Actions workflow for CI (pytest smoke test)
|
| 16 |
+
|
| 17 |
+
Getting started (Windows PowerShell)
|
| 18 |
+
|
| 19 |
+
1) Create and activate a virtual environment
|
| 20 |
+
|
| 21 |
+
```powershell
|
| 22 |
+
python -m venv .venv
|
| 23 |
+
.\.venv\Scripts\Activate.ps1
|
| 24 |
+
```
|
| 25 |
+
|
| 26 |
+
2) Install dependencies
|
| 27 |
+
|
| 28 |
+
```powershell
|
| 29 |
+
pip install --upgrade pip
|
| 30 |
+
pip install -r requirements.txt
|
| 31 |
+
```
|
| 32 |
+
|
| 33 |
+
3) Run training (note: the training script reads data from absolute paths configured in the script — adjust them or run from an environment where those files are present)
|
| 34 |
+
|
| 35 |
+
```powershell
|
| 36 |
+
python Scenario_heldout_final_PRECISE.py
|
| 37 |
+
```
|
| 38 |
+
|
| 39 |
+
The training script will create model files under `models_LM22/` and `models_GBM/` and write metadata JSONs next to each joblib model (selected features, params, cv results) as well as group-level JSON summaries.
|
| 40 |
+
|
| 41 |
+
Retraining
|
| 42 |
+
|
| 43 |
+
See `README_RETRAIN.md` for detailed CLI and notebook examples. Short example:
|
| 44 |
+
|
| 45 |
+
```powershell
|
| 46 |
+
python retrain_helper.py `
|
| 47 |
+
--model-prefix "models_GBM/scenario_1/GBM_scen1_Tcell" `
|
| 48 |
+
--train-csv "data\new_train.csv" `
|
| 49 |
+
--label-col "label"
|
| 50 |
+
```
|
| 51 |
+
|
| 52 |
+
Notes
|
| 53 |
+
|
| 54 |
+
- The training script contains hard-coded absolute paths to data files. Before running on another machine, update the `scenarios_*` file paths or place the datasets in the same paths.
|
| 55 |
+
- Retrain helper auto-detects model type when `--model-type` is omitted by looking for `{prefix}_svm_params.json` or `{prefix}_ens_params.json`.
|
| 56 |
+
- YAML config support for retrain requires PyYAML (`pip install pyyaml`).
|
| 57 |
+
|
| 58 |
+
CI
|
| 59 |
+
|
| 60 |
+
A basic GitHub Actions workflow runs a smoke pytest to ensure the retrain helper imports and basic pipeline construction works. It does not run heavy training.
|
| 61 |
+
|
| 62 |
+
Contributing
|
| 63 |
+
|
| 64 |
+
See `CONTRIBUTING.md` for guidance on opening issues and PRs.
|
| 65 |
+
|
| 66 |
+
License
|
| 67 |
+
|
| 68 |
+
This project is released under the MIT License — see `LICENSE`.
|
| 69 |
+
|
ctgov_client.py
ADDED
|
@@ -0,0 +1,225 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Shared client for ClinicalTrials.gov v2 API and scoring
|
| 2 |
+
import re
|
| 3 |
+
import requests
|
| 4 |
+
from typing import Any, Dict, List, Tuple
|
| 5 |
+
|
| 6 |
+
# Preset search terms per diagnosis category.
DEFAULT_DIAG_TERMS = {
    "Glioblastoma": ["glioblastoma", "GBM", "glioblastoma multiforme"],
    "Diffuse midline glioma": ["diffuse midline glioma", "DMG", "H3 K27M"],
    "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
    "Astrocytoma": ["astrocytoma", "grade 2 astrocytoma", "grade 4 astrocytoma"],
    "Oligodendroglioma": ["oligodendroglioma", "1p19q codeleted"],
    "Meningioma": ["meningioma"],
    "Medulloblastoma": ["medulloblastoma"],
    "Ependymoma": ["ependymoma"],
    "Spinal cord tumor": ["spinal cord tumor", "spinal cord neoplasm"],
}

# ClinicalTrials.gov v2 studies endpoint and the User-Agent header sent with requests.
API_BASE = "https://clinicaltrials.gov/api/v2/studies"
UA = {"User-Agent": "BrainTrialsFinder-Desktop/1.0 (+https://clinicaltrials.gov)"}


def build_terms(diagnosis: str, keywords: str) -> List[str]:
    """Return the ordered, de-duplicated search terms for a query.

    Preset terms for ``diagnosis`` come first (generic CNS tumor terms when
    the diagnosis has no preset), followed by the non-blank, trimmed entries
    of the comma-separated ``keywords`` string. Exact duplicates are dropped
    so the same term is not queried more than once.
    """
    if diagnosis in DEFAULT_DIAG_TERMS:
        base = DEFAULT_DIAG_TERMS[diagnosis]
    else:
        base = ["brain tumor", "spinal cord tumor", "CNS tumor"]
    extra = [k.strip() for k in (keywords or "").split(",") if k.strip()]
    # dict.fromkeys keeps first-seen order while removing repeats.
    return list(dict.fromkeys([*base, *extra]))
|
| 30 |
+
|
| 31 |
+
|
| 32 |
+
def ctgov_search_one(term: str, statuses: List[str], page_size: int = 100, max_pages: int = 5) -> List[Dict[str, Any]]:
    """Fetch studies matching one search term from ClinicalTrials.gov v2.

    Requests up to ``max_pages`` pages of ``page_size`` studies filtered to
    ``statuses``, following ``nextPageToken``. Stops early on an empty page
    or when no further token is returned. A falsy ``max_pages`` performs no
    requests. Raises ``requests.HTTPError`` on a non-2xx response.
    """
    all_studies: List[Dict[str, Any]] = []
    page_token = None
    count = 0
    max_iters = max_pages or 0
    # The context manager closes the session's pooled connections on every
    # exit path, including when raise_for_status() raises.
    with requests.Session() as session:
        session.headers.update(UA)
        while count < max_iters:
            params = {
                "query.term": term,
                "filter.overallStatus": ",".join(statuses),
                "pageSize": page_size,
            }
            if page_token:
                params["pageToken"] = page_token
            r = session.get(API_BASE, params=params, timeout=30)
            r.raise_for_status()
            # Tolerate a null JSON body or a null "studies" field.
            data = r.json() or {}
            studies = data.get("studies") or []
            if not studies:
                break
            all_studies.extend(studies)
            page_token = data.get("nextPageToken")
            if not page_token:
                break
            count += 1
    return all_studies
|
| 59 |
+
|
| 60 |
+
|
| 61 |
+
def fetch_all_terms(terms: List[str], statuses: List[str], page_size=100, max_pages=5) -> List[Dict[str, Any]]:
    """Search every term and merge the results, one entry per study.

    Each distinct, non-blank term is queried once. Studies are keyed by
    their NCT id so a study returned for several terms is kept only once
    (first occurrence wins); a study without an NCT id is keyed by object
    identity and therefore always kept. A term whose request fails with an
    HTTP error status is skipped; other request errors propagate.
    """
    dedup: Dict[Any, Dict[str, Any]] = {}
    # Repeated terms would only repeat identical network queries.
    for t in dict.fromkeys(terms):
        if not t:
            continue
        try:
            for s in ctgov_search_one(t, statuses, page_size=page_size, max_pages=max_pages):
                ident = (s.get("protocolSection", {}) or {}).get("identificationModule", {}) or {}
                key = ident.get("nctId") or id(s)
                dedup.setdefault(key, s)
        except requests.HTTPError:
            # Best effort: one rejected term must not abort the whole search.
            continue
    return list(dedup.values())
|
| 74 |
+
|
| 75 |
+
|
| 76 |
+
def mentions(txt: str, term: str) -> bool:
    """Return True when ``term`` occurs in ``txt`` as a whole word or phrase.

    Matching ignores case. An empty or blank ``term`` and an empty or
    missing ``txt`` never match. Lookarounds are used instead of ``\\b`` so
    terms that begin or end with punctuation, such as ``"(PD-1)"``, can
    still match.
    """
    needle = (term or "").strip()
    if not needle or not txt:
        return False
    pattern = rf"(?<!\w){re.escape(needle)}(?!\w)"
    return re.search(pattern, txt, re.I) is not None
|
| 78 |
+
|
| 79 |
+
|
| 80 |
+
def as_text(obj: Any) -> str:
    """Flatten a JSON-like value into plain text.

    ``None`` gives "". A dict gives the value under the first of the keys
    ``textblock``, ``textBlock`` or ``value`` that is present; otherwise
    its non-None values are flattened recursively and joined by spaces.
    A list is flattened item by item and joined with "; ". Anything else
    is passed through ``str()``.
    """
    if obj is None:
        return ""
    if isinstance(obj, dict):
        for k in ("textblock", "textBlock", "value"):
            if k in obj:
                return str(obj.get(k) or "")
        # Recurse so nested lists/dicts are flattened like the list branch
        # does, instead of leaking their Python repr into the text.
        return " ".join(as_text(v) for v in obj.values() if v is not None)
    if isinstance(obj, list):
        return "; ".join(as_text(x) for x in obj)
    return str(obj)
|
| 91 |
+
|
| 92 |
+
|
| 93 |
+
def parse_age_to_int(v: Any):
    """Parse an age value into a whole number of years, or ``None``.

    Accepts ``None``, a dict (its ``"value"`` entry is parsed), a number
    (truncated to ``int``) or a string such as ``"18 Years"`` or
    ``"6 Months"``. The first integer in the string is used; when it is
    followed by a sub-year unit (months, weeks, days, hours, minutes) it is
    converted to completed years so it can be compared with an age in
    years. Strings without digits (for example ``"N/A"``) return ``None``.
    """
    if v is None:
        return None
    if isinstance(v, dict):
        return parse_age_to_int(v.get("value"))
    if isinstance(v, (int, float)):
        try:
            return int(v)
        except (ValueError, OverflowError):
            # NaN and infinity cannot be represented as an int.
            return None

    m = re.search(r"(\d+)\s*([A-Za-z]*)", str(v))
    if not m:
        return None

    number = int(m.group(1))
    unit = m.group(2).lower()
    # Sub-year units: floor to completed years. Anything else (including
    # "Years" or no unit at all) is taken as years.
    for prefix, per_year in (
        ("month", 12),
        ("week", 52),
        ("day", 365),
        ("hour", 8760),
        ("minute", 525600),
    ):
        if unit.startswith(prefix):
            return number // per_year
    return number
|
| 102 |
+
|
| 103 |
+
|
| 104 |
+
def ensure_list(v: Any):
    """Coerce ``v`` to a list.

    A list is returned unchanged (same object), ``None`` becomes ``[]``,
    and any other value is wrapped in a single-element list.
    """
    if isinstance(v, list):
        return v
    return [] if v is None else [v]
|
| 110 |
+
|
| 111 |
+
|
| 112 |
+
def score_trial(t: Dict[str, Any], intake: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score how well trial *t* fits the patient described by *intake*.

    The score starts at 0 and is adjusted as follows:

    * +30 when a condition or the brief title mentions a diagnosis term;
    * +8 for phase 2 and +12 for phase 3;
    * -30 for each age limit the patient falls outside of;
    * -15 when the criteria mention "ECOG 0-1" and KPS is missing or < 80;
    * -10 when the criteria mention "Karnofsky" and KPS is missing or < 70;
    * -25 when the patient had bevacizumab and the criteria say
      "no prior bevacizumab";
    * +8 when the setting ("Recurrent" / "Newly diagnosed") is reflected in
      the criteria (or, for newly diagnosed, "adjuvant" in the title);
    * +3 per comma-separated keyword found in the title or criteria.

    Args:
        t: Study dict with a ``protocolSection``; missing or ``None``
            modules are tolerated.
        intake: Patient data read from the keys ``age``, ``kps``,
            ``prior_bev``, ``setting``, ``keywords`` and ``diagnosis``.
            May be ``None`` or empty.

    Returns:
        ``(score, reasons)`` where the score is clamped to 0..100 and
        *reasons* holds the human-readable explanations that were recorded.
    """
    intake = intake or {}
    age_local = intake.get("age")
    kps_local = intake.get("kps")
    prior_bev_local = bool(intake.get("prior_bev", False))
    setting_local = intake.get("setting") or ""
    keywords_local = intake.get("keywords") or ""
    diagnosis_local = intake.get("diagnosis") or ""

    if diagnosis_local in DEFAULT_DIAG_TERMS:
        diag_terms = DEFAULT_DIAG_TERMS[diagnosis_local]
    elif diagnosis_local and diagnosis_local != "Other":
        diag_terms = [diagnosis_local]
    else:
        diag_terms = ["brain tumor", "CNS tumor", "spinal cord tumor"]

    ps = (t or {}).get("protocolSection") or {}
    elig = ps.get("eligibilityModule")
    crit = ""
    min_age = None
    max_age = None
    if isinstance(elig, dict):
        # Fall back to the whole module when no criteria field is present.
        crit_raw = elig.get("eligibilityCriteria") or elig.get("criteria") or elig
        crit = as_text(crit_raw)
        min_age = parse_age_to_int(elig.get("minimumAge"))
        max_age = parse_age_to_int(elig.get("maximumAge"))
    elif isinstance(elig, str):
        crit = as_text(elig)

    # `or {}` guards against a module key that is present but None.
    phases_list = ensure_list((ps.get("designModule") or {}).get("phases"))
    phases_up = [str(p).upper() for p in phases_list]
    # as_text() keeps mentions() from failing on a non-string condition.
    conds_list = [as_text(c) for c in ensure_list((ps.get("conditionsModule") or {}).get("conditions"))]
    title = (ps.get("identificationModule") or {}).get("briefTitle") or ""

    s = 0
    reasons: List[str] = []

    if any(mentions(c, term) for c in conds_list for term in diag_terms) or any(
        mentions(title, term) for term in diag_terms
    ):
        s += 30
        reasons.append(f"Matches diagnosis: {diagnosis_local or 'neuro-oncology'}.")

    if any("PHASE 2" in p or "PHASE2" in p for p in phases_up):
        s += 8
    if any("PHASE 3" in p or "PHASE3" in p for p in phases_up):
        s += 12

    if age_local is not None:
        try:
            if min_age is not None and age_local < min_age:
                reasons.append(f"Age below minimum ({min_age}).")
                s -= 30
            if max_age is not None and age_local > max_age:
                reasons.append(f"Age above maximum ({max_age}).")
                s -= 30
        except TypeError:
            # A non-numeric age cannot be compared; skip the age checks
            # instead of failing the whole score.
            pass

    if mentions(crit, "ECOG 0-1") and (kps_local is None or kps_local < 80):
        s -= 15
        reasons.append("Requires ECOG 0–1 (KPS ~≥80).")
    if mentions(crit, "Karnofsky") and (kps_local is None or kps_local < 70):
        s -= 10
        reasons.append("Requires KPS ≥70.")
    if prior_bev_local and mentions(crit, "no prior bevacizumab"):
        s -= 25
        reasons.append("Excludes prior bevacizumab.")

    if setting_local == "Recurrent" and mentions(crit, "recurrent"):
        s += 8
    if setting_local == "Newly diagnosed" and (
        mentions(crit, "newly diagnosed") or mentions(title, "adjuvant")
    ):
        s += 8

    for kw in (k.strip() for k in keywords_local.split(",")):
        if kw and (mentions(title, kw) or mentions(crit, kw)):
            s += 3

    return max(0, min(100, s)), reasons
|
| 180 |
+
# python
|
| 181 |
+
def extract_row(study: dict) -> dict:
    """Return a flat row dict for the table/PDF. Safe against missing fields.

    Args:
        study: Study dict with a ``protocolSection``; ``None`` or a study
            lacking any module produces empty strings.

    Returns:
        A dict with the string keys ``title``, ``nct``, ``status``,
        ``phases``, ``conditions``, ``sponsor`` and ``city_country``.
        ``city_country`` describes the first listed location only.
    """
    ps = (study or {}).get("protocolSection") or {}
    idm = ps.get("identificationModule") or {}
    scm = ps.get("statusModule") or {}
    dsm = ps.get("designModule") or {}
    cdnm = ps.get("conditionsModule") or {}
    slm = ps.get("sponsorCollaboratorsModule") or {}
    clm = ps.get("contactsLocationsModule") or {}

    title = (idm.get("officialTitle") or idm.get("briefTitle") or "").strip()
    nct = (idm.get("nctId") or "").strip()

    status_raw = (scm.get("overallStatus") or "").strip()
    # e.g., NOT_YET_RECRUITING -> Not Yet Recruiting
    status = status_raw.replace("_", " ").title() if status_raw else ""

    # str() keeps join() from raising on a non-string entry.
    phases = ", ".join(str(p) for p in ensure_list(dsm.get("phases")))
    conditions = ", ".join(str(c) for c in ensure_list(cdnm.get("conditions")))

    sponsor = ""
    lead = slm.get("leadSponsor") or {}
    if isinstance(lead, dict):
        sponsor = (lead.get("name") or "").strip()

    city_country = ""
    locs = ensure_list(clm.get("locations"))
    first = locs[0] if locs else None
    if isinstance(first, dict):
        # Accept the legacy "locationCity"/"locationCountry" keys as well
        # as the "city"/"country" keys used by ClinicalTrials.gov API v2.
        city = str(first.get("locationCity") or first.get("city") or "").strip()
        country = str(first.get("locationCountry") or first.get("country") or "").strip()
        city_country = ", ".join(p for p in (city, country) if p)

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
        "city_country": city_country,
    }
|
desktop_app.py
ADDED
|
@@ -0,0 +1,607 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# python
|
| 2 |
+
# Desktop GUI for Brain Trials Finder (no Streamlit)
|
| 3 |
+
# Run with: python desktop_app.py
|
| 4 |
+
import threading
|
| 5 |
+
import tkinter as tk
|
| 6 |
+
from tkinter import ttk, messagebox, filedialog
|
| 7 |
+
import urllib.parse
|
| 8 |
+
import webbrowser
|
| 9 |
+
from typing import List, Dict, Any
|
| 10 |
+
|
| 11 |
+
from ctgov_client import (
|
| 12 |
+
DEFAULT_DIAG_TERMS,
|
| 13 |
+
build_terms,
|
| 14 |
+
fetch_all_terms,
|
| 15 |
+
score_trial,
|
| 16 |
+
extract_row,
|
| 17 |
+
ensure_list,
|
| 18 |
+
)
|
| 19 |
+
from uk_sources import fetch_uk_trials
|
| 20 |
+
from euctr_client import fetch_eu_trials
|
| 21 |
+
|
| 22 |
+
# Trial statuses requested from the registries.
STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]

COPYRIGHT = "© 2025 Brain Trials Finder | Prajwal Ghimire"
__copyright__ = COPYRIGHT

# Predefined NIHR UK location options for portal queries.
UK_NIHR_LOCATIONS = [
    "Nottingham", "Liverpool", "Preston", "Brighton",
    "Cardiff", "Leeds", "Plymouth", "Coventry",
    "Newcastle upon Tyne", "Dundee", "Cambridge", "Birmingham",
    "Hull", "Stoke-on-Trent", "Romford", "Southampton",
    "Bristol", "Middlesbrough", "London", "Sheffield",
    "Edinburgh", "Oxford",
]
|
| 51 |
+
|
| 52 |
+
|
| 53 |
+
class BrainTrialsApp(tk.Tk):
|
| 54 |
+
def __init__(self):
    """Build the main window: search inputs, UK/EU source controls,
    the results table and the contacts/locations panel.

    No search runs at startup; the user starts one with a Search button.
    """
    super().__init__()
    self.title(f"Brain Cancer Trials Finder - Desktop App - {COPYRIGHT}")
    self.geometry("1200x760")

    west = tk.W
    gap_left = (16, 6)   # horizontal padding in front of a labelled field
    row2_pad = (6, 0)    # vertical padding for the second input row

    # ---- Top controls --------------------------------------------------
    frm = ttk.Frame(self, padding=10)
    frm.pack(fill="x")

    # Diagnosis
    ttk.Label(frm, text="Diagnosis:").grid(row=0, column=0, sticky=west, padx=(0, 6))
    diag_options = [*DEFAULT_DIAG_TERMS.keys(), "Other"]
    self.diagnosis = tk.StringVar(value="Glioblastoma")
    diag_box = ttk.Combobox(
        frm, textvariable=self.diagnosis, values=diag_options, state="readonly", width=28
    )
    diag_box.grid(row=0, column=1, sticky=west)

    # Setting
    ttk.Label(frm, text="Setting:").grid(row=0, column=2, sticky=west, padx=gap_left)
    self.setting = tk.StringVar(value="Recurrent")
    setting_box = ttk.Combobox(
        frm,
        textvariable=self.setting,
        values=["Newly diagnosed", "Recurrent"],
        state="readonly",
        width=20,
    )
    setting_box.grid(row=0, column=3, sticky=west)

    # Age
    ttk.Label(frm, text="Age:").grid(row=0, column=4, sticky=west, padx=gap_left)
    self.age = tk.IntVar(value=55)
    age_spin = tk.Spinbox(frm, from_=1, to=100, textvariable=self.age, width=6)
    age_spin.grid(row=0, column=5, sticky=west)

    # KPS
    ttk.Label(frm, text="KPS:").grid(row=0, column=6, sticky=west, padx=gap_left)
    self.kps = tk.IntVar(value=80)
    kps_spin = tk.Spinbox(frm, from_=40, to=100, increment=10, textvariable=self.kps, width=6)
    kps_spin.grid(row=0, column=7, sticky=west)

    # Prior bevacizumab
    self.prior_bev = tk.BooleanVar(value=False)
    bev_check = ttk.Checkbutton(frm, text="Prior bevacizumab", variable=self.prior_bev)
    bev_check.grid(row=1, column=1, sticky=west, pady=row2_pad)

    # Keywords
    ttk.Label(frm, text="Keywords:").grid(row=1, column=2, sticky=west, padx=gap_left, pady=row2_pad)
    self.keywords = tk.StringVar(value="immunotherapy,vaccine,device")
    keywords_entry = ttk.Entry(frm, textvariable=self.keywords, width=32)
    keywords_entry.grid(row=1, column=3, sticky=west, pady=row2_pad)

    # Optional country filter
    ttk.Label(frm, text="Country contains:").grid(
        row=1, column=4, sticky=west, padx=gap_left, pady=row2_pad
    )
    self.country = tk.StringVar(value="")
    country_entry = ttk.Entry(frm, textvariable=self.country, width=18)
    country_entry.grid(row=1, column=5, sticky=west, pady=row2_pad)
    self.require_country = tk.BooleanVar(value=False)
    require_check = ttk.Checkbutton(
        frm, text="Require site in country", variable=self.require_country
    )
    require_check.grid(row=1, column=6, sticky=west, pady=row2_pad)

    # Search button and status line
    self.btn_search = ttk.Button(frm, text="Search", command=self.on_search)
    self.btn_search.grid(row=0, column=8, sticky=west, padx=(16, 0))
    self.status_lbl = ttk.Label(frm, text="Ready")
    self.status_lbl.grid(row=1, column=8, sticky=west, padx=(16, 0))

    # ---- UK / EU sources -----------------------------------------------
    ukfrm = ttk.Labelframe(self, text="UK Sources", padding=10)
    ukfrm.pack(fill="x", padx=10)

    self.uk_use_ctgov = tk.BooleanVar(value=True)
    ctgov_check = ttk.Checkbutton(
        ukfrm, text="ClinicalTrials.gov (UK sites only)", variable=self.uk_use_ctgov
    )
    ctgov_check.grid(row=0, column=0, sticky=west)

    # EUCTR toggle and its tuning fields
    self.use_euctr = tk.BooleanVar(value=True)
    euctr_check = ttk.Checkbutton(
        ukfrm, text="Include EU Clinical Trials Register (EUCTR)", variable=self.use_euctr
    )
    euctr_check.grid(row=0, column=3, sticky=west)
    ttk.Label(ukfrm, text="EUCTR delay (s):").grid(row=1, column=3, sticky=west, padx=(8, 0))
    self.euctr_delay = tk.DoubleVar(value=0.8)
    delay_entry = ttk.Entry(ukfrm, textvariable=self.euctr_delay, width=6)
    delay_entry.grid(row=1, column=4, sticky=west)
    ttk.Label(ukfrm, text="EUCTR max pages:").grid(row=1, column=5, sticky=west, padx=(8, 0))
    self.euctr_maxpages = tk.IntVar(value=2)
    pages_entry = ttk.Entry(ukfrm, textvariable=self.euctr_maxpages, width=4)
    pages_entry.grid(row=1, column=6, sticky=west)

    self.btn_search_uk = ttk.Button(ukfrm, text="Search UK", command=self.on_search_uk)
    self.btn_search_uk.grid(row=0, column=1, padx=(16, 0))
    # The EU search has its own button, independent of the main Search.
    self.btn_search_eu = ttk.Button(ukfrm, text="Search EU", command=self.on_search_eu)
    self.btn_search_eu.grid(row=0, column=4, padx=(8, 0))
    save_btn = ttk.Button(ukfrm, text="Save PDF", command=self.on_save_pdf)
    save_btn.grid(row=0, column=2, padx=(16, 0))

    # Shortcuts that open the external portals in a browser
    nihr_btn = ttk.Button(ukfrm, text="Open NIHR", command=self.on_open_nihr)
    nihr_btn.grid(row=1, column=0, pady=(8, 0), sticky=west)
    isrctn_btn = ttk.Button(ukfrm, text="Open ISRCTN (UK)", command=self.on_open_isrctn)
    isrctn_btn.grid(row=1, column=1, pady=(8, 0), sticky=west)
    cruk_btn = ttk.Button(ukfrm, text="Open CRUK", command=self.on_open_cruk)
    cruk_btn.grid(row=1, column=2, pady=(8, 0), sticky=west)

    # Optional NIHR location
    ttk.Label(ukfrm, text="NIHR location (optional):").grid(
        row=2, column=0, sticky=west, pady=(8, 0)
    )
    self.uk_location = tk.StringVar(value="")
    location_box = ttk.Combobox(
        ukfrm, textvariable=self.uk_location, values=UK_NIHR_LOCATIONS, width=28, state="readonly"
    )
    location_box.grid(row=2, column=1, sticky=west, pady=(8, 0))

    # ---- Results table -------------------------------------------------
    # (column id, heading text, column options)
    column_specs = (
        ("score", "Score", {"width": 60, "anchor": "center"}),
        ("title", "Title", {"width": 330}),
        ("sponsor", "Sponsor", {"width": 220}),
        ("city_country", "City/Country", {"width": 160}),
        ("status", "Status", {"width": 120}),
        ("phases", "Phases", {"width": 110}),
        ("conditions", "Conditions", {"width": 260}),
        ("nct", "NCT ID", {"width": 120}),
        ("source", "Source", {"width": 120}),
    )
    cols = tuple(col_id for col_id, _, _ in column_specs)
    self.tree = ttk.Treeview(self, columns=cols, show="headings", height=18)
    self.tree.pack(fill="both", expand=True, padx=10, pady=(6, 10))
    for col_id, heading, options in column_specs:
        self.tree.heading(col_id, text=heading)
        self.tree.column(col_id, **options)
    self.tree.bind("<Double-1>", self.on_open)
    self.tree.bind("<<TreeviewSelect>>", self.on_select)

    # Per-row lookups keyed by Treeview item id
    self._url_by_item: Dict[str, str] = {}
    self._study_by_item: Dict[str, Dict[str, Any]] = {}
    self._current_rows: List[Dict[str, Any]] = []  # rows currently displayed

    # ---- Contacts and locations panel ----------------------------------
    infofrm = ttk.Labelframe(self, text="Contacts and Locations", padding=10)
    infofrm.pack(fill="both", expand=True, padx=10, pady=(0, 10))
    self.contacts_text = tk.Text(infofrm, height=12, wrap="word")
    self.contacts_text.config(state="disabled")
    scroll = ttk.Scrollbar(infofrm, orient="vertical", command=self.contacts_text.yview)
    self.contacts_text.configure(yscrollcommand=scroll.set)
    self.contacts_text.grid(row=0, column=0, sticky="nsew")
    scroll.grid(row=0, column=1, sticky="ns")
    infofrm.columnconfigure(0, weight=1)
    infofrm.rowconfigure(0, weight=1)

    # No automatic search on startup; the user must press Search.
|
| 180 |
+
|
| 181 |
+
# ----- Portal helpers -----
|
| 182 |
+
def _build_portal_query(self) -> str:
    """Return the URL-encoded query string used for the portal shortcuts.

    The selected diagnosis is used unless it is empty or "Other"; in that
    case the keywords field is used, falling back to "brain tumour".
    """
    diagnosis = (self.diagnosis.get() or "").strip()
    if not diagnosis or diagnosis == "Other":
        query = (self.keywords.get() or "").strip() or "brain tumour"
    else:
        query = diagnosis
    return urllib.parse.quote_plus(query)
|
| 189 |
+
|
| 190 |
+
def on_open_nihr(self):
    """Open the NIHR search results page for the current query in a new
    browser tab, adding the chosen NIHR location when one is selected."""
    q = self._build_portal_query()
    base = "https://www.bepartofresearch.nihr.ac.uk/results/search-results"
    place = (self.uk_location.get() or "").strip()
    location_part = f"&location={urllib.parse.quote_plus(place)}" if place else ""
    webbrowser.open_new_tab(f"{base}?query={q}{location_part}")
|
| 200 |
+
|
| 201 |
+
def on_open_isrctn(self):
    """Open an ISRCTN search for the current query, restricted to the
    United Kingdom, in a new browser tab."""
    query = self._build_portal_query()
    webbrowser.open_new_tab(
        f"https://www.isrctn.com/search?q={query}&countries=United%20Kingdom"
    )
|
| 205 |
+
|
| 206 |
+
def on_open_cruk(self):
    """Open the Cancer Research UK clinical-trials search for the current
    query in a new browser tab."""
    query = self._build_portal_query()
    webbrowser.open_new_tab(
        f"https://find.cancerresearchuk.org/clinical-trials?q={query}"
    )
|
| 210 |
+
|
| 211 |
+
# ----- Actions -----
|
| 212 |
+
def on_open(self, event=None):
    """Open the URL of the first selected row that has one.

    Bound to a double-click on the results table; *event* is unused.
    Nothing happens when no row is selected or no selected row has a URL.
    """
    candidates = (self._url_by_item.get(item) for item in self.tree.selection())
    target = next((url for url in candidates if url), None)
    if target:
        webbrowser.open_new_tab(target)
|
| 221 |
+
|
| 222 |
+
def on_select(self, event=None):
    """Show the contacts of the first selected row.

    Bound to the table's selection event; *event* is unused.  Does nothing
    when the selection is empty or the row has no stored study.
    """
    selected = self.tree.selection()
    study = self._study_by_item.get(selected[0]) if selected else None
    if study:
        self._populate_contacts(study)
|
| 230 |
+
|
| 231 |
+
def on_search(self):
    """Search ClinicalTrials.gov with the current inputs and show the
    scored results.

    Inputs are read on the calling (GUI) thread; fetching and scoring run
    on a daemon thread, which hands the outcome back through ``self.after``
    to ``_render_rows`` or ``_show_error``.  Those two methods re-enable
    the search buttons.
    """
    # Read every input before disabling anything: IntVar.get() raises when
    # a spinbox holds non-numeric text, and raising after the buttons were
    # disabled would leave them disabled for good.
    try:
        diagnosis = self.diagnosis.get()
        setting = self.setting.get()
        age = self.age.get()
        kps = self.kps.get()
        prior_bev = self.prior_bev.get()
        keywords = self.keywords.get()
        country = self.country.get().strip()
        require_country = self.require_country.get()
    except (tk.TclError, ValueError) as exc:
        messagebox.showerror("Invalid input", f"Please check the Age and KPS fields.\n{exc}")
        return

    # Disable all three search buttons, as on_search_eu does.
    for btn in (self.btn_search, self.btn_search_uk, self.btn_search_eu):
        btn.configure(state=tk.DISABLED)
    self.status_lbl.configure(text="Fetching…")

    def loc_field(loc, legacy_key, api_key):
        # Accept the legacy "location*" keys as well as the "city"/"country"
        # keys used by ClinicalTrials.gov API v2 location objects.
        return str(loc.get(legacy_key) or loc.get(api_key) or "").strip()

    def worker():
        try:
            terms = build_terms(diagnosis, keywords)
            # ClinicalTrials.gov only; EUCTR has its own button.
            studies = fetch_all_terms(terms, STATUSES, page_size=100, max_pages=5)
            intake = {
                "age": age,
                "kps": kps,
                "prior_bev": prior_bev,
                "setting": setting,
                "keywords": keywords,
                "diagnosis": diagnosis,
            }
            wanted = country.lower()
            rows: List[Dict[str, Any]] = []
            skipped = 0
            for s in studies:
                try:
                    ps = s.get("protocolSection") or {}
                    clm = ps.get("contactsLocationsModule") or {}
                    locs = ensure_list(clm.get("locations"))
                    if country and require_country:
                        locs = [
                            L for L in locs
                            if wanted in loc_field(L, "locationCountry", "country").lower()
                        ]
                    if require_country and not locs:
                        continue

                    sc, reasons = score_trial(s, intake)
                    base = extract_row(s)

                    # Fall back to the first (filtered) location.
                    if not base.get("city_country") and locs:
                        parts = [
                            loc_field(locs[0], "locationCity", "city"),
                            loc_field(locs[0], "locationCountry", "country"),
                        ]
                        parts = [p for p in parts if p]
                        if parts:
                            base["city_country"] = ", ".join(parts)

                    # Store the score: the sort below and the Score column
                    # both read it from the row.
                    base["score"] = sc
                    base["reasons"] = "; ".join(reasons)
                    base["source"] = "CTGov"
                    if base.get("nct"):
                        base["url"] = f"https://clinicaltrials.gov/study/{base['nct']}"
                    else:
                        base["url"] = s.get("_source_url", "")
                    base["study"] = s
                    rows.append(base)
                except Exception:
                    # Best effort: one malformed study must not abort the
                    # search; it is reported through the skipped counter.
                    skipped += 1
                    continue
            rows.sort(key=lambda x: -x.get("score", 0))
            self.after(0, self._render_rows, rows, skipped, len(studies))
        except Exception as e:
            self.after(0, self._show_error, e)

    threading.Thread(target=worker, daemon=True).start()
|
| 310 |
+
|
| 311 |
+
def on_search_uk(self):
    """Search the UK sources with the current inputs and show the results.

    Inputs are read on the calling (GUI) thread; ``fetch_uk_trials`` runs
    on a daemon thread, which hands the outcome back through ``self.after``
    to ``_render_rows`` or ``_show_error``.  Those two methods re-enable
    the search buttons.
    """
    # Read every input before disabling anything: IntVar.get() raises when
    # a spinbox holds non-numeric text, and raising after the buttons were
    # disabled would leave them disabled for good.
    try:
        intake = {
            "age": self.age.get(),
            "kps": self.kps.get(),
            "prior_bev": self.prior_bev.get(),
            "setting": self.setting.get(),
            "keywords": self.keywords.get(),
            "diagnosis": self.diagnosis.get(),
        }
        use_ctgov = self.uk_use_ctgov.get()
    except (tk.TclError, ValueError) as exc:
        messagebox.showerror("Invalid input", f"Please check the Age and KPS fields.\n{exc}")
        return
    diagnosis = intake["diagnosis"]
    keywords = intake["keywords"]

    # Disable all three search buttons, as on_search_eu does.
    for btn in (self.btn_search, self.btn_search_uk, self.btn_search_eu):
        btn.configure(state=tk.DISABLED)
    self.status_lbl.configure(text="Fetching UK trials…")

    def worker():
        try:
            rows, total_raw, skipped = fetch_uk_trials(
                diagnosis, keywords, intake, include_ctgov=use_ctgov
            )
            self.after(0, self._render_rows, rows, skipped, total_raw)
        except Exception as e:
            self.after(0, self._show_error, e)

    threading.Thread(target=worker, daemon=True).start()
|
| 339 |
+
|
| 340 |
+
def on_search_eu(self):
    """Run EUCTR-only search and display results (separate button).

    Inputs, including the EUCTR delay and page limit, are read on the
    calling (GUI) thread.  Fetching and scoring run on a daemon thread,
    which hands the outcome back through ``self.after`` to ``_render_rows``
    or ``_show_error``; those two methods re-enable the search buttons.
    Rows are scored without age, KPS, prior-bevacizumab or setting data.
    """
    # Read every Tk variable here rather than in the worker: the variables
    # belong to the GUI thread, and a bad number should be reported before
    # any button is disabled.
    try:
        diagnosis = self.diagnosis.get()
        keywords = self.keywords.get()
        country = self.country.get().strip()
        require_country = self.require_country.get()
        max_pages = self.euctr_maxpages.get()
        polite_delay = self.euctr_delay.get()
    except (tk.TclError, ValueError) as exc:
        messagebox.showerror(
            "Invalid input", f"Please check the EUCTR delay and max pages fields.\n{exc}"
        )
        return

    for btn in (self.btn_search, self.btn_search_uk, self.btn_search_eu):
        btn.configure(state=tk.DISABLED)
    self.status_lbl.configure(text="Fetching EU trials…")

    def loc_field(loc, legacy_key, api_key):
        # Accept the legacy "location*" keys as well as plain
        # "city"/"country" keys.
        return str(loc.get(legacy_key) or loc.get(api_key) or "").strip()

    def worker_eu():
        # No widget is touched from this thread; every GUI update goes
        # through self.after, which also re-enables the buttons.
        try:
            terms = build_terms(diagnosis, keywords)
            # A fetch failure reaches the handler below and is shown to the
            # user instead of appearing as an empty result.
            eu_studies = fetch_eu_trials(
                terms, STATUSES, page_size=50, max_pages=max_pages, polite_delay=polite_delay
            )
            intake = {
                "age": None,
                "kps": None,
                "prior_bev": False,
                "setting": "",
                "keywords": keywords,
                "diagnosis": diagnosis,
            }
            wanted = country.lower()
            rows = []
            skipped = 0
            for s in eu_studies:
                try:
                    ps = s.get("protocolSection") or {}
                    clm = ps.get("contactsLocationsModule") or {}
                    locs = ensure_list(clm.get("locations"))
                    if country and require_country:
                        locs = [
                            L for L in locs
                            if wanted in loc_field(L, "locationCountry", "country").lower()
                        ]
                    if require_country and not locs:
                        continue

                    # Reuse score_trial where possible (may be incomplete
                    # for EU studies); a scoring failure gives a zero score.
                    try:
                        sc, reasons = score_trial(s, intake)
                    except Exception:
                        sc, reasons = 0, []

                    base = extract_row(s)
                    if not base.get("city_country") and locs:
                        parts = [
                            loc_field(locs[0], "locationCity", "city"),
                            loc_field(locs[0], "locationCountry", "country"),
                        ]
                        parts = [p for p in parts if p]
                        if parts:
                            base["city_country"] = ", ".join(parts)

                    base["score"] = sc
                    base["reasons"] = "; ".join(reasons)
                    base["source"] = "EUCTR"
                    base["url"] = s.get("_source_url", "")
                    base["study"] = s
                    rows.append(base)
                except Exception:
                    # Best effort: one malformed study must not abort the
                    # search; it is reported through the skipped counter.
                    skipped += 1
                    continue
            rows.sort(key=lambda x: -x.get("score", 0))
            self.after(0, self._render_rows, rows, skipped, len(eu_studies))
        except Exception as e:
            self.after(0, self._show_error, e)

    threading.Thread(target=worker_eu, daemon=True).start()
|
| 411 |
+
|
| 412 |
+
# ----- Rendering & details -----
|
| 413 |
+
def _show_error(self, e: Exception):
    """Re-enable the three search buttons, set the status line to "Error"
    and show *e* in an error dialog."""
    for button in (self.btn_search, self.btn_search_uk, self.btn_search_eu):
        button.configure(state=tk.NORMAL)
    self.status_lbl.configure(text="Error")
    messagebox.showerror("Error", f"Failed to fetch trials.\n{e}")
|
| 419 |
+
|
| 420 |
+
def _render_rows(self, rows: List[Dict[str, Any]], skipped: int, total: int):
    """Replace the table contents with *rows* and update the status line.

    Args:
        rows: Row dicts, already sorted.  All of them are kept in
            ``self._current_rows``; only the first 300 are inserted into
            the table.
        skipped: Number of studies that could not be processed.
        total: Number of studies fetched before filtering.

    Also rebuilds the per-row URL and study lookups and re-enables the
    three search buttons.
    """
    display_limit = 300

    # Clear the table and the lookups tied to its item ids.
    for iid in self.tree.get_children():
        self.tree.delete(iid)
    self._url_by_item.clear()
    self._study_by_item.clear()
    self._current_rows = rows[:]  # snapshot for export

    for r in rows[:display_limit]:
        study = r.get("study") or {}
        ps = (study.get("protocolSection") or {}) if isinstance(study, dict) else {}

        # Show the EudraCT number when there is no NCT id.
        nct_display = r.get("nct") or ""
        if not nct_display:
            idm = ps.get("identificationModule") or {}
            if isinstance(idm, dict):
                nct_display = idm.get("eudractNumber") or ""

        # Without a city/country, list the trial's countries once each.
        city_country = r.get("city_country") or ""
        if not city_country:
            clm = ps.get("contactsLocationsModule") or {}
            locs = ensure_list(clm.get("locations")) if isinstance(clm, dict) else []
            countries = []
            for L in locs:
                if not isinstance(L, dict):
                    continue
                # Accept the legacy key as well as plain "country".
                c = str(L.get("locationCountry") or L.get("country") or "").strip()
                if c:
                    countries.append(c)
            # dict.fromkeys drops repeats and keeps first-seen order, so a
            # trial with many sites in one country lists it only once.
            city_country = ", ".join(dict.fromkeys(countries))

        values = (
            r.get("score", 0),
            r.get("title", ""),
            r.get("sponsor", ""),
            city_country,
            r.get("status", ""),
            r.get("phases", ""),
            r.get("conditions", ""),
            nct_display,
            r.get("source", ""),
        )
        iid = self.tree.insert("", "end", values=values)
        if r.get("url"):
            self._url_by_item[iid] = r["url"]
        if r.get("study"):
            self._study_by_item[iid] = r["study"]

    if len(rows) > display_limit:
        # Report what is on screen, not the full row count.
        txt = f"Fetched {total} trials; showing first {display_limit} of {len(rows)} after filters."
    else:
        txt = f"Fetched {total} trials; showing {len(rows)} after filters."
    if skipped:
        txt += f" Skipped {skipped}."
    self.status_lbl.configure(text=txt)
    for btn in (self.btn_search, self.btn_search_uk, self.btn_search_eu):
        btn.configure(state=tk.NORMAL)
|
| 479 |
+
|
| 480 |
+
def _populate_contacts(self, study: Dict[str, Any]):
    """Show the contacts and sites of ``study`` in the contacts text box.

    Reads ``protocolSection.contactsLocationsModule`` from ``study`` and
    lists, in this order, its central contacts, overall officials and
    locations, each location being followed by its own contacts.  Fields
    that are missing or empty are left out of a line, and a line with no
    remaining fields is not written.  When nothing was written at all, a
    single placeholder sentence is shown instead.

    The text box is made editable only for the duration of the update.
    """

    def cleaned(entry, keys):
        # Strip every requested field (missing/falsy values count as "")
        # and keep only the ones that still carry text.
        stripped = [(entry.get(key) or "").strip() for key in keys]
        return [text for text in stripped if text]

    report: List[str] = []

    def add_people(heading, people, keys):
        # A section heading is written only when the section has entries.
        if not people:
            return
        report.append(heading)
        for person in people:
            fields = cleaned(person, keys)
            if fields:
                report.append(" - " + " | ".join(fields))

    protocol = study.get("protocolSection", {}) or {}
    module = protocol.get("contactsLocationsModule", {}) or {}

    add_people(
        "Central Contacts:",
        ensure_list(module.get("centralContacts")),
        ("name", "role", "phone", "email"),
    )
    add_people(
        "Overall Officials:",
        ensure_list(module.get("overallOfficials")),
        ("name", "role", "affiliation"),
    )

    sites = ensure_list(module.get("locations"))
    if sites:
        report.append("Locations:")
        for site in sites:
            address = ", ".join(
                cleaned(
                    site,
                    ("locationFacility", "locationCity", "locationState", "locationCountry"),
                )
            )
            site_status = (site.get("status") or "").strip()
            if address:
                suffix = f" (status: {site_status})" if site_status else ""
                report.append(f" - {address}{suffix}")
            # Contacts attached to this site; "locationContacts" is consulted
            # only when "contacts" yields nothing.
            site_contacts = ensure_list(site.get("contacts")) or ensure_list(site.get("locationContacts"))
            for person in site_contacts:
                fields = cleaned(person, ("name", "role", "phone", "email"))
                if fields:
                    report.append(" • " + " | ".join(fields))

    if not report:
        report.append("No contacts/locations provided by sponsor at this time.")

    # Unlock the widget, replace its whole content, then lock it again.
    self.contacts_text.config(state="normal")
    self.contacts_text.delete("1.0", tk.END)
    self.contacts_text.insert(tk.END, "\n".join(report))
    self.contacts_text.config(state="disabled")
|
| 544 |
+
|
| 545 |
+
# ----- PDF export -----
|
| 546 |
+
def on_save_pdf(self):
    """Ask for a destination file and export the current results as a PDF.

    Shows an information box and returns when there are no current rows,
    returns without further action when the save dialog yields no path,
    and otherwise reports the success or failure of the export in a
    message box.
    """
    if not self._current_rows:
        messagebox.showinfo("Save PDF", "No results to export. Perform a search first.")
        return

    dialog_options = {
        "title": "Save PDF",
        "defaultextension": ".pdf",
        "filetypes": [("PDF files", "*.pdf")],
        "initialfile": "brain_trials_results.pdf",
    }
    destination = filedialog.asksaveasfilename(**dialog_options)
    if not destination:
        # No path came back from the dialog, so there is nothing to write.
        return

    try:
        self._export_pdf(self._current_rows, destination)
        messagebox.showinfo("Save PDF", f"Saved: {destination}")
    except Exception as error:
        # Any export failure is surfaced to the user instead of propagating.
        messagebox.showerror("Save PDF", f"Failed to create PDF.\n{error}")
|
| 563 |
+
|
| 564 |
+
def _export_pdf(self, rows: List[Dict[str, Any]], path: str):
    """Write ``rows`` to ``path`` as an A4 PDF report, one entry per trial.

    Args:
        rows: Result rows. The keys read from each row are ``title``,
            ``nct``, ``sponsor``, ``status``, ``phases``, ``city_country``,
            ``score``, ``url`` and ``source``; all of them are optional.
        path: Destination file handed to ReportLab's ``SimpleDocTemplate``.

    Raises:
        Whatever ReportLab raises while importing or building the document;
        the caller is expected to report it.

    Row values are escaped before being placed into ``Paragraph`` markup.
    ReportLab parses paragraph text as XML-like markup, so a raw ``&``,
    ``<`` or ``>`` in a trial title, sponsor or URL could otherwise break
    the export or garble the output.
    """
    from xml.sax.saxutils import escape

    from reportlab.lib.pagesizes import A4
    from reportlab.lib.styles import getSampleStyleSheet
    from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
    from reportlab.lib.units import mm

    def text(value: Any, placeholder: str = "") -> str:
        # Missing/empty values become the placeholder; everything else is
        # rendered as literal text rather than interpreted as markup.
        return escape(str(value)) if value else placeholder

    doc = SimpleDocTemplate(
        path,
        pagesize=A4,
        leftMargin=15 * mm,
        rightMargin=15 * mm,
        topMargin=15 * mm,
        bottomMargin=15 * mm,
    )
    styles = getSampleStyleSheet()
    normal = styles["Normal"]

    story = [
        Paragraph("Brain Cancer Trials – Results", styles["Title"]),
        Spacer(1, 6),
        Paragraph(f"Total shown: {len(rows)}", normal),
        Spacer(1, 12),
        # COPYRIGHT is an application constant, passed through unchanged.
        Paragraph(COPYRIGHT, normal),
    ]

    for r in rows:
        nct = r.get("nct", "")
        # Fall back to the ClinicalTrials.gov study page when the row has
        # no explicit URL but does have an NCT identifier.
        url = r.get("url") or (f"https://clinicaltrials.gov/study/{nct}" if nct else "")

        story.append(Paragraph(f"<b>{text(r.get('title'))}</b>", styles["Heading4"]))
        meta = (
            f"NCT: {text(nct, '—')} | Sponsor: {text(r.get('sponsor'), '—')} | "
            f"City/Country: {text(r.get('city_country'), '—')} | "
            f"Status: {text(r.get('status'), '—')} | Phases: {text(r.get('phases'), '—')} | "
            f"Score: {escape(str(r.get('score', 0)))}"
        )
        story.append(Paragraph(meta, normal))
        if url:
            # The href is single-quoted, so a quote inside the URL must be
            # escaped as well to keep the attribute intact.
            href = escape(str(url), {"'": "&#39;"})
            story.append(Paragraph(f"URL: <a href='{href}' color='blue'>{text(url)}</a>", normal))
        source_txt = r.get("source", "")
        if source_txt:
            story.append(Paragraph(f"Source: {text(source_txt)}", normal))
        story.append(Spacer(1, 8))

    doc.build(story)
|
| 603 |
+
|
| 604 |
+
|
| 605 |
+
if __name__ == "__main__":
    # Launch only when executed as a script, not when imported:
    # create the app and enter its main loop.
    BrainTrialsApp().mainloop()
|
logo_precise.png
ADDED
|
Git LFS Details
|
runtime.txt
ADDED
|
@@ -0,0 +1 @@
|
|
|
|
|
|
|
| 1 |
+
python-3.10
|
streamlit_app.py
ADDED
|
@@ -0,0 +1,12 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# Entry point for Streamlit Cloud; the existing app is reused as-is.
# Keep the module name and path exactly as written below so that Linux
# (Streamlit Cloud) can import it.

# IMPORTANT: st.set_page_config must not be called here, to avoid double
# configuration. The original module handles all Streamlit layout and rendering.

try:
    # Importing the module runs the Streamlit app defined at its top level.
    import GUI_CLinicalTrial  # noqa: F401
except Exception as import_error:
    import streamlit as st

    st.error("Failed to import GUI_CLinicalTrial.py. Ensure the file exists and has no syntax errors.")
    st.exception(import_error)
|
uk_sources.py
ADDED
|
@@ -0,0 +1,80 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
+
# UK sources aggregator (initial: ClinicalTrials.gov UK filter)
|
| 2 |
+
from typing import List, Dict, Any, Tuple
|
| 3 |
+
|
| 4 |
+
from ctgov_client import (
|
| 5 |
+
build_terms,
|
| 6 |
+
fetch_all_terms,
|
| 7 |
+
score_trial,
|
| 8 |
+
extract_row,
|
| 9 |
+
)
|
| 10 |
+
|
| 11 |
+
# Status values passed to fetch_all_terms() when querying for UK trials.
STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
|
| 12 |
+
|
| 13 |
+
|
| 14 |
+
def _normalize_key(row: Dict[str, Any]) -> str:
|
| 15 |
+
# Prefer identifiers; fallback to normalized title
|
| 16 |
+
nct = (row.get("nct") or "").strip()
|
| 17 |
+
if nct:
|
| 18 |
+
return f"NCT:{nct}"
|
| 19 |
+
title = (row.get("title") or "").lower().strip()
|
| 20 |
+
return f"TITLE:{title}"
|
| 21 |
+
|
| 22 |
+
|
| 23 |
+
def fetch_uk_trials(
    diagnosis: str,
    keywords: str,
    intake: Dict[str, Any],
    include_ctgov: bool = True,
) -> Tuple[List[Dict[str, Any]], int, int]:
    """
    Fetch UK trials across selected sources.

    Currently implemented: ClinicalTrials.gov with UK site filter.

    Args:
        diagnosis: Diagnosis text passed to ``build_terms``.
        keywords: Extra keywords passed to ``build_terms``.
        intake: Intake data passed to ``score_trial`` for every study.
        include_ctgov: When False the ClinicalTrials.gov query is skipped.

    Returns: (rows, total_raw, skipped)
      rows: rows from ``extract_row`` with ``site``, ``score``, ``reasons``
        and ``url`` set here; de-duplicated and sorted by descending score
      total_raw: number of raw studies fetched before filters
      skipped: number of studies whose processing raised an exception
    """
    terms = build_terms(diagnosis, keywords)
    rows: List[Dict[str, Any]] = []
    skipped = 0
    total_raw = 0

    if include_ctgov:
        studies = fetch_all_terms(terms, STATUSES, page_size=100, max_pages=5)
        total_raw += len(studies)
        for s in studies:
            # Best effort: a malformed study is counted and skipped rather
            # than aborting the whole search.
            try:
                ps = (s.get("protocolSection", {}) or {})
                locs = ((ps.get("contactsLocationsModule", {}) or {}).get("locations") or [])
                # UK filter (case-insensitive contains)
                # NOTE(review): the public ClinicalTrials.gov v2 API appears to
                # name these fields "facility"/"city"/"country"; confirm that
                # the studies reaching this point use the "location*" names.
                uk_locs = [L for L in locs if "united kingdom" in (L.get("locationCountry") or "").lower()]
                if not uk_locs:
                    continue
                sc, reasons = score_trial(s, intake)
                base = extract_row(s)
                # Replace site with first UK site. Only non-empty parts are
                # joined, so a missing facility or city no longer produces
                # output such as ", , United Kingdom" or "None, ...".
                first_site = uk_locs[0]
                site_parts = [
                    str(first_site.get(field) or "").strip()
                    for field in ("locationFacility", "locationCity", "locationCountry")
                ]
                base["site"] = ", ".join(part for part in site_parts if part)
                base["score"] = sc
                base["reasons"] = "; ".join(reasons)
                base["url"] = f"https://clinicaltrials.gov/study/{base['nct']}" if base.get("nct") else ""
                rows.append(base)
            except Exception:
                skipped += 1
                continue

    # Deduplicate, keeping the first row seen for each key
    seen = set()
    deduped: List[Dict[str, Any]] = []
    for r in rows:
        k = _normalize_key(r)
        if k in seen:
            continue
        seen.add(k)
        deduped.append(r)

    # Sort by descending score; the sort is stable, so ties keep fetch order
    deduped.sort(key=lambda x: x.get("score", 0), reverse=True)
    return deduped, total_raw, skipped
|
| 80 |
+
|