PrazNeuro commited on
Commit
b950dbe
·
verified ·
1 Parent(s): 767eb2e

Upload 12 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Brain[[:space:]]Cancer[[:space:]]Trial[[:space:]]Finder.exe filter=lfs diff=lfs merge=lfs -text
37
+ logo_precise.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ .venv/
6
+ .env
7
+
8
+ # Data, models, caches
9
+ models_*/
10
+ models*/
11
+ cache_dir/
12
+ *.joblib
13
+ *.pkl
14
+
15
+ # Logs
16
+ *.log
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+
Brain Cancer Trial Finder.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:977b04e5ded6299f555c6490cc29a88559dd6a1bdfaab2df2e405aa37a0219da
3
+ size 4759201
GUI_CLinicalTrial.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # python
2
+ # GUI_CLinicalTrial.py — run with: streamlit run GUI_CLinicalTrial.py
3
+ import re
4
+ import requests
5
+ import streamlit as st
6
+
7
+ st.set_page_config(page_title="Brain Trials Finder", layout="wide")
8
+
9
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
10
+
11
+ DEFAULT_DIAG_TERMS = {
12
+ "Glioblastoma": ["glioblastoma", "GBM", "grade 4 astrocytoma"],
13
+ "Diffuse midline glioma": ["diffuse midline glioma", "DMG"],
14
+ "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
15
+ }
16
+
17
+
18
def ensure_list(v):
    """Coerce *v* to a list: lists pass through, None becomes [], scalars are wrapped."""
    if v is None:
        return []
    return v if isinstance(v, list) else [v]
24
+
25
+
26
def mentions(text: str, needle: str) -> bool:
    """Case-insensitive substring check; empty or None *text* never matches."""
    return bool(text) and needle.lower() in text.lower()
30
+
31
+
32
+ def _to_int(v):
33
+ try:
34
+ if v is None:
35
+ return None
36
+ if isinstance(v, (int, float)):
37
+ return int(v)
38
+ # Extract first integer from strings like "18 Years"
39
+ m = re.search(r"(\d+)", str(v))
40
+ return int(m.group(1)) if m else None
41
+ except Exception:
42
+ return None
43
+
44
+
45
def build_terms(diagnosis: str, keywords: str):
    """Merge canned terms for *diagnosis* with user keywords (comma-separated).

    Duplicates are dropped while preserving order; falls back to
    ["brain tumor"] when nothing remains.
    """
    user_terms = [part.strip() for part in (keywords or "").split(",") if part.strip()]
    combined = list(dict.fromkeys(DEFAULT_DIAG_TERMS.get(diagnosis, []) + user_terms))
    return combined or ["brain tumor"]
50
+
51
+
52
def build_expr(diagnosis: str, keywords: str) -> str:
    """Build an OR-joined query string for the v2 API; multi-word terms are quoted."""
    pieces = []
    for term in build_terms(diagnosis, keywords):
        pieces.append(f'"{term}"' if " " in term else term)
    return " OR ".join(pieces)
56
+
57
+
58
@st.cache_data(ttl=3600)
def ctgov_search(expr: str, statuses, page_size: int = 100, max_pages: int = 5):
    """Return a list of study dicts from ClinicalTrials.gov v2.

    Follows pageToken pagination for at most *max_pages* pages; results are
    cached by Streamlit for one hour.
    """
    endpoint = "https://clinicaltrials.gov/api/v2/studies"
    collected = []
    next_token = None
    for _page in range(max_pages):
        query = {
            "query.term": expr,
            "pageSize": page_size,
            "filter.overallStatus": ",".join(statuses),
        }
        if next_token:
            query["pageToken"] = next_token
        resp = requests.get(endpoint, params=query, timeout=30)
        resp.raise_for_status()
        payload = resp.json() or {}
        collected.extend(payload.get("studies") or [])
        next_token = payload.get("nextPageToken")
        if not next_token:
            break
    return collected
81
+
82
+
83
def extract_row(study: dict) -> dict:
    """Flatten a v2 study dict into display fields; safe against missing keys.

    Returns a dict with keys: title, nct, status, phases, conditions, sponsor.
    """
    ps = (study.get("protocolSection") or {})
    idm = (ps.get("identificationModule") or {})
    scm = (ps.get("statusModule") or {})
    dsm = (ps.get("designModule") or {})
    cdnm = (ps.get("conditionsModule") or {})
    slm = (ps.get("sponsorCollaboratorsModule") or {})

    title = (idm.get("officialTitle") or idm.get("briefTitle") or "").strip()
    nct = (idm.get("nctId") or "").strip()

    # e.g. "NOT_YET_RECRUITING" -> "Not Yet Recruiting"
    status_raw = (scm.get("overallStatus") or "").strip()
    status = status_raw.replace("_", " ").title() if status_raw else ""

    def fmt_phase(p: str) -> str:
        # Pretty-print API phase codes: "PHASE2" -> "Phase 2", "PHASE1_2" -> "Phase 1/2".
        # (The original also chained .replace("1/2", "1/2").replace("2/3", "2/3"),
        # which replaced each string with itself — dead code, removed.)
        p = str(p or "").upper()
        if p.startswith("PHASE"):
            pnum = p.replace("PHASE", "").replace("_", "/").strip()
            return f"Phase {pnum}" if pnum else "Phase"
        return p.title() if p else ""

    phases_list = ensure_list(dsm.get("phases"))
    phases = ", ".join(fmt_phase(p) for p in phases_list if p)

    conditions = ", ".join(ensure_list(cdnm.get("conditions")))

    sponsor = ""
    lead = slm.get("leadSponsor") or {}
    if isinstance(lead, dict):
        sponsor = (lead.get("name") or "").strip()

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
    }
123
+
124
+
125
def score_trial(study: dict, intake: dict):
    """Heuristically score *study* against the patient *intake*.

    Returns (score, reasons), where reasons lists likely exclusion causes.
    """
    ps = (study.get("protocolSection") or {})
    scm = (ps.get("statusModule") or {})
    dsm = (ps.get("designModule") or {})
    elm = (ps.get("eligibilityModule") or {})
    idm = (ps.get("identificationModule") or {})

    score = 0
    reasons = []

    # Actively recruiting trials are preferred over not-yet-recruiting ones.
    status = (scm.get("overallStatus") or "")
    if status == "RECRUITING":
        score += 15
    elif status == "NOT_YET_RECRUITING":
        score += 8

    # Later-phase trials earn a bonus (a combined phase code can earn both).
    phase_codes = [str(p).upper() for p in ensure_list(dsm.get("phases"))]
    if any("PHASE3" in code for code in phase_codes):
        score += 12
    if any("PHASE2" in code for code in phase_codes):
        score += 8

    # Hard age-eligibility penalties; reasons quote the raw API strings.
    min_age_raw = elm.get("minimumAge")
    max_age_raw = elm.get("maximumAge")
    min_age = _to_int(min_age_raw)
    max_age = _to_int(max_age_raw)
    age = int(intake.get("age") or 0)
    if min_age is not None and age < min_age:
        reasons.append(f"Age below minimum ({min_age_raw}).")
        score -= 30
    if max_age is not None and age > max_age:
        reasons.append(f"Age above maximum ({max_age_raw}).")
        score -= 30

    # Performance-status heuristic from the free-text criteria.
    crit = elm.get("eligibilityCriteria") or ""
    kps = int(intake.get("kps") or 0)
    if mentions(crit, "Karnofsky") and kps < 70:
        score -= 10
        reasons.append("Requires KPS ≥70.")

    # Small bonus for each user keyword found in title/summary.
    title = (idm.get("briefTitle") or idm.get("officialTitle") or "")
    summary = (ps.get("descriptionModule", {}) or {}).get("briefSummary") or ""
    blob = " ".join([title, summary])
    user_keywords = [k.strip() for k in (intake.get("keywords") or "").split(",") if k.strip()]
    for kw in user_keywords:
        if mentions(blob, kw):
            score += 2

    return score, reasons
177
+
178
+
179
+ # UI
180
+ st.title("Brain Cancer Trials Finder (MVP)")
181
+
182
+ with st.sidebar:
183
+ diagnosis = st.selectbox(
184
+ "Diagnosis",
185
+ ["Glioblastoma", "Diffuse midline glioma", "Anaplastic astrocytoma", "Other"],
186
+ index=0,
187
+ )
188
+ setting = st.selectbox("Setting", ["Newly diagnosed", "Recurrent"], index=1)
189
+ age = st.number_input("Age", min_value=1, max_value=100, value=55)
190
+ kps = st.slider("Karnofsky (KPS)", min_value=40, max_value=100, step=10, value=80)
191
+ prior_bev = st.checkbox("Prior bevacizumab", value=False)
192
+ keywords = st.text_input("Keywords (comma-separated)", value="immunotherapy,vaccine,device")
193
+ do_search = st.button("Search", type="primary")
194
+
195
+ # Trigger search on first load too
196
+ if do_search or "did_first" not in st.session_state:
197
+ st.session_state["did_first"] = True
198
+ expr = build_expr(diagnosis, keywords)
199
+ studies = ctgov_search(expr, STATUSES, page_size=100, max_pages=5)
200
+
201
+ intake = {
202
+ "age": age,
203
+ "kps": kps,
204
+ "prior_bev": prior_bev,
205
+ "setting": setting,
206
+ "keywords": keywords,
207
+ "diagnosis": diagnosis,
208
+ }
209
+
210
+ rows = []
211
+ for sdict in studies:
212
+ try:
213
+ sc, reasons = score_trial(sdict, intake)
214
+ row = extract_row(sdict)
215
+ nct = row.get("nct") or ""
216
+ url = f"https://clinicaltrials.gov/study/{nct}" if nct else ""
217
+ rows.append(
218
+ (
219
+ sc,
220
+ row.get("title", ""),
221
+ nct,
222
+ row.get("status", ""),
223
+ row.get("phases", ""),
224
+ row.get("conditions", ""),
225
+ row.get("sponsor", ""),
226
+ reasons,
227
+ url,
228
+ sdict,
229
+ )
230
+ )
231
+ except Exception:
232
+ continue
233
+
234
+ rows = sorted(rows, key=lambda x: -x[0])[:50]
235
+
236
+ st.caption(f"Found {len(studies)} studies; showing top {len(rows)} by score.")
237
+
238
+ for sc, title, nct, status, phases, conds, sponsor, reasons, url, study in rows:
239
+ with st.container(border=True):
240
+ if url:
241
+ st.markdown(f"**[{title}]({url})**")
242
+ else:
243
+ st.markdown(f"**{title}**")
244
+ meta = f"NCT: {nct or '—'} · Sponsor: {sponsor or '—'} · Status: {status or '—'} · Phases: {phases or '—'} · Score: {sc}"
245
+ st.write(meta)
246
+ if conds:
247
+ st.write(f"Conditions: {conds}")
248
+
249
+ with st.expander("Contacts and Locations"):
250
+ ps = (study.get("protocolSection") or {})
251
+ clm = (ps.get("contactsLocationsModule") or {})
252
+
253
+ centrals = ensure_list(clm.get("centralContacts"))
254
+ if centrals:
255
+ st.write("Central Contacts:")
256
+ for c in centrals:
257
+ parts = [c.get("name"), c.get("role"), c.get("phone"), c.get("email")]
258
+ st.write(" - " + " | ".join([p for p in parts if p]))
259
+
260
+ officials = ensure_list(clm.get("overallOfficials"))
261
+ if officials:
262
+ st.write("Overall Officials:")
263
+ for o in officials:
264
+ parts = [o.get("name"), o.get("role"), o.get("affiliation")]
265
+ st.write(" - " + " | ".join([p for p in parts if p]))
266
+
267
+ locs = ensure_list(clm.get("locations"))
268
+ if locs:
269
+ st.write("Locations:")
270
+ for L in locs:
271
+ facility = (L.get("locationFacility") or "").strip()
272
+ city = (L.get("locationCity") or "").strip()
273
+ state = (L.get("locationState") or "").strip()
274
+ country = (L.get("locationCountry") or "").strip()
275
+ status_l = (L.get("status") or "").strip()
276
+ site_line = ", ".join([p for p in [facility, city, state, country] if p])
277
+ if site_line:
278
+ st.write(f" - {site_line}" + (f" (status: {status_l})" if status_l else ""))
279
+ lcontacts = ensure_list(L.get("contacts")) or ensure_list(L.get("locationContacts"))
280
+ for lc in lcontacts:
281
+ parts = [lc.get("name"), lc.get("role"), lc.get("phone"), lc.get("email")]
282
+ parts = [p for p in parts if p]
283
+ if parts:
284
+ st.write(" • " + " | ".join(parts))
285
+
286
+ if reasons:
287
+ with st.expander("Why this score?"):
288
+ for r in reasons:
289
+ st.write(f"- {r}")
GUI_CLinicalTrial.spec ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- mode: python ; coding: utf-8 -*-
2
+
3
+
4
+ a = Analysis(
5
+ ['GUI_CLinicalTrial.py'],
6
+ pathex=[],
7
+ binaries=[],
8
+ datas=[],
9
+ hiddenimports=[],
10
+ hookspath=[],
11
+ hooksconfig={},
12
+ runtime_hooks=[],
13
+ excludes=[],
14
+ noarchive=False,
15
+ optimize=0,
16
+ )
17
+ pyz = PYZ(a.pure)
18
+
19
+ exe = EXE(
20
+ pyz,
21
+ a.scripts,
22
+ [],
23
+ exclude_binaries=True,
24
+ name='GUI_CLinicalTrial',
25
+ debug=False,
26
+ bootloader_ignore_signals=False,
27
+ strip=False,
28
+ upx=True,
29
+ console=True,
30
+ disable_windowed_traceback=False,
31
+ argv_emulation=False,
32
+ target_arch=None,
33
+ codesign_identity=None,
34
+ entitlements_file=None,
35
+ )
36
+ coll = COLLECT(
37
+ exe,
38
+ a.binaries,
39
+ a.datas,
40
+ strip=False,
41
+ upx=True,
42
+ upx_exclude=[],
43
+ name='GUI_CLinicalTrial',
44
+ )
Neuro_onc_clinicalTrial.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import csv
4
+ import json
5
+ from typing import List, Dict, Any
6
+
7
+ from ctgov_client import (
8
+ DEFAULT_DIAG_TERMS,
9
+ build_terms,
10
+ fetch_all_terms,
11
+ score_trial,
12
+ extract_row,
13
+ )
14
+
15
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
16
+
17
+
18
def save_results(rows: List[Dict[str, Any]], csv_path: str, json_path: str):
    """Persist scored rows to CSV (stable column order) and pretty-printed JSON.

    Does nothing (beyond a notice) when *rows* is empty.
    """
    if not rows:
        print("No studies found.")
        return
    # Stable header order so spreadsheets look the same run to run.
    fieldnames = [
        "score",
        "title",
        "nct",
        "url",
        "status",
        "phases",
        "conditions",
        "site",
        "reasons",
    ]
    with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows({name: row.get(name, "") for name in fieldnames} for row in rows)
    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(rows, json_file, indent=2, ensure_ascii=False)
    print(f"Wrote {len(rows)} studies to {csv_path} and {json_path}")
+
43
+
44
def main():
    """CLI entry point: query ClinicalTrials.gov, score, filter and save trials."""
    parser = argparse.ArgumentParser(
        description="Download actively recruiting neuro-oncology trials from ClinicalTrials.gov v2 API (robust client)"
    )
    parser.add_argument(
        "--diagnosis",
        default="Glioblastoma",
        choices=list(DEFAULT_DIAG_TERMS.keys()) + ["Other"],
        help="Primary diagnosis category to search for.",
    )
    parser.add_argument(
        "--keywords",
        default="",
        help="Extra keywords (comma-separated) to refine search.",
    )
    parser.add_argument("--age", type=int, default=55, help="Patient age (years)")
    parser.add_argument("--kps", type=int, default=80, help="Karnofsky Performance Status (40-100)")
    parser.add_argument("--prior-bev", action="store_true", help="Indicate prior bevacizumab exposure")
    parser.add_argument(
        "--setting",
        default="Recurrent",
        choices=["Newly diagnosed", "Recurrent"],
        help="Disease setting",
    )
    parser.add_argument("--country", default="", help="Filter: require location country containing this text (case-insensitive)")
    parser.add_argument("--require-country", action="store_true", help="If set, require at least one site in the given country text")
    parser.add_argument("--csv", default="neuro_onc_trials.csv", help="CSV output path")
    parser.add_argument("--json", default="neuro_onc_trials.json", help="JSON output path")
    parser.add_argument("--page-size", type=int, default=100, help="Results per page per term (max 1000)")
    parser.add_argument("--pages", type=int, default=5, help="Max pages to fetch per term")
    args = parser.parse_args()

    terms = build_terms(args.diagnosis, args.keywords)
    print("Searching ClinicalTrials.gov for:")
    print(" Diagnosis:", args.diagnosis)
    if args.keywords:
        print(" Extra keywords:", args.keywords)

    studies = fetch_all_terms(terms, STATUSES, page_size=args.page_size, max_pages=args.pages)

    # Intake profile shared across all score_trial calls.
    intake = dict(
        age=args.age,
        kps=args.kps,
        prior_bev=args.prior_bev,
        setting=args.setting,
        keywords=args.keywords,
        diagnosis=args.diagnosis,
    )

    rows: List[Dict[str, Any]] = []
    skipped = 0
    for study in studies:
        try:
            ps = (study.get("protocolSection", {}) or {})
            locs = ((ps.get("contactsLocationsModule", {}) or {}).get("locations") or [])
            # NOTE(review): the country text only filters sites when
            # --require-country is also set — confirm that is intended.
            if args.country and args.require_country:
                needle = args.country.lower()
                locs = [site for site in locs if needle in (site.get("locationCountry") or "").lower()]
            if args.require_country and not locs:
                continue
            sc, reasons = score_trial(study, intake)
            row = extract_row(study)
            row["score"] = sc
            row["reasons"] = "; ".join(reasons)
            row["url"] = f"https://clinicaltrials.gov/study/{row['nct']}" if row.get("nct") else ""
            rows.append(row)
        except Exception:
            # A single malformed study must not abort the whole run.
            skipped += 1
            continue

    rows.sort(key=lambda r: -r.get("score", 0))
    print(f"Fetched {len(studies)} trials; showing {len(rows)} after filters. Skipped {skipped}.")

    save_results(rows, args.csv, args.json)
+
119
+
120
+ if __name__ == "__main__":
121
+ main()
README.md CHANGED
@@ -1,3 +1,69 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Project: PRECISE-GBM - Model training & retraining helpers
2
+
3
+ Overview
4
+
5
+ This repository contains code to train models (Gaussian Mixture labelling + SVM and ensemble classifiers) and to persist all artifacts required to reproduce or retrain models on new data. It includes:
6
+
7
+ - `Scenario_heldout_final_PRECISE.py` — training pipeline producing `.joblib` models and metadata JSONs (selected features, best params, CV results).
8
+ - `retrain_helper.py` — CLI utility to rebuild pipelines, set best params and retrain using saved selected-features and params JSONs. Supports JSON/YAML config files and auto-detection of model type.
9
+ - `README_RETRAIN.md` — detailed retrain examples and a notebook cell.
10
+
11
+ This repo also includes helper files to make it ready for GitHub:
12
+ - `requirements.txt` — Python dependencies
13
+ - `.gitignore` — recommended ignores (models, caches, logs)
14
+ - `LICENSE` — MIT license
15
+ - GitHub Actions workflow for CI (pytest smoke test)
16
+
17
+ Getting started (Windows PowerShell)
18
+
19
+ 1) Create and activate a virtual environment
20
+
21
+ ```powershell
22
+ python -m venv .venv
23
+ .\.venv\Scripts\Activate.ps1
24
+ ```
25
+
26
+ 2) Install dependencies
27
+
28
+ ```powershell
29
+ pip install --upgrade pip
30
+ pip install -r requirements.txt
31
+ ```
32
+
33
+ 3) Run training (note: the training script reads data from absolute paths configured in the script — adjust them or run from an environment where those files are present)
34
+
35
+ ```powershell
36
+ python Scenario_heldout_final_PRECISE.py
37
+ ```
38
+
39
+ The training script will create model files under `models_LM22/` and `models_GBM/` and write metadata JSONs next to each joblib model (selected features, params, cv results) as well as group-level JSON summaries.
40
+
41
+ Retraining
42
+
43
+ See `README_RETRAIN.md` for detailed CLI and notebook examples. Short example:
44
+
45
+ ```powershell
46
+ python retrain_helper.py \
47
+ --model-prefix "models_GBM/scenario_1/GBM_scen1_Tcell" \
48
+ --train-csv "data\new_train.csv" \
49
+ --label-col "label"
50
+ ```
51
+
52
+ Notes
53
+
54
+ - The training script contains hard-coded absolute paths to data files. Before running on another machine, update the `scenarios_*` file paths or place the datasets in the same paths.
55
+ - Retrain helper auto-detects model type when `--model-type` is omitted by looking for `{prefix}_svm_params.json` or `{prefix}_ens_params.json`.
56
+ - YAML config support for retrain requires PyYAML (`pip install pyyaml`).
57
+
58
+ CI
59
+
60
+ A basic GitHub Actions workflow runs a smoke pytest to ensure the retrain helper imports and basic pipeline construction works. It does not run heavy training.
61
+
62
+ Contributing
63
+
64
+ See `CONTRIBUTING.md` for guidance on opening issues and PRs.
65
+
66
+ License
67
+
68
+ This project is released under the MIT License — see `LICENSE`.
69
+
ctgov_client.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Shared client for ClinicalTrials.gov v2 API and scoring
2
+ import re
3
+ import requests
4
+ from typing import Any, Dict, List, Tuple
5
+
6
+ DEFAULT_DIAG_TERMS = {
7
+ "Glioblastoma": ["glioblastoma", "GBM", "glioblastoma multiforme"],
8
+ "Diffuse midline glioma": ["diffuse midline glioma", "DMG", "H3 K27M"],
9
+ "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
10
+ "Astrocytoma": ["astrocytoma", "grade 2 astrocytoma", "grade 4 astrocytoma"],
11
+ "Oligodendroglioma": ["oligodendroglioma", "1p19q codeleted"],
12
+ "Meningioma": ["meningioma"],
13
+ "Medulloblastoma": ["medulloblastoma"],
14
+ "Ependymoma": ["ependymoma"],
15
+ "Spinal cord tumor": ["spinal cord tumor", "spinal cord neoplasm"],
16
+ }
17
+
18
+ API_BASE = "https://clinicaltrials.gov/api/v2/studies"
19
+ UA = {"User-Agent": "BrainTrialsFinder-Desktop/1.0 (+https://clinicaltrials.gov)"}
20
+
21
+
22
def build_terms(diagnosis: str, keywords: str) -> List[str]:
    """Search terms for *diagnosis*: the canned list when known, otherwise
    generic CNS terms, followed by any comma-separated user keywords."""
    if diagnosis in DEFAULT_DIAG_TERMS:
        terms: List[str] = list(DEFAULT_DIAG_TERMS[diagnosis])
    else:
        terms = ["brain tumor", "spinal cord tumor", "CNS tumor"]
    extras = [kw.strip() for kw in (keywords or "").split(",") if kw.strip()]
    return terms + extras
30
+
31
+
32
def ctgov_search_one(term: str, statuses: List[str], page_size: int = 100, max_pages: int = 5) -> List[Dict[str, Any]]:
    """Fetch up to *max_pages* pages of v2 API results for one search term."""
    http = requests.Session()
    http.headers.update(UA)
    results: List[Dict[str, Any]] = []
    token = None
    pages_done = 0
    page_limit = max_pages or 0
    while pages_done < page_limit:
        params = {
            "query.term": term,
            "filter.overallStatus": ",".join(statuses),
            "pageSize": page_size,
        }
        if token:
            params["pageToken"] = token
        resp = http.get(API_BASE, params=params, timeout=30)
        resp.raise_for_status()
        payload = resp.json()
        batch = payload.get("studies", [])
        if not batch:
            break
        results.extend(batch)
        token = payload.get("nextPageToken")
        if not token:
            break
        pages_done += 1
    return results
59
+
60
+
61
def fetch_all_terms(terms: List[str], statuses: List[str], page_size=100, max_pages=5) -> List[Dict[str, Any]]:
    """Search every term and merge results, de-duplicating by NCT id.

    Terms whose requests fail with an HTTP error are silently skipped.
    """
    seen: Dict[str, Dict[str, Any]] = {}
    for term in terms:
        try:
            for study in ctgov_search_one(term, statuses, page_size=page_size, max_pages=max_pages):
                ident = (study.get("protocolSection", {}) or {}).get("identificationModule", {}) or {}
                # Fall back to object identity when a study has no NCT id.
                key = ident.get("nctId") or id(study)
                seen.setdefault(key, study)
        except requests.HTTPError:
            continue
    return list(seen.values())
74
+
75
+
76
def mentions(txt: str, term: str) -> bool:
    """Whole-word, case-insensitive search for *term* inside *txt* (None-safe)."""
    pattern = rf"\b{re.escape(term)}\b"
    return re.search(pattern, txt or "", re.I) is not None
78
+
79
+
80
def as_text(obj: Any) -> str:
    """Flatten an API value (None / dict / list / scalar) into a plain string."""
    if obj is None:
        return ""
    if isinstance(obj, dict):
        # Prefer the conventional text-bearing keys before dumping all values.
        for key in ("textblock", "textBlock", "value"):
            if key in obj:
                return str(obj.get(key) or "")
        return " ".join(str(val) for val in obj.values() if val is not None)
    if isinstance(obj, list):
        return "; ".join(as_text(item) for item in obj)
    return str(obj)
91
+
92
+
93
def parse_age_to_int(v: Any):
    """Extract an integer age from ints, floats, dicts ({"value": ...}), or
    strings like "18 Years"; returns None when nothing parses."""
    if v is None:
        return None
    if isinstance(v, dict):
        # API sometimes nests the value; recurse one level.
        return parse_age_to_int(v.get("value"))
    if isinstance(v, (int, float)):
        return int(v)
    digits = re.search(r"(\d+)", str(v))
    return int(digits.group(1)) if digits else None
102
+
103
+
104
def ensure_list(v: Any):
    """Normalise to a list: None -> [], lists pass through, scalars are wrapped."""
    if isinstance(v, list):
        return v
    return [] if v is None else [v]
110
+
111
+
112
def score_trial(t: Dict[str, Any], intake: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score a study (clamped to 0-100) against the patient intake.

    Returns (score, reasons). Heuristics: diagnosis match in conditions/title
    (+30), phase 2 (+8) / phase 3 (+12), hard age-eligibility penalties (-30),
    performance-status requirements (-10/-15), prior-bevacizumab exclusion
    (-25), setting match (+8), and +3 per matching user keyword.
    """
    safe_intake = intake or {}
    age_local = safe_intake.get("age")
    kps_local = safe_intake.get("kps")
    prior_bev_local = bool(safe_intake.get("prior_bev", False))
    setting_local = safe_intake.get("setting") or ""
    keywords_local = safe_intake.get("keywords") or ""
    diagnosis_local = safe_intake.get("diagnosis") or ""

    # Terms used to decide whether the trial targets this diagnosis.
    if diagnosis_local in DEFAULT_DIAG_TERMS:
        diag_terms = DEFAULT_DIAG_TERMS[diagnosis_local]
    elif diagnosis_local and diagnosis_local != "Other":
        diag_terms = [diagnosis_local]
    else:
        diag_terms = ["brain tumor", "CNS tumor", "spinal cord tumor"]

    ps = (t or {}).get("protocolSection") or {}
    elig = ps.get("eligibilityModule")
    crit = ""
    min_age = None
    max_age = None
    if isinstance(elig, dict):
        crit_raw = elig.get("eligibilityCriteria") or elig.get("criteria") or elig
        crit = as_text(crit_raw)
        min_age = parse_age_to_int(elig.get("minimumAge"))
        max_age = parse_age_to_int(elig.get("maximumAge"))
    elif isinstance(elig, str):
        crit = as_text(elig)

    # FIX: guard with `or {}` like the rest of the file — `ps.get(key, {})`
    # raised AttributeError when the module key was present with a None value.
    phases_list = ensure_list((ps.get("designModule") or {}).get("phases"))
    phases_up = [str(p).upper() for p in phases_list]
    conds_list = ensure_list((ps.get("conditionsModule") or {}).get("conditions"))
    title = (ps.get("identificationModule") or {}).get("briefTitle", "")

    s = 0
    reasons: List[str] = []
    if any(any(mentions(c, term) for term in diag_terms) for c in conds_list) or any(mentions(title, term) for term in diag_terms):
        s += 30
        reasons.append(f"Matches diagnosis: {diagnosis_local or 'neuro-oncology'}.")
    if any("PHASE 2" in p or "PHASE2" in p for p in phases_up):
        s += 8
    if any("PHASE 3" in p or "PHASE3" in p for p in phases_up):
        s += 12
    try:
        if min_age is not None and age_local is not None and age_local < min_age:
            reasons.append(f"Age below minimum ({min_age}).")
            s -= 30
        if max_age is not None and age_local is not None and age_local > max_age:
            reasons.append(f"Age above maximum ({max_age}).")
            s -= 30
    except Exception:
        # Non-numeric age inputs must not abort scoring.
        pass
    if mentions(crit, "ECOG 0-1") and (kps_local is None or kps_local < 80):
        s -= 15
        reasons.append("Requires ECOG 0–1 (KPS ~≥80).")
    if mentions(crit, "Karnofsky") and (kps_local is None or kps_local < 70):
        s -= 10
        reasons.append("Requires KPS ≥70.")
    if prior_bev_local and mentions(crit, "no prior bevacizumab"):
        s -= 25
        reasons.append("Excludes prior bevacizumab.")
    if setting_local == "Recurrent" and mentions(crit, "recurrent"):
        s += 8
    if setting_local == "Newly diagnosed" and (mentions(crit, "newly diagnosed") or mentions(title, "adjuvant")):
        s += 8
    for kw in [k.strip() for k in (keywords_local or "").split(",") if k.strip()]:
        if mentions(title, kw) or mentions(crit, kw):
            s += 3
    return max(0, min(100, s)), reasons
180
+ # python
181
def extract_row(study: dict) -> dict:
    """Return a flat row dict for the table/PDF. Safe against missing fields."""
    section = (study.get("protocolSection") or {})
    ident = (section.get("identificationModule") or {})
    status_mod = (section.get("statusModule") or {})
    design = (section.get("designModule") or {})
    cond_mod = (section.get("conditionsModule") or {})
    sponsor_mod = (section.get("sponsorCollaboratorsModule") or {})
    contacts = (section.get("contactsLocationsModule") or {})

    title = (ident.get("officialTitle") or ident.get("briefTitle") or "").strip()
    nct = (ident.get("nctId") or "").strip()

    # e.g., RECRUITING -> Recruiting
    raw_status = (status_mod.get("overallStatus") or "").strip()
    status = raw_status.replace("_", " ").title() if raw_status else ""

    phases = ", ".join(ensure_list(design.get("phases")))
    conditions = ", ".join(ensure_list(cond_mod.get("conditions")))

    lead = sponsor_mod.get("leadSponsor") or {}
    sponsor = (lead.get("name") or "").strip() if isinstance(lead, dict) else ""

    # First listed site rendered as "City, Country" (either part may be absent).
    city_country = ""
    sites = ensure_list(contacts.get("locations"))
    if sites:
        first = sites[0]
        bits = [
            (first.get("locationCity") or "").strip(),
            (first.get("locationCountry") or "").strip(),
        ]
        city_country = ", ".join(b for b in bits if b)

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
        "city_country": city_country,
    }
desktop_app.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # python
2
+ # Desktop GUI for Brain Trials Finder (no Streamlit)
3
+ # Run with: python desktop_app.py
4
+ import threading
5
+ import tkinter as tk
6
+ from tkinter import ttk, messagebox, filedialog
7
+ import urllib.parse
8
+ import webbrowser
9
+ from typing import List, Dict, Any
10
+
11
+ from ctgov_client import (
12
+ DEFAULT_DIAG_TERMS,
13
+ build_terms,
14
+ fetch_all_terms,
15
+ score_trial,
16
+ extract_row,
17
+ ensure_list,
18
+ )
19
+ from uk_sources import fetch_uk_trials
20
+ from euctr_client import fetch_eu_trials
21
+
22
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
23
+ COPYRIGHT = "© 2025 Brain Trials Finder | Prajwal Ghimire"
24
+ __copyright__ = COPYRIGHT
25
+
26
+ # Predefined NIHR UK location options for portal queries
27
+ UK_NIHR_LOCATIONS = [
28
+ "Nottingham",
29
+ "Liverpool",
30
+ "Preston",
31
+ "Brighton",
32
+ "Cardiff",
33
+ "Leeds",
34
+ "Plymouth",
35
+ "Coventry",
36
+ "Newcastle upon Tyne",
37
+ "Dundee",
38
+ "Cambridge",
39
+ "Birmingham",
40
+ "Hull",
41
+ "Stoke-on-Trent",
42
+ "Romford",
43
+ "Southampton",
44
+ "Bristol",
45
+ "Middlesbrough",
46
+ "London",
47
+ "Sheffield",
48
+ "Edinburgh",
49
+ "Oxford",
50
+ ]
51
+
52
+
53
class BrainTrialsApp(tk.Tk):
    """Tkinter desktop front-end for searching brain-cancer clinical trials.

    Sources: ClinicalTrials.gov (via ``ctgov_client``), the UK aggregator
    (``uk_sources``) and the EU Clinical Trials Register (``euctr_client``).
    Network fetches run on daemon threads; all widget updates are marshalled
    back onto the Tk main loop with ``self.after`` (Tkinter is not
    thread-safe).
    """

    def __init__(self):
        super().__init__()
        self.title(f"Brain Cancer Trials Finder - Desktop App - {COPYRIGHT}")
        self.geometry("1200x760")

        # ----- Top controls: patient intake and search filters -----
        frm = ttk.Frame(self, padding=10)
        frm.pack(fill="x")

        # Diagnosis (predefined terms from ctgov_client plus free-form "Other")
        ttk.Label(frm, text="Diagnosis:").grid(row=0, column=0, sticky=tk.W, padx=(0, 6))
        diag_options = list(DEFAULT_DIAG_TERMS.keys()) + ["Other"]
        self.diagnosis = tk.StringVar(value="Glioblastoma")
        ttk.Combobox(frm, textvariable=self.diagnosis, values=diag_options,
                     state="readonly", width=28).grid(row=0, column=1, sticky=tk.W)

        # Disease setting
        ttk.Label(frm, text="Setting:").grid(row=0, column=2, sticky=tk.W, padx=(16, 6))
        self.setting = tk.StringVar(value="Recurrent")
        ttk.Combobox(frm, textvariable=self.setting, values=["Newly diagnosed", "Recurrent"],
                     state="readonly", width=20).grid(row=0, column=3, sticky=tk.W)

        # Age
        ttk.Label(frm, text="Age:").grid(row=0, column=4, sticky=tk.W, padx=(16, 6))
        self.age = tk.IntVar(value=55)
        tk.Spinbox(frm, from_=1, to=100, textvariable=self.age, width=6).grid(row=0, column=5, sticky=tk.W)

        # KPS (Karnofsky performance status, steps of 10)
        ttk.Label(frm, text="KPS:").grid(row=0, column=6, sticky=tk.W, padx=(16, 6))
        self.kps = tk.IntVar(value=80)
        tk.Spinbox(frm, from_=40, to=100, increment=10, textvariable=self.kps, width=6).grid(row=0, column=7, sticky=tk.W)

        # Prior bevacizumab exposure
        self.prior_bev = tk.BooleanVar(value=False)
        ttk.Checkbutton(frm, text="Prior bevacizumab", variable=self.prior_bev).grid(row=1, column=1, sticky=tk.W, pady=(6, 0))

        # Free-text keywords (comma-separated)
        ttk.Label(frm, text="Keywords:").grid(row=1, column=2, sticky=tk.W, padx=(16, 6), pady=(6, 0))
        self.keywords = tk.StringVar(value="immunotherapy,vaccine,device")
        ttk.Entry(frm, textvariable=self.keywords, width=32).grid(row=1, column=3, sticky=tk.W, pady=(6, 0))

        # Country filter (optional substring match against site country)
        ttk.Label(frm, text="Country contains:").grid(row=1, column=4, sticky=tk.W, padx=(16, 6), pady=(6, 0))
        self.country = tk.StringVar(value="")
        ttk.Entry(frm, textvariable=self.country, width=18).grid(row=1, column=5, sticky=tk.W, pady=(6, 0))
        self.require_country = tk.BooleanVar(value=False)
        ttk.Checkbutton(frm, text="Require site in country", variable=self.require_country).grid(row=1, column=6, sticky=tk.W, pady=(6, 0))

        # Search button and status label
        self.btn_search = ttk.Button(frm, text="Search", command=self.on_search)
        self.btn_search.grid(row=0, column=8, sticky=tk.W, padx=(16, 0))
        self.status_lbl = ttk.Label(frm, text="Ready")
        self.status_lbl.grid(row=1, column=8, sticky=tk.W, padx=(16, 0))

        # ----- UK / EU sources section -----
        ukfrm = ttk.Labelframe(self, text="UK Sources", padding=10)
        ukfrm.pack(fill="x", padx=10)
        self.uk_use_ctgov = tk.BooleanVar(value=True)
        ttk.Checkbutton(ukfrm, text="ClinicalTrials.gov (UK sites only)", variable=self.uk_use_ctgov).grid(row=0, column=0, sticky=tk.W)
        # EU CTR toggle and politeness controls for the scraper
        self.use_euctr = tk.BooleanVar(value=True)
        ttk.Checkbutton(ukfrm, text="Include EU Clinical Trials Register (EUCTR)", variable=self.use_euctr).grid(row=0, column=3, sticky=tk.W)
        ttk.Label(ukfrm, text="EUCTR delay (s):").grid(row=1, column=3, sticky=tk.W, padx=(8, 0))
        self.euctr_delay = tk.DoubleVar(value=0.8)
        ttk.Entry(ukfrm, textvariable=self.euctr_delay, width=6).grid(row=1, column=4, sticky=tk.W)
        ttk.Label(ukfrm, text="EUCTR max pages:").grid(row=1, column=5, sticky=tk.W, padx=(8, 0))
        self.euctr_maxpages = tk.IntVar(value=2)
        ttk.Entry(ukfrm, textvariable=self.euctr_maxpages, width=4).grid(row=1, column=6, sticky=tk.W)
        self.btn_search_uk = ttk.Button(ukfrm, text="Search UK", command=self.on_search_uk)
        self.btn_search_uk.grid(row=0, column=1, padx=(16, 0))
        # Separate EU search button (decoupled from main Search)
        self.btn_search_eu = ttk.Button(ukfrm, text="Search EU", command=self.on_search_eu)
        self.btn_search_eu.grid(row=0, column=4, padx=(8, 0))
        ttk.Button(ukfrm, text="Save PDF", command=self.on_save_pdf).grid(row=0, column=2, padx=(16, 0))
        # Shortcuts that open external UK portals in the browser
        ttk.Button(ukfrm, text="Open NIHR", command=self.on_open_nihr).grid(row=1, column=0, pady=(8, 0), sticky=tk.W)
        ttk.Button(ukfrm, text="Open ISRCTN (UK)", command=self.on_open_isrctn).grid(row=1, column=1, pady=(8, 0), sticky=tk.W)
        ttk.Button(ukfrm, text="Open CRUK", command=self.on_open_cruk).grid(row=1, column=2, pady=(8, 0), sticky=tk.W)
        # Optional NIHR location restriction
        ttk.Label(ukfrm, text="NIHR location (optional):").grid(row=2, column=0, sticky=tk.W, pady=(8, 0))
        self.uk_location = tk.StringVar(value="")
        ttk.Combobox(ukfrm, textvariable=self.uk_location, values=UK_NIHR_LOCATIONS, width=28, state="readonly").grid(row=2, column=1, sticky=tk.W, pady=(8, 0))

        # ----- Results tree -----
        cols = ("score", "title", "sponsor", "city_country", "status", "phases", "conditions", "nct", "source")
        self.tree = ttk.Treeview(self, columns=cols, show="headings", height=18)
        self.tree.pack(fill="both", expand=True, padx=10, pady=(6, 10))
        headings = {
            "score": "Score",
            "title": "Title",
            "sponsor": "Sponsor",
            "city_country": "City/Country",
            "status": "Status",
            "phases": "Phases",
            "conditions": "Conditions",
            "nct": "NCT ID",
            "source": "Source",
        }
        widths = {
            "score": 60,
            "title": 330,
            "sponsor": 220,
            "city_country": 160,
            "status": 120,
            "phases": 110,
            "conditions": 260,
            "nct": 120,
            "source": 120,
        }
        for col in cols:
            self.tree.heading(col, text=headings[col])
            # Only the numeric score column is centered; others use the default.
            self.tree.column(col, width=widths[col], anchor="center" if col == "score" else "w")
        self.tree.bind("<Double-1>", self.on_open)
        self.tree.bind("<<TreeviewSelect>>", self.on_select)

        # Per-row mappings (Treeview item id -> url / raw study dict)
        self._url_by_item: Dict[str, str] = {}
        self._study_by_item: Dict[str, Dict[str, Any]] = {}
        self._current_rows: List[Dict[str, Any]] = []  # rows currently displayed (for export)

        # ----- Contacts and Locations panel -----
        infofrm = ttk.Labelframe(self, text="Contacts and Locations", padding=10)
        infofrm.pack(fill="both", expand=True, padx=10, pady=(0, 10))
        self.contacts_text = tk.Text(infofrm, height=12, wrap="word")
        self.contacts_text.config(state="disabled")
        scroll = ttk.Scrollbar(infofrm, orient="vertical", command=self.contacts_text.yview)
        self.contacts_text.configure(yscrollcommand=scroll.set)
        self.contacts_text.grid(row=0, column=0, sticky="nsew")
        scroll.grid(row=0, column=1, sticky="ns")
        infofrm.columnconfigure(0, weight=1)
        infofrm.rowconfigure(0, weight=1)

        # No automatic search on startup; the user must press Search explicitly.

    # ----- Portal helpers -----
    def _build_portal_query(self) -> str:
        """Return a URL-encoded query for the external UK portals.

        Uses the selected diagnosis unless it is "Other", in which case the
        keywords (or a generic fallback) are used instead.
        """
        diag = (self.diagnosis.get() or "").strip()
        if diag and diag != "Other":
            q = diag
        else:
            q = (self.keywords.get() or "").strip() or "brain tumour"
        return urllib.parse.quote_plus(q)

    def on_open_nihr(self):
        """Open the NIHR 'Be Part of Research' portal with the current query."""
        q = self._build_portal_query()
        base = "https://www.bepartofresearch.nihr.ac.uk/results/search-results"
        loc_txt = (self.uk_location.get() or "").strip()
        if loc_txt:
            loc = urllib.parse.quote_plus(loc_txt)
            url = f"{base}?query={q}&location={loc}"
        else:
            url = f"{base}?query={q}"
        webbrowser.open_new_tab(url)

    def on_open_isrctn(self):
        """Open the ISRCTN registry search (restricted to the UK)."""
        q = self._build_portal_query()
        url = f"https://www.isrctn.com/search?q={q}&countries=United%20Kingdom"
        webbrowser.open_new_tab(url)

    def on_open_cruk(self):
        """Open the Cancer Research UK trial-finder with the current query."""
        q = self._build_portal_query()
        url = f"https://find.cancerresearchuk.org/clinical-trials?q={q}"
        webbrowser.open_new_tab(url)

    # ----- Actions -----
    def on_open(self, event=None):
        """Open the first selected row's registry page (double-click handler)."""
        sel = self.tree.selection()
        if not sel:
            return
        for iid in sel:
            url = self._url_by_item.get(iid)
            if url:
                webbrowser.open_new_tab(url)
                break

    def on_select(self, event=None):
        """Show contacts/locations for the newly selected row."""
        sel = self.tree.selection()
        if not sel:
            return
        study = self._study_by_item.get(sel[0])
        if study:
            self._populate_contacts(study)

    def _read_intake(self) -> Dict[str, Any]:
        """Snapshot the intake widgets into a plain dict (main thread only)."""
        return {
            "age": self.age.get(),
            "kps": self.kps.get(),
            "prior_bev": self.prior_bev.get(),
            "setting": self.setting.get(),
            "keywords": self.keywords.get(),
            "diagnosis": self.diagnosis.get(),
        }

    @staticmethod
    def _fallback_city_country(base: Dict[str, Any], locs: List[Dict[str, Any]]) -> None:
        """Fill ``base['city_country']`` from the first location when missing."""
        if base.get("city_country") or not locs:
            return
        first = locs[0]
        parts = [p for p in [(first.get("locationCity") or "").strip(),
                             (first.get("locationCountry") or "").strip()] if p]
        if parts:
            base["city_country"] = ", ".join(parts)

    def on_search(self):
        """Search ClinicalTrials.gov with the current intake and render results.

        All Tk variables are read on the main thread before the worker starts;
        the worker only does network/CPU work and posts back via ``after``.
        """
        self.btn_search.configure(state=tk.DISABLED)
        self.btn_search_uk.configure(state=tk.DISABLED)
        self.status_lbl.configure(text="Fetching…")
        intake = self._read_intake()
        diagnosis = intake["diagnosis"]
        keywords = intake["keywords"]
        country = self.country.get().strip()
        require_country = self.require_country.get()

        def worker():
            try:
                terms = build_terms(diagnosis, keywords)
                # Fetch ClinicalTrials.gov results only (no EUCTR fetch here).
                studies = fetch_all_terms(terms, STATUSES, page_size=100, max_pages=5)
                rows: List[Dict[str, Any]] = []
                skipped = 0
                for s in studies:
                    try:
                        ps = (s.get("protocolSection", {}) or {})
                        clm = (ps.get("contactsLocationsModule", {}) or {})
                        locs = ensure_list(clm.get("locations"))
                        if country and require_country:
                            locs = [loc for loc in locs
                                    if country.lower() in (loc.get("locationCountry") or "").lower()]
                        if require_country and not locs:
                            continue
                        sc, reasons = score_trial(s, intake)
                        base = extract_row(s)
                        # BUGFIX: the score/reasons were computed but never stored,
                        # so every row previously sorted and rendered with score 0.
                        base["score"] = sc
                        base["reasons"] = "; ".join(reasons)
                        self._fallback_city_country(base, locs)
                        # This search path only queries CT.gov, so the source is fixed.
                        base["source"] = "CTGov"
                        base["url"] = (f"https://clinicaltrials.gov/study/{base['nct']}"
                                       if base.get("nct") else s.get("_source_url", ""))
                        base["study"] = s
                        rows.append(base)
                    except Exception:
                        skipped += 1
                        continue
                rows.sort(key=lambda r: -r.get("score", 0))
                self.after(0, self._render_rows, rows, skipped, len(studies))
            except Exception as e:
                self.after(0, self._show_error, e)

        threading.Thread(target=worker, daemon=True).start()

    def on_search_uk(self):
        """Search UK-specific sources via ``uk_sources.fetch_uk_trials``."""
        self.btn_search.configure(state=tk.DISABLED)
        self.btn_search_uk.configure(state=tk.DISABLED)
        self.status_lbl.configure(text="Fetching UK trials…")
        intake = self._read_intake()
        diagnosis = intake["diagnosis"]
        keywords = intake["keywords"]
        use_ctgov = self.uk_use_ctgov.get()

        def worker():
            try:
                rows, total_raw, skipped = fetch_uk_trials(
                    diagnosis, keywords, intake, include_ctgov=use_ctgov
                )
                self.after(0, self._render_rows, rows, skipped, total_raw)
            except Exception as e:
                self.after(0, self._show_error, e)

        threading.Thread(target=worker, daemon=True).start()

    def on_search_eu(self):
        """Run an EUCTR-only search and display results (separate button)."""
        self.btn_search.configure(state=tk.DISABLED)
        self.btn_search_uk.configure(state=tk.DISABLED)
        self.btn_search_eu.configure(state=tk.DISABLED)
        self.status_lbl.configure(text="Fetching EU trials…")
        diagnosis = self.diagnosis.get()
        keywords = self.keywords.get()
        country = self.country.get().strip()
        require_country = self.require_country.get()
        # BUGFIX: read Tk variables on the main thread — the previous version
        # called self.euctr_maxpages.get()/euctr_delay.get() from the worker
        # thread, and re-enabled buttons from it, which is unsafe in Tkinter.
        max_pages = self.euctr_maxpages.get()
        delay = self.euctr_delay.get()

        def worker_eu():
            try:
                terms = build_terms(diagnosis, keywords)
                try:
                    eu_studies = fetch_eu_trials(terms, STATUSES, page_size=50,
                                                 max_pages=max_pages, polite_delay=delay)
                except Exception:
                    # Best-effort: the EUCTR scrape is fragile; show no EU rows
                    # rather than failing the whole search.
                    eu_studies = []
                # EU records rarely carry full intake fields, so score with a
                # neutral intake (age/KPS unknown).
                intake = {
                    "age": None,
                    "kps": None,
                    "prior_bev": False,
                    "setting": "",
                    "keywords": keywords,
                    "diagnosis": diagnosis,
                }
                rows: List[Dict[str, Any]] = []
                skipped = 0
                for s in eu_studies:
                    try:
                        ps = (s.get("protocolSection", {}) or {})
                        clm = (ps.get("contactsLocationsModule", {}) or {})
                        locs = ensure_list(clm.get("locations"))
                        if country and require_country:
                            locs = [loc for loc in locs
                                    if country.lower() in (loc.get("locationCountry") or "").lower()]
                        if require_country and not locs:
                            continue
                        # Reuse score_trial where possible (may be incomplete for
                        # EU studies — fall back to zero on any failure).
                        try:
                            sc, reasons = score_trial(s, intake)
                        except Exception:
                            sc, reasons = 0, []
                        base = extract_row(s)
                        self._fallback_city_country(base, locs)
                        base["score"] = sc
                        base["reasons"] = "; ".join(reasons)
                        base["source"] = "EUCTR"
                        base["url"] = s.get("_source_url", "")
                        base["study"] = s
                        rows.append(base)
                    except Exception:
                        skipped += 1
                        continue
                rows.sort(key=lambda r: -r.get("score", 0))
                self.after(0, self._render_rows, rows, skipped, len(eu_studies))
            except Exception as e:
                self.after(0, self._show_error, e)
            # Buttons are re-enabled by _render_rows/_show_error on the Tk thread.

        threading.Thread(target=worker_eu, daemon=True).start()

    # ----- Rendering & details -----
    def _show_error(self, e: Exception):
        """Re-enable controls and surface a fetch failure to the user."""
        self.btn_search.configure(state=tk.NORMAL)
        self.btn_search_uk.configure(state=tk.NORMAL)
        self.btn_search_eu.configure(state=tk.NORMAL)
        self.status_lbl.configure(text="Error")
        messagebox.showerror("Error", f"Failed to fetch trials.\n{e}")

    def _render_rows(self, rows: List[Dict[str, Any]], skipped: int, total: int):
        """Replace the tree contents with *rows* and update the status line.

        Must run on the Tk main thread (workers schedule it via ``after``).
        """
        for iid in self.tree.get_children():
            self.tree.delete(iid)
        self._url_by_item.clear()
        self._study_by_item.clear()
        self._current_rows = rows[:]  # snapshot for PDF export

        for r in rows[:300]:  # cap rendering for responsiveness
            # Prefer showing the EU CT (EudraCT) number when the NCT is absent.
            nct_display = r.get("nct") or ""
            if not nct_display:
                study = r.get("study") or {}
                try:
                    nct_display = (study.get("protocolSection", {})
                                   .get("identificationModule", {})
                                   .get("eudractNumber")) or nct_display
                except Exception:
                    pass

            # Fall back to the trial's country list when city_country is missing.
            city_country = r.get("city_country") or ""
            if not city_country:
                study = r.get("study") or {}
                try:
                    locs = (study.get("protocolSection", {})
                            .get("contactsLocationsModule", {})
                            .get("locations")) or []
                    countries = [c for c in
                                 ((loc.get("locationCountry") or "").strip() for loc in locs)
                                 if c]
                    if countries:
                        city_country = ", ".join(countries)
                except Exception:
                    pass

            values = (
                r.get("score", 0),
                r.get("title", ""),
                r.get("sponsor", ""),
                city_country,
                r.get("status", ""),
                r.get("phases", ""),
                r.get("conditions", ""),
                nct_display,
                r.get("source", ""),
            )
            iid = self.tree.insert("", "end", values=values)
            if r.get("url"):
                self._url_by_item[iid] = r["url"]
            if r.get("study"):
                self._study_by_item[iid] = r["study"]

        txt = f"Fetched {total} trials; showing {len(rows)} after filters."
        if skipped:
            txt += f" Skipped {skipped}."
        self.status_lbl.configure(text=txt)
        self.btn_search.configure(state=tk.NORMAL)
        self.btn_search_uk.configure(state=tk.NORMAL)
        self.btn_search_eu.configure(state=tk.NORMAL)

    def _populate_contacts(self, study: Dict[str, Any]):
        """Render the study's contacts/officials/locations into the text panel."""
        ps = (study.get("protocolSection", {}) or {})
        clm = (ps.get("contactsLocationsModule", {}) or {})
        lines: List[str] = []

        # Central contacts
        centrals = ensure_list(clm.get("centralContacts"))
        if centrals:
            lines.append("Central Contacts:")
            for c in centrals:
                parts = [p for p in [(c.get("name") or "").strip(),
                                     (c.get("role") or "").strip(),
                                     (c.get("phone") or "").strip(),
                                     (c.get("email") or "").strip()] if p]
                if parts:
                    lines.append(" - " + " | ".join(parts))

        # Overall officials
        officials = ensure_list(clm.get("overallOfficials"))
        if officials:
            lines.append("Overall Officials:")
            for o in officials:
                parts = [p for p in [(o.get("name") or "").strip(),
                                     (o.get("role") or "").strip(),
                                     (o.get("affiliation") or "").strip()] if p]
                if parts:
                    lines.append(" - " + " | ".join(parts))

        # Locations (with per-location contacts when present)
        locs = ensure_list(clm.get("locations"))
        if locs:
            lines.append("Locations:")
            for loc in locs:
                facility = (loc.get("locationFacility") or "").strip()
                city = (loc.get("locationCity") or "").strip()
                state = (loc.get("locationState") or "").strip()
                country = (loc.get("locationCountry") or "").strip()
                status = (loc.get("status") or "").strip()
                site_line = ", ".join(p for p in [facility, city, state, country] if p)
                if site_line:
                    if status:
                        lines.append(f" - {site_line} (status: {status})")
                    else:
                        lines.append(f" - {site_line}")
                # Per-location contacts appear under either key depending on source.
                lcontacts = ensure_list(loc.get("contacts")) or ensure_list(loc.get("locationContacts"))
                for lc in lcontacts:
                    parts = [p for p in [(lc.get("name") or "").strip(),
                                         (lc.get("role") or "").strip(),
                                         (lc.get("phone") or "").strip(),
                                         (lc.get("email") or "").strip()] if p]
                    if parts:
                        lines.append("   • " + " | ".join(parts))

        if not lines:
            lines.append("No contacts/locations provided by sponsor at this time.")

        # The widget is kept read-only; toggle state only while writing.
        self.contacts_text.config(state="normal")
        self.contacts_text.delete("1.0", tk.END)
        self.contacts_text.insert(tk.END, "\n".join(lines))
        self.contacts_text.config(state="disabled")

    # ----- PDF export -----
    def on_save_pdf(self):
        """Prompt for a path and export the currently displayed rows as PDF."""
        if not self._current_rows:
            messagebox.showinfo("Save PDF", "No results to export. Perform a search first.")
            return
        path = filedialog.asksaveasfilename(
            title="Save PDF",
            defaultextension=".pdf",
            filetypes=[("PDF files", "*.pdf")],
            initialfile="brain_trials_results.pdf",
        )
        if not path:
            return
        try:
            self._export_pdf(self._current_rows, path)
            messagebox.showinfo("Save PDF", f"Saved: {path}")
        except Exception as e:
            messagebox.showerror("Save PDF", f"Failed to create PDF.\n{e}")

    def _export_pdf(self, rows: List[Dict[str, Any]], path: str):
        """Write *rows* to *path* as a simple flowing PDF report.

        reportlab is imported lazily so the app runs without it until the
        user actually exports.
        """
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
        from reportlab.lib.units import mm

        doc = SimpleDocTemplate(path, pagesize=A4, leftMargin=15 * mm, rightMargin=15 * mm,
                                topMargin=15 * mm, bottomMargin=15 * mm)
        styles = getSampleStyleSheet()
        story = []

        story.append(Paragraph("Brain Cancer Trials – Results", styles["Title"]))
        story.append(Spacer(1, 6))
        story.append(Paragraph(f"Total shown: {len(rows)}", styles["Normal"]))
        story.append(Spacer(1, 12))
        story.append(Paragraph(COPYRIGHT, styles["Normal"]))

        for r in rows:
            title = r.get("title", "")
            nct = r.get("nct", "")
            sponsor = r.get("sponsor", "")
            status = r.get("status", "")
            phases = r.get("phases", "")
            city_country = r.get("city_country", "")
            score = r.get("score", 0)
            url = r.get("url") or (f"https://clinicaltrials.gov/study/{nct}" if nct else "")
            story.append(Paragraph(f"<b>{title}</b>", styles["Heading4"]))
            meta = (
                f"NCT: {nct or '—'} | Sponsor: {sponsor or '—'} | City/Country: {city_country or '—'} | "
                f"Status: {status or '—'} | Phases: {phases or '—'} | Score: {score}"
            )
            story.append(Paragraph(meta, styles["Normal"]))
            if url:
                story.append(Paragraph(f"URL: <a href='{url}' color='blue'>{url}</a>", styles["Normal"]))
            source_txt = r.get("source", "")
            if source_txt:
                story.append(Paragraph(f"Source: {source_txt}", styles["Normal"]))
            story.append(Spacer(1, 8))

        doc.build(story)
603
+
604
+
605
if __name__ == "__main__":
    # Launch the desktop app and hand control to the Tk event loop.
    BrainTrialsApp().mainloop()
logo_precise.png ADDED

Git LFS Details

  • SHA256: 6cfa9f491d5d7b74224d627ac69d5093d45fd9a80f1e110f2de31f315e53db9e
  • Pointer size: 131 Bytes
  • Size of remote file: 145 kB
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10
streamlit_app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Streamlit Cloud entrypoint. We reuse your existing app as-is.
# Main requirement: keep the module name and path exactly as below so Linux (Streamlit Cloud) can import it.
#
# IMPORTANT: Do not call st.set_page_config here to avoid double configuration.
# The original module handles all Streamlit layout and rendering.
try:
    # Importing the module executes the Streamlit app defined at its top level.
    import GUI_CLinicalTrial  # noqa: F401
except Exception as exc:
    import streamlit as st

    st.error("Failed to import GUI_CLinicalTrial.py. Ensure the file exists and has no syntax errors.")
    st.exception(exc)
uk_sources.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # UK sources aggregator (initial: ClinicalTrials.gov UK filter)
2
+ from typing import List, Dict, Any, Tuple
3
+
4
+ from ctgov_client import (
5
+ build_terms,
6
+ fetch_all_terms,
7
+ score_trial,
8
+ extract_row,
9
+ )
10
+
11
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
12
+
13
+
14
+ def _normalize_key(row: Dict[str, Any]) -> str:
15
+ # Prefer identifiers; fallback to normalized title
16
+ nct = (row.get("nct") or "").strip()
17
+ if nct:
18
+ return f"NCT:{nct}"
19
+ title = (row.get("title") or "").lower().strip()
20
+ return f"TITLE:{title}"
21
+
22
+
23
def fetch_uk_trials(
    diagnosis: str,
    keywords: str,
    intake: Dict[str, Any],
    include_ctgov: bool = True,
) -> Tuple[List[Dict[str, Any]], int, int]:
    """
    Fetch UK trials across selected sources.
    Currently implemented: ClinicalTrials.gov with UK site filter.

    Args:
        diagnosis: Diagnosis label used to build the search terms.
        keywords: Comma-separated extra keywords.
        intake: Patient intake dict passed through to ``score_trial``.
        include_ctgov: Whether to query ClinicalTrials.gov (currently the only
            implemented source, so ``False`` yields empty results).

    Returns: (rows, total_raw, skipped)
      rows: list of standard rows with keys: title, nct, status, phases, conditions, site, score, reasons, url
      total_raw: number of raw studies fetched before filters
      skipped: number of studies skipped due to formatting issues
    """
    terms = build_terms(diagnosis, keywords)
    rows: List[Dict[str, Any]] = []
    skipped = 0
    total_raw = 0

    if include_ctgov:
        studies = fetch_all_terms(terms, STATUSES, page_size=100, max_pages=5)
        total_raw += len(studies)
        for s in studies:
            try:
                ps = (s.get("protocolSection", {}) or {})
                locs = ((ps.get("contactsLocationsModule", {}) or {}).get("locations") or [])
                # UK filter (case-insensitive contains)
                uk_locs = [loc for loc in locs
                           if "united kingdom" in (loc.get("locationCountry") or "").lower()]
                if not uk_locs:
                    continue
                sc, reasons = score_trial(s, intake)
                base = extract_row(s)
                # Describe the first UK site.  BUGFIX: skip empty components so
                # the site string no longer renders dangling commas such as
                # "Facility, , United Kingdom" when the city is missing.
                first_site = uk_locs[0]
                site_parts = [
                    (first_site.get("locationFacility") or "").strip(),
                    (first_site.get("locationCity") or "").strip(),
                    (first_site.get("locationCountry") or "").strip(),
                ]
                base["site"] = ", ".join(p for p in site_parts if p)
                base["score"] = sc
                base["reasons"] = "; ".join(reasons)
                base["url"] = f"https://clinicaltrials.gov/study/{base['nct']}" if base.get("nct") else ""
                rows.append(base)
            except Exception:
                # Malformed study records are counted but never abort the batch.
                skipped += 1
                continue

    # Deduplicate by registry ID / normalized title, keeping first occurrence.
    seen = set()
    deduped: List[Dict[str, Any]] = []
    for r in rows:
        k = _normalize_key(r)
        if k in seen:
            continue
        seen.add(k)
        deduped.append(r)

    # Highest score first.
    deduped.sort(key=lambda x: -x.get("score", 0))
    return deduped, total_raw, skipped
80
+