PrazNeuro commited on
Commit
b950dbe
·
verified ·
1 Parent(s): 767eb2e

Upload 12 files

Browse files
.gitattributes CHANGED
@@ -33,3 +33,5 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
 
 
 
33
  *.zip filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
+ Brain[[:space:]]Cancer[[:space:]]Trial[[:space:]]Finder.exe filter=lfs diff=lfs merge=lfs -text
37
+ logo_precise.png filter=lfs diff=lfs merge=lfs -text
.gitignore ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Python
2
+ __pycache__/
3
+ *.py[cod]
4
+ *.pyo
5
+ .venv/
6
+ .env
7
+
8
+ # Data, models, caches
9
+ models_*/
10
+ models*/
11
+ cache_dir/
12
+ *.joblib
13
+ *.pkl
14
+
15
+ # Logs
16
+ *.log
17
+
18
+ # IDE
19
+ .vscode/
20
+ .idea/
21
+
Brain Cancer Trial Finder.exe ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:977b04e5ded6299f555c6490cc29a88559dd6a1bdfaab2df2e405aa37a0219da
3
+ size 4759201
GUI_CLinicalTrial.py ADDED
@@ -0,0 +1,289 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # python
2
+ # GUI_CLinicalTrial.py — run with: streamlit run GUI_CLinicalTrial.py
3
+ import re
4
+ import requests
5
+ import streamlit as st
6
+
7
+ st.set_page_config(page_title="Brain Trials Finder", layout="wide")
8
+
9
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
10
+
11
+ DEFAULT_DIAG_TERMS = {
12
+ "Glioblastoma": ["glioblastoma", "GBM", "grade 4 astrocytoma"],
13
+ "Diffuse midline glioma": ["diffuse midline glioma", "DMG"],
14
+ "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
15
+ }
16
+
17
+
18
def ensure_list(v):
    """Coerce *v* to a list: lists pass through, None becomes [], scalars are wrapped."""
    if v is None:
        return []
    return v if isinstance(v, list) else [v]
24
+
25
+
26
def mentions(text: str, needle: str) -> bool:
    """Case-insensitive substring check; empty or None *text* never matches."""
    return bool(text) and needle.lower() in text.lower()
30
+
31
+
32
+ def _to_int(v):
33
+ try:
34
+ if v is None:
35
+ return None
36
+ if isinstance(v, (int, float)):
37
+ return int(v)
38
+ # Extract first integer from strings like "18 Years"
39
+ m = re.search(r"(\d+)", str(v))
40
+ return int(m.group(1)) if m else None
41
+ except Exception:
42
+ return None
43
+
44
+
45
def build_terms(diagnosis: str, keywords: str):
    """Merge canned terms for *diagnosis* with user keywords (comma-separated).

    Duplicates are dropped while preserving order; falls back to
    ["brain tumor"] when nothing remains.
    """
    user_terms = [part.strip() for part in (keywords or "").split(",") if part.strip()]
    combined = list(dict.fromkeys(DEFAULT_DIAG_TERMS.get(diagnosis, []) + user_terms))
    return combined or ["brain tumor"]
50
+
51
+
52
def build_expr(diagnosis: str, keywords: str) -> str:
    """Build an OR-joined query string for the v2 API; multi-word terms are quoted."""
    pieces = []
    for term in build_terms(diagnosis, keywords):
        pieces.append(f'"{term}"' if " " in term else term)
    return " OR ".join(pieces)
56
+
57
+
58
@st.cache_data(ttl=3600)
def ctgov_search(expr: str, statuses, page_size: int = 100, max_pages: int = 5):
    """Return a list of study dicts from ClinicalTrials.gov v2.

    Follows pageToken pagination for at most *max_pages* pages; results are
    cached by Streamlit for one hour.
    """
    endpoint = "https://clinicaltrials.gov/api/v2/studies"
    collected = []
    next_token = None
    for _page in range(max_pages):
        query = {
            "query.term": expr,
            "pageSize": page_size,
            "filter.overallStatus": ",".join(statuses),
        }
        if next_token:
            query["pageToken"] = next_token
        resp = requests.get(endpoint, params=query, timeout=30)
        resp.raise_for_status()
        payload = resp.json() or {}
        collected.extend(payload.get("studies") or [])
        next_token = payload.get("nextPageToken")
        if not next_token:
            break
    return collected
81
+
82
+
83
def extract_row(study: dict) -> dict:
    """Flatten a v2 study dict into display fields; safe against missing keys.

    Returns a dict with keys: title, nct, status, phases, conditions, sponsor.
    """
    ps = (study.get("protocolSection") or {})
    idm = (ps.get("identificationModule") or {})
    scm = (ps.get("statusModule") or {})
    dsm = (ps.get("designModule") or {})
    cdnm = (ps.get("conditionsModule") or {})
    slm = (ps.get("sponsorCollaboratorsModule") or {})

    title = (idm.get("officialTitle") or idm.get("briefTitle") or "").strip()
    nct = (idm.get("nctId") or "").strip()

    # e.g. "NOT_YET_RECRUITING" -> "Not Yet Recruiting"
    status_raw = (scm.get("overallStatus") or "").strip()
    status = status_raw.replace("_", " ").title() if status_raw else ""

    def fmt_phase(p: str) -> str:
        # Pretty-print API phase codes: "PHASE2" -> "Phase 2", "PHASE1_2" -> "Phase 1/2".
        # (The original also chained .replace("1/2", "1/2").replace("2/3", "2/3"),
        # which replaced each string with itself — dead code, removed.)
        p = str(p or "").upper()
        if p.startswith("PHASE"):
            pnum = p.replace("PHASE", "").replace("_", "/").strip()
            return f"Phase {pnum}" if pnum else "Phase"
        return p.title() if p else ""

    phases_list = ensure_list(dsm.get("phases"))
    phases = ", ".join(fmt_phase(p) for p in phases_list if p)

    conditions = ", ".join(ensure_list(cdnm.get("conditions")))

    sponsor = ""
    lead = slm.get("leadSponsor") or {}
    if isinstance(lead, dict):
        sponsor = (lead.get("name") or "").strip()

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
    }
123
+
124
+
125
def score_trial(study: dict, intake: dict):
    """Heuristically score *study* against the patient *intake*.

    Returns (score, reasons), where reasons lists likely exclusion causes.
    """
    ps = (study.get("protocolSection") or {})
    scm = (ps.get("statusModule") or {})
    dsm = (ps.get("designModule") or {})
    elm = (ps.get("eligibilityModule") or {})
    idm = (ps.get("identificationModule") or {})

    score = 0
    reasons = []

    # Actively recruiting trials are preferred over not-yet-recruiting ones.
    status = (scm.get("overallStatus") or "")
    if status == "RECRUITING":
        score += 15
    elif status == "NOT_YET_RECRUITING":
        score += 8

    # Later-phase trials earn a bonus (a combined phase code can earn both).
    phase_codes = [str(p).upper() for p in ensure_list(dsm.get("phases"))]
    if any("PHASE3" in code for code in phase_codes):
        score += 12
    if any("PHASE2" in code for code in phase_codes):
        score += 8

    # Hard age-eligibility penalties; reasons quote the raw API strings.
    min_age_raw = elm.get("minimumAge")
    max_age_raw = elm.get("maximumAge")
    min_age = _to_int(min_age_raw)
    max_age = _to_int(max_age_raw)
    age = int(intake.get("age") or 0)
    if min_age is not None and age < min_age:
        reasons.append(f"Age below minimum ({min_age_raw}).")
        score -= 30
    if max_age is not None and age > max_age:
        reasons.append(f"Age above maximum ({max_age_raw}).")
        score -= 30

    # Performance-status heuristic from the free-text criteria.
    crit = elm.get("eligibilityCriteria") or ""
    kps = int(intake.get("kps") or 0)
    if mentions(crit, "Karnofsky") and kps < 70:
        score -= 10
        reasons.append("Requires KPS ≥70.")

    # Small bonus for each user keyword found in title/summary.
    title = (idm.get("briefTitle") or idm.get("officialTitle") or "")
    summary = (ps.get("descriptionModule", {}) or {}).get("briefSummary") or ""
    blob = " ".join([title, summary])
    user_keywords = [k.strip() for k in (intake.get("keywords") or "").split(",") if k.strip()]
    for kw in user_keywords:
        if mentions(blob, kw):
            score += 2

    return score, reasons
177
+
178
+
179
+ # UI
180
+ st.title("Brain Cancer Trials Finder (MVP)")
181
+
182
+ with st.sidebar:
183
+ diagnosis = st.selectbox(
184
+ "Diagnosis",
185
+ ["Glioblastoma", "Diffuse midline glioma", "Anaplastic astrocytoma", "Other"],
186
+ index=0,
187
+ )
188
+ setting = st.selectbox("Setting", ["Newly diagnosed", "Recurrent"], index=1)
189
+ age = st.number_input("Age", min_value=1, max_value=100, value=55)
190
+ kps = st.slider("Karnofsky (KPS)", min_value=40, max_value=100, step=10, value=80)
191
+ prior_bev = st.checkbox("Prior bevacizumab", value=False)
192
+ keywords = st.text_input("Keywords (comma-separated)", value="immunotherapy,vaccine,device")
193
+ do_search = st.button("Search", type="primary")
194
+
195
+ # Trigger search on first load too
196
+ if do_search or "did_first" not in st.session_state:
197
+ st.session_state["did_first"] = True
198
+ expr = build_expr(diagnosis, keywords)
199
+ studies = ctgov_search(expr, STATUSES, page_size=100, max_pages=5)
200
+
201
+ intake = {
202
+ "age": age,
203
+ "kps": kps,
204
+ "prior_bev": prior_bev,
205
+ "setting": setting,
206
+ "keywords": keywords,
207
+ "diagnosis": diagnosis,
208
+ }
209
+
210
+ rows = []
211
+ for sdict in studies:
212
+ try:
213
+ sc, reasons = score_trial(sdict, intake)
214
+ row = extract_row(sdict)
215
+ nct = row.get("nct") or ""
216
+ url = f"https://clinicaltrials.gov/study/{nct}" if nct else ""
217
+ rows.append(
218
+ (
219
+ sc,
220
+ row.get("title", ""),
221
+ nct,
222
+ row.get("status", ""),
223
+ row.get("phases", ""),
224
+ row.get("conditions", ""),
225
+ row.get("sponsor", ""),
226
+ reasons,
227
+ url,
228
+ sdict,
229
+ )
230
+ )
231
+ except Exception:
232
+ continue
233
+
234
+ rows = sorted(rows, key=lambda x: -x[0])[:50]
235
+
236
+ st.caption(f"Found {len(studies)} studies; showing top {len(rows)} by score.")
237
+
238
+ for sc, title, nct, status, phases, conds, sponsor, reasons, url, study in rows:
239
+ with st.container(border=True):
240
+ if url:
241
+ st.markdown(f"**[{title}]({url})**")
242
+ else:
243
+ st.markdown(f"**{title}**")
244
+ meta = f"NCT: {nct or '—'} · Sponsor: {sponsor or '—'} · Status: {status or '—'} · Phases: {phases or '—'} · Score: {sc}"
245
+ st.write(meta)
246
+ if conds:
247
+ st.write(f"Conditions: {conds}")
248
+
249
+ with st.expander("Contacts and Locations"):
250
+ ps = (study.get("protocolSection") or {})
251
+ clm = (ps.get("contactsLocationsModule") or {})
252
+
253
+ centrals = ensure_list(clm.get("centralContacts"))
254
+ if centrals:
255
+ st.write("Central Contacts:")
256
+ for c in centrals:
257
+ parts = [c.get("name"), c.get("role"), c.get("phone"), c.get("email")]
258
+ st.write(" - " + " | ".join([p for p in parts if p]))
259
+
260
+ officials = ensure_list(clm.get("overallOfficials"))
261
+ if officials:
262
+ st.write("Overall Officials:")
263
+ for o in officials:
264
+ parts = [o.get("name"), o.get("role"), o.get("affiliation")]
265
+ st.write(" - " + " | ".join([p for p in parts if p]))
266
+
267
+ locs = ensure_list(clm.get("locations"))
268
+ if locs:
269
+ st.write("Locations:")
270
+ for L in locs:
271
+ facility = (L.get("locationFacility") or "").strip()
272
+ city = (L.get("locationCity") or "").strip()
273
+ state = (L.get("locationState") or "").strip()
274
+ country = (L.get("locationCountry") or "").strip()
275
+ status_l = (L.get("status") or "").strip()
276
+ site_line = ", ".join([p for p in [facility, city, state, country] if p])
277
+ if site_line:
278
+ st.write(f" - {site_line}" + (f" (status: {status_l})" if status_l else ""))
279
+ lcontacts = ensure_list(L.get("contacts")) or ensure_list(L.get("locationContacts"))
280
+ for lc in lcontacts:
281
+ parts = [lc.get("name"), lc.get("role"), lc.get("phone"), lc.get("email")]
282
+ parts = [p for p in parts if p]
283
+ if parts:
284
+ st.write(" • " + " | ".join(parts))
285
+
286
+ if reasons:
287
+ with st.expander("Why this score?"):
288
+ for r in reasons:
289
+ st.write(f"- {r}")
GUI_CLinicalTrial.spec ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # -*- mode: python ; coding: utf-8 -*-
2
+
3
+
4
+ a = Analysis(
5
+ ['GUI_CLinicalTrial.py'],
6
+ pathex=[],
7
+ binaries=[],
8
+ datas=[],
9
+ hiddenimports=[],
10
+ hookspath=[],
11
+ hooksconfig={},
12
+ runtime_hooks=[],
13
+ excludes=[],
14
+ noarchive=False,
15
+ optimize=0,
16
+ )
17
+ pyz = PYZ(a.pure)
18
+
19
+ exe = EXE(
20
+ pyz,
21
+ a.scripts,
22
+ [],
23
+ exclude_binaries=True,
24
+ name='GUI_CLinicalTrial',
25
+ debug=False,
26
+ bootloader_ignore_signals=False,
27
+ strip=False,
28
+ upx=True,
29
+ console=True,
30
+ disable_windowed_traceback=False,
31
+ argv_emulation=False,
32
+ target_arch=None,
33
+ codesign_identity=None,
34
+ entitlements_file=None,
35
+ )
36
+ coll = COLLECT(
37
+ exe,
38
+ a.binaries,
39
+ a.datas,
40
+ strip=False,
41
+ upx=True,
42
+ upx_exclude=[],
43
+ name='GUI_CLinicalTrial',
44
+ )
Neuro_onc_clinicalTrial.py ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ #!/usr/bin/env python3
2
+ import argparse
3
+ import csv
4
+ import json
5
+ from typing import List, Dict, Any
6
+
7
+ from ctgov_client import (
8
+ DEFAULT_DIAG_TERMS,
9
+ build_terms,
10
+ fetch_all_terms,
11
+ score_trial,
12
+ extract_row,
13
+ )
14
+
15
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
16
+
17
+
18
def save_results(rows: List[Dict[str, Any]], csv_path: str, json_path: str):
    """Persist scored rows to CSV (stable column order) and pretty-printed JSON.

    Does nothing (beyond a notice) when *rows* is empty.
    """
    if not rows:
        print("No studies found.")
        return
    # Stable header order so spreadsheets look the same run to run.
    fieldnames = [
        "score",
        "title",
        "nct",
        "url",
        "status",
        "phases",
        "conditions",
        "site",
        "reasons",
    ]
    with open(csv_path, "w", newline="", encoding="utf-8") as csv_file:
        writer = csv.DictWriter(csv_file, fieldnames=fieldnames)
        writer.writeheader()
        writer.writerows({name: row.get(name, "") for name in fieldnames} for row in rows)
    with open(json_path, "w", encoding="utf-8") as json_file:
        json.dump(rows, json_file, indent=2, ensure_ascii=False)
    print(f"Wrote {len(rows)} studies to {csv_path} and {json_path}")
+
43
+
44
def main():
    """CLI entry point: query ClinicalTrials.gov, score, filter and save trials."""
    parser = argparse.ArgumentParser(
        description="Download actively recruiting neuro-oncology trials from ClinicalTrials.gov v2 API (robust client)"
    )
    parser.add_argument(
        "--diagnosis",
        default="Glioblastoma",
        choices=list(DEFAULT_DIAG_TERMS.keys()) + ["Other"],
        help="Primary diagnosis category to search for.",
    )
    parser.add_argument(
        "--keywords",
        default="",
        help="Extra keywords (comma-separated) to refine search.",
    )
    parser.add_argument("--age", type=int, default=55, help="Patient age (years)")
    parser.add_argument("--kps", type=int, default=80, help="Karnofsky Performance Status (40-100)")
    parser.add_argument("--prior-bev", action="store_true", help="Indicate prior bevacizumab exposure")
    parser.add_argument(
        "--setting",
        default="Recurrent",
        choices=["Newly diagnosed", "Recurrent"],
        help="Disease setting",
    )
    parser.add_argument("--country", default="", help="Filter: require location country containing this text (case-insensitive)")
    parser.add_argument("--require-country", action="store_true", help="If set, require at least one site in the given country text")
    parser.add_argument("--csv", default="neuro_onc_trials.csv", help="CSV output path")
    parser.add_argument("--json", default="neuro_onc_trials.json", help="JSON output path")
    parser.add_argument("--page-size", type=int, default=100, help="Results per page per term (max 1000)")
    parser.add_argument("--pages", type=int, default=5, help="Max pages to fetch per term")
    args = parser.parse_args()

    terms = build_terms(args.diagnosis, args.keywords)
    print("Searching ClinicalTrials.gov for:")
    print(" Diagnosis:", args.diagnosis)
    if args.keywords:
        print(" Extra keywords:", args.keywords)

    studies = fetch_all_terms(terms, STATUSES, page_size=args.page_size, max_pages=args.pages)

    # Intake profile shared across all score_trial calls.
    intake = dict(
        age=args.age,
        kps=args.kps,
        prior_bev=args.prior_bev,
        setting=args.setting,
        keywords=args.keywords,
        diagnosis=args.diagnosis,
    )

    rows: List[Dict[str, Any]] = []
    skipped = 0
    for study in studies:
        try:
            ps = (study.get("protocolSection", {}) or {})
            locs = ((ps.get("contactsLocationsModule", {}) or {}).get("locations") or [])
            # NOTE(review): the country text only filters sites when
            # --require-country is also set — confirm that is intended.
            if args.country and args.require_country:
                needle = args.country.lower()
                locs = [site for site in locs if needle in (site.get("locationCountry") or "").lower()]
            if args.require_country and not locs:
                continue
            sc, reasons = score_trial(study, intake)
            row = extract_row(study)
            row["score"] = sc
            row["reasons"] = "; ".join(reasons)
            row["url"] = f"https://clinicaltrials.gov/study/{row['nct']}" if row.get("nct") else ""
            rows.append(row)
        except Exception:
            # A single malformed study must not abort the whole run.
            skipped += 1
            continue

    rows.sort(key=lambda r: -r.get("score", 0))
    print(f"Fetched {len(studies)} trials; showing {len(rows)} after filters. Skipped {skipped}.")

    save_results(rows, args.csv, args.json)
+
119
+
120
+ if __name__ == "__main__":
121
+ main()
README.md CHANGED
@@ -1,3 +1,69 @@
1
- ---
2
- license: apache-2.0
3
- ---
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ Project: PRECISE-GBM - Model training & retraining helpers
2
+
3
+ Overview
4
+
5
+ This repository contains code to train models (Gaussian Mixture labelling + SVM and ensemble classifiers) and to persist all artifacts required to reproduce or retrain models on new data. It includes:
6
+
7
+ - `Scenario_heldout_final_PRECISE.py` — training pipeline producing `.joblib` models and metadata JSONs (selected features, best params, CV results).
8
+ - `retrain_helper.py` — CLI utility to rebuild pipelines, set best params and retrain using saved selected-features and params JSONs. Supports JSON/YAML config files and auto-detection of model type.
9
+ - `README_RETRAIN.md` — detailed retrain examples and a notebook cell.
10
+
11
+ This repo also includes helper files to make it ready for GitHub:
12
+ - `requirements.txt` — Python dependencies
13
+ - `.gitignore` — recommended ignores (models, caches, logs)
14
+ - `LICENSE` — MIT license
15
+ - GitHub Actions workflow for CI (pytest smoke test)
16
+
17
+ Getting started (Windows PowerShell)
18
+
19
+ 1) Create and activate a virtual environment
20
+
21
+ ```powershell
22
+ python -m venv .venv
23
+ .\.venv\Scripts\Activate.ps1
24
+ ```
25
+
26
+ 2) Install dependencies
27
+
28
+ ```powershell
29
+ pip install --upgrade pip
30
+ pip install -r requirements.txt
31
+ ```
32
+
33
+ 3) Run training (note: the training script reads data from absolute paths configured in the script — adjust them or run from an environment where those files are present)
34
+
35
+ ```powershell
36
+ python Scenario_heldout_final_PRECISE.py
37
+ ```
38
+
39
+ The training script will create model files under `models_LM22/` and `models_GBM/` and write metadata JSONs next to each joblib model (selected features, params, cv results) as well as group-level JSON summaries.
40
+
41
+ Retraining
42
+
43
+ See `README_RETRAIN.md` for detailed CLI and notebook examples. Short example:
44
+
45
+ ```powershell
46
+ python retrain_helper.py \
47
+ --model-prefix "models_GBM/scenario_1/GBM_scen1_Tcell" \
48
+ --train-csv "data\new_train.csv" \
49
+ --label-col "label"
50
+ ```
51
+
52
+ Notes
53
+
54
+ - The training script contains hard-coded absolute paths to data files. Before running on another machine, update the `scenarios_*` file paths or place the datasets in the same paths.
55
+ - Retrain helper auto-detects model type when `--model-type` is omitted by looking for `{prefix}_svm_params.json` or `{prefix}_ens_params.json`.
56
+ - YAML config support for retrain requires PyYAML (`pip install pyyaml`).
57
+
58
+ CI
59
+
60
+ A basic GitHub Actions workflow runs a smoke pytest to ensure the retrain helper imports and basic pipeline construction works. It does not run heavy training.
61
+
62
+ Contributing
63
+
64
+ See `CONTRIBUTING.md` for guidance on opening issues and PRs.
65
+
66
+ License
67
+
68
+ This project is released under the MIT License — see `LICENSE`.
69
+
ctgov_client.py ADDED
@@ -0,0 +1,225 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # Shared client for ClinicalTrials.gov v2 API and scoring
2
+ import re
3
+ import requests
4
+ from typing import Any, Dict, List, Tuple
5
+
6
+ DEFAULT_DIAG_TERMS = {
7
+ "Glioblastoma": ["glioblastoma", "GBM", "glioblastoma multiforme"],
8
+ "Diffuse midline glioma": ["diffuse midline glioma", "DMG", "H3 K27M"],
9
+ "Anaplastic astrocytoma": ["anaplastic astrocytoma", "grade 3 astrocytoma"],
10
+ "Astrocytoma": ["astrocytoma", "grade 2 astrocytoma", "grade 4 astrocytoma"],
11
+ "Oligodendroglioma": ["oligodendroglioma", "1p19q codeleted"],
12
+ "Meningioma": ["meningioma"],
13
+ "Medulloblastoma": ["medulloblastoma"],
14
+ "Ependymoma": ["ependymoma"],
15
+ "Spinal cord tumor": ["spinal cord tumor", "spinal cord neoplasm"],
16
+ }
17
+
18
+ API_BASE = "https://clinicaltrials.gov/api/v2/studies"
19
+ UA = {"User-Agent": "BrainTrialsFinder-Desktop/1.0 (+https://clinicaltrials.gov)"}
20
+
21
+
22
def build_terms(diagnosis: str, keywords: str) -> List[str]:
    """Search terms for *diagnosis*: the canned list when known, otherwise
    generic CNS terms, followed by any comma-separated user keywords."""
    if diagnosis in DEFAULT_DIAG_TERMS:
        terms: List[str] = list(DEFAULT_DIAG_TERMS[diagnosis])
    else:
        terms = ["brain tumor", "spinal cord tumor", "CNS tumor"]
    extras = [kw.strip() for kw in (keywords or "").split(",") if kw.strip()]
    return terms + extras
30
+
31
+
32
def ctgov_search_one(term: str, statuses: List[str], page_size: int = 100, max_pages: int = 5) -> List[Dict[str, Any]]:
    """Fetch up to *max_pages* pages of v2 API results for one search term."""
    http = requests.Session()
    http.headers.update(UA)
    results: List[Dict[str, Any]] = []
    token = None
    pages_done = 0
    page_limit = max_pages or 0
    while pages_done < page_limit:
        params = {
            "query.term": term,
            "filter.overallStatus": ",".join(statuses),
            "pageSize": page_size,
        }
        if token:
            params["pageToken"] = token
        resp = http.get(API_BASE, params=params, timeout=30)
        resp.raise_for_status()
        payload = resp.json()
        batch = payload.get("studies", [])
        if not batch:
            break
        results.extend(batch)
        token = payload.get("nextPageToken")
        if not token:
            break
        pages_done += 1
    return results
59
+
60
+
61
def fetch_all_terms(terms: List[str], statuses: List[str], page_size=100, max_pages=5) -> List[Dict[str, Any]]:
    """Search every term and merge results, de-duplicating by NCT id.

    Terms whose requests fail with an HTTP error are silently skipped.
    """
    seen: Dict[str, Dict[str, Any]] = {}
    for term in terms:
        try:
            for study in ctgov_search_one(term, statuses, page_size=page_size, max_pages=max_pages):
                ident = (study.get("protocolSection", {}) or {}).get("identificationModule", {}) or {}
                # Fall back to object identity when a study has no NCT id.
                key = ident.get("nctId") or id(study)
                seen.setdefault(key, study)
        except requests.HTTPError:
            continue
    return list(seen.values())
74
+
75
+
76
def mentions(txt: str, term: str) -> bool:
    """Whole-word, case-insensitive search for *term* inside *txt* (None-safe)."""
    pattern = rf"\b{re.escape(term)}\b"
    return re.search(pattern, txt or "", re.I) is not None
78
+
79
+
80
def as_text(obj: Any) -> str:
    """Flatten an API value (None / dict / list / scalar) into a plain string."""
    if obj is None:
        return ""
    if isinstance(obj, dict):
        # Prefer the conventional text-bearing keys before dumping all values.
        for key in ("textblock", "textBlock", "value"):
            if key in obj:
                return str(obj.get(key) or "")
        return " ".join(str(val) for val in obj.values() if val is not None)
    if isinstance(obj, list):
        return "; ".join(as_text(item) for item in obj)
    return str(obj)
91
+
92
+
93
def parse_age_to_int(v: Any):
    """Extract an integer age from ints, floats, dicts ({"value": ...}), or
    strings like "18 Years"; returns None when nothing parses."""
    if v is None:
        return None
    if isinstance(v, dict):
        # API sometimes nests the value; recurse one level.
        return parse_age_to_int(v.get("value"))
    if isinstance(v, (int, float)):
        return int(v)
    digits = re.search(r"(\d+)", str(v))
    return int(digits.group(1)) if digits else None
102
+
103
+
104
def ensure_list(v: Any):
    """Normalise to a list: None -> [], lists pass through, scalars are wrapped."""
    if isinstance(v, list):
        return v
    return [] if v is None else [v]
110
+
111
+
112
def score_trial(t: Dict[str, Any], intake: Dict[str, Any]) -> Tuple[int, List[str]]:
    """Score a study (clamped to 0-100) against the patient intake.

    Returns (score, reasons). Heuristics: diagnosis match in conditions/title
    (+30), phase 2 (+8) / phase 3 (+12), hard age-eligibility penalties (-30),
    performance-status requirements (-10/-15), prior-bevacizumab exclusion
    (-25), setting match (+8), and +3 per matching user keyword.
    """
    safe_intake = intake or {}
    age_local = safe_intake.get("age")
    kps_local = safe_intake.get("kps")
    prior_bev_local = bool(safe_intake.get("prior_bev", False))
    setting_local = safe_intake.get("setting") or ""
    keywords_local = safe_intake.get("keywords") or ""
    diagnosis_local = safe_intake.get("diagnosis") or ""

    # Terms used to decide whether the trial targets this diagnosis.
    if diagnosis_local in DEFAULT_DIAG_TERMS:
        diag_terms = DEFAULT_DIAG_TERMS[diagnosis_local]
    elif diagnosis_local and diagnosis_local != "Other":
        diag_terms = [diagnosis_local]
    else:
        diag_terms = ["brain tumor", "CNS tumor", "spinal cord tumor"]

    ps = (t or {}).get("protocolSection") or {}
    elig = ps.get("eligibilityModule")
    crit = ""
    min_age = None
    max_age = None
    if isinstance(elig, dict):
        crit_raw = elig.get("eligibilityCriteria") or elig.get("criteria") or elig
        crit = as_text(crit_raw)
        min_age = parse_age_to_int(elig.get("minimumAge"))
        max_age = parse_age_to_int(elig.get("maximumAge"))
    elif isinstance(elig, str):
        crit = as_text(elig)

    # FIX: guard with `or {}` like the rest of the file — `ps.get(key, {})`
    # raised AttributeError when the module key was present with a None value.
    phases_list = ensure_list((ps.get("designModule") or {}).get("phases"))
    phases_up = [str(p).upper() for p in phases_list]
    conds_list = ensure_list((ps.get("conditionsModule") or {}).get("conditions"))
    title = (ps.get("identificationModule") or {}).get("briefTitle", "")

    s = 0
    reasons: List[str] = []
    if any(any(mentions(c, term) for term in diag_terms) for c in conds_list) or any(mentions(title, term) for term in diag_terms):
        s += 30
        reasons.append(f"Matches diagnosis: {diagnosis_local or 'neuro-oncology'}.")
    if any("PHASE 2" in p or "PHASE2" in p for p in phases_up):
        s += 8
    if any("PHASE 3" in p or "PHASE3" in p for p in phases_up):
        s += 12
    try:
        if min_age is not None and age_local is not None and age_local < min_age:
            reasons.append(f"Age below minimum ({min_age}).")
            s -= 30
        if max_age is not None and age_local is not None and age_local > max_age:
            reasons.append(f"Age above maximum ({max_age}).")
            s -= 30
    except Exception:
        # Non-numeric age inputs must not abort scoring.
        pass
    if mentions(crit, "ECOG 0-1") and (kps_local is None or kps_local < 80):
        s -= 15
        reasons.append("Requires ECOG 0–1 (KPS ~≥80).")
    if mentions(crit, "Karnofsky") and (kps_local is None or kps_local < 70):
        s -= 10
        reasons.append("Requires KPS ≥70.")
    if prior_bev_local and mentions(crit, "no prior bevacizumab"):
        s -= 25
        reasons.append("Excludes prior bevacizumab.")
    if setting_local == "Recurrent" and mentions(crit, "recurrent"):
        s += 8
    if setting_local == "Newly diagnosed" and (mentions(crit, "newly diagnosed") or mentions(title, "adjuvant")):
        s += 8
    for kw in [k.strip() for k in (keywords_local or "").split(",") if k.strip()]:
        if mentions(title, kw) or mentions(crit, kw):
            s += 3
    return max(0, min(100, s)), reasons
180
+ # python
181
def extract_row(study: dict) -> dict:
    """Return a flat row dict for the table/PDF. Safe against missing fields."""
    section = (study.get("protocolSection") or {})
    ident = (section.get("identificationModule") or {})
    status_mod = (section.get("statusModule") or {})
    design = (section.get("designModule") or {})
    cond_mod = (section.get("conditionsModule") or {})
    sponsor_mod = (section.get("sponsorCollaboratorsModule") or {})
    contacts = (section.get("contactsLocationsModule") or {})

    title = (ident.get("officialTitle") or ident.get("briefTitle") or "").strip()
    nct = (ident.get("nctId") or "").strip()

    # e.g., RECRUITING -> Recruiting
    raw_status = (status_mod.get("overallStatus") or "").strip()
    status = raw_status.replace("_", " ").title() if raw_status else ""

    phases = ", ".join(ensure_list(design.get("phases")))
    conditions = ", ".join(ensure_list(cond_mod.get("conditions")))

    lead = sponsor_mod.get("leadSponsor") or {}
    sponsor = (lead.get("name") or "").strip() if isinstance(lead, dict) else ""

    # First listed site rendered as "City, Country" (either part may be absent).
    city_country = ""
    sites = ensure_list(contacts.get("locations"))
    if sites:
        first = sites[0]
        bits = [
            (first.get("locationCity") or "").strip(),
            (first.get("locationCountry") or "").strip(),
        ]
        city_country = ", ".join(b for b in bits if b)

    return {
        "title": title,
        "nct": nct,
        "status": status,
        "phases": phases,
        "conditions": conditions,
        "sponsor": sponsor,
        "city_country": city_country,
    }
desktop_app.py ADDED
@@ -0,0 +1,607 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # python
2
+ # Desktop GUI for Brain Trials Finder (no Streamlit)
3
+ # Run with: python desktop_app.py
4
+ import threading
5
+ import tkinter as tk
6
+ from tkinter import ttk, messagebox, filedialog
7
+ import urllib.parse
8
+ import webbrowser
9
+ from typing import List, Dict, Any
10
+
11
+ from ctgov_client import (
12
+ DEFAULT_DIAG_TERMS,
13
+ build_terms,
14
+ fetch_all_terms,
15
+ score_trial,
16
+ extract_row,
17
+ ensure_list,
18
+ )
19
+ from uk_sources import fetch_uk_trials
20
+ from euctr_client import fetch_eu_trials
21
+
22
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
23
+ COPYRIGHT = "© 2025 Brain Trials Finder | Prajwal Ghimire"
24
+ __copyright__ = COPYRIGHT
25
+
26
+ # Predefined NIHR UK location options for portal queries
27
+ UK_NIHR_LOCATIONS = [
28
+ "Nottingham",
29
+ "Liverpool",
30
+ "Preston",
31
+ "Brighton",
32
+ "Cardiff",
33
+ "Leeds",
34
+ "Plymouth",
35
+ "Coventry",
36
+ "Newcastle upon Tyne",
37
+ "Dundee",
38
+ "Cambridge",
39
+ "Birmingham",
40
+ "Hull",
41
+ "Stoke-on-Trent",
42
+ "Romford",
43
+ "Southampton",
44
+ "Bristol",
45
+ "Middlesbrough",
46
+ "London",
47
+ "Sheffield",
48
+ "Edinburgh",
49
+ "Oxford",
50
+ ]
51
+
52
+
53
class BrainTrialsApp(tk.Tk):
    """Tkinter desktop front-end for searching brain-cancer clinical trials.

    Sources: ClinicalTrials.gov (via ``ctgov_client``), the UK aggregator
    (``uk_sources``) and the EU Clinical Trials Register (``euctr_client``).
    Network fetches run on daemon threads; all widget updates are marshalled
    back onto the Tk main loop with ``self.after`` (Tkinter is not
    thread-safe).
    """

    def __init__(self):
        super().__init__()
        self.title(f"Brain Cancer Trials Finder - Desktop App - {COPYRIGHT}")
        self.geometry("1200x760")

        # ----- Top controls: patient intake and search filters -----
        frm = ttk.Frame(self, padding=10)
        frm.pack(fill="x")

        # Diagnosis (predefined terms from ctgov_client plus free-form "Other")
        ttk.Label(frm, text="Diagnosis:").grid(row=0, column=0, sticky=tk.W, padx=(0, 6))
        diag_options = list(DEFAULT_DIAG_TERMS.keys()) + ["Other"]
        self.diagnosis = tk.StringVar(value="Glioblastoma")
        ttk.Combobox(frm, textvariable=self.diagnosis, values=diag_options,
                     state="readonly", width=28).grid(row=0, column=1, sticky=tk.W)

        # Disease setting
        ttk.Label(frm, text="Setting:").grid(row=0, column=2, sticky=tk.W, padx=(16, 6))
        self.setting = tk.StringVar(value="Recurrent")
        ttk.Combobox(frm, textvariable=self.setting, values=["Newly diagnosed", "Recurrent"],
                     state="readonly", width=20).grid(row=0, column=3, sticky=tk.W)

        # Age
        ttk.Label(frm, text="Age:").grid(row=0, column=4, sticky=tk.W, padx=(16, 6))
        self.age = tk.IntVar(value=55)
        tk.Spinbox(frm, from_=1, to=100, textvariable=self.age, width=6).grid(row=0, column=5, sticky=tk.W)

        # KPS (Karnofsky performance status, steps of 10)
        ttk.Label(frm, text="KPS:").grid(row=0, column=6, sticky=tk.W, padx=(16, 6))
        self.kps = tk.IntVar(value=80)
        tk.Spinbox(frm, from_=40, to=100, increment=10, textvariable=self.kps, width=6).grid(row=0, column=7, sticky=tk.W)

        # Prior bevacizumab exposure
        self.prior_bev = tk.BooleanVar(value=False)
        ttk.Checkbutton(frm, text="Prior bevacizumab", variable=self.prior_bev).grid(row=1, column=1, sticky=tk.W, pady=(6, 0))

        # Free-text keywords (comma-separated)
        ttk.Label(frm, text="Keywords:").grid(row=1, column=2, sticky=tk.W, padx=(16, 6), pady=(6, 0))
        self.keywords = tk.StringVar(value="immunotherapy,vaccine,device")
        ttk.Entry(frm, textvariable=self.keywords, width=32).grid(row=1, column=3, sticky=tk.W, pady=(6, 0))

        # Country filter (optional substring match against site country)
        ttk.Label(frm, text="Country contains:").grid(row=1, column=4, sticky=tk.W, padx=(16, 6), pady=(6, 0))
        self.country = tk.StringVar(value="")
        ttk.Entry(frm, textvariable=self.country, width=18).grid(row=1, column=5, sticky=tk.W, pady=(6, 0))
        self.require_country = tk.BooleanVar(value=False)
        ttk.Checkbutton(frm, text="Require site in country", variable=self.require_country).grid(row=1, column=6, sticky=tk.W, pady=(6, 0))

        # Search button and status label
        self.btn_search = ttk.Button(frm, text="Search", command=self.on_search)
        self.btn_search.grid(row=0, column=8, sticky=tk.W, padx=(16, 0))
        self.status_lbl = ttk.Label(frm, text="Ready")
        self.status_lbl.grid(row=1, column=8, sticky=tk.W, padx=(16, 0))

        # ----- UK / EU sources section -----
        ukfrm = ttk.Labelframe(self, text="UK Sources", padding=10)
        ukfrm.pack(fill="x", padx=10)
        self.uk_use_ctgov = tk.BooleanVar(value=True)
        ttk.Checkbutton(ukfrm, text="ClinicalTrials.gov (UK sites only)", variable=self.uk_use_ctgov).grid(row=0, column=0, sticky=tk.W)
        # EU CTR toggle and politeness controls for the scraper
        self.use_euctr = tk.BooleanVar(value=True)
        ttk.Checkbutton(ukfrm, text="Include EU Clinical Trials Register (EUCTR)", variable=self.use_euctr).grid(row=0, column=3, sticky=tk.W)
        ttk.Label(ukfrm, text="EUCTR delay (s):").grid(row=1, column=3, sticky=tk.W, padx=(8, 0))
        self.euctr_delay = tk.DoubleVar(value=0.8)
        ttk.Entry(ukfrm, textvariable=self.euctr_delay, width=6).grid(row=1, column=4, sticky=tk.W)
        ttk.Label(ukfrm, text="EUCTR max pages:").grid(row=1, column=5, sticky=tk.W, padx=(8, 0))
        self.euctr_maxpages = tk.IntVar(value=2)
        ttk.Entry(ukfrm, textvariable=self.euctr_maxpages, width=4).grid(row=1, column=6, sticky=tk.W)
        self.btn_search_uk = ttk.Button(ukfrm, text="Search UK", command=self.on_search_uk)
        self.btn_search_uk.grid(row=0, column=1, padx=(16, 0))
        # Separate EU search button (decoupled from main Search)
        self.btn_search_eu = ttk.Button(ukfrm, text="Search EU", command=self.on_search_eu)
        self.btn_search_eu.grid(row=0, column=4, padx=(8, 0))
        ttk.Button(ukfrm, text="Save PDF", command=self.on_save_pdf).grid(row=0, column=2, padx=(16, 0))
        # Shortcuts that open external UK portals in the browser
        ttk.Button(ukfrm, text="Open NIHR", command=self.on_open_nihr).grid(row=1, column=0, pady=(8, 0), sticky=tk.W)
        ttk.Button(ukfrm, text="Open ISRCTN (UK)", command=self.on_open_isrctn).grid(row=1, column=1, pady=(8, 0), sticky=tk.W)
        ttk.Button(ukfrm, text="Open CRUK", command=self.on_open_cruk).grid(row=1, column=2, pady=(8, 0), sticky=tk.W)
        # Optional NIHR location restriction
        ttk.Label(ukfrm, text="NIHR location (optional):").grid(row=2, column=0, sticky=tk.W, pady=(8, 0))
        self.uk_location = tk.StringVar(value="")
        ttk.Combobox(ukfrm, textvariable=self.uk_location, values=UK_NIHR_LOCATIONS, width=28, state="readonly").grid(row=2, column=1, sticky=tk.W, pady=(8, 0))

        # ----- Results tree -----
        cols = ("score", "title", "sponsor", "city_country", "status", "phases", "conditions", "nct", "source")
        self.tree = ttk.Treeview(self, columns=cols, show="headings", height=18)
        self.tree.pack(fill="both", expand=True, padx=10, pady=(6, 10))
        headings = {
            "score": "Score",
            "title": "Title",
            "sponsor": "Sponsor",
            "city_country": "City/Country",
            "status": "Status",
            "phases": "Phases",
            "conditions": "Conditions",
            "nct": "NCT ID",
            "source": "Source",
        }
        widths = {
            "score": 60,
            "title": 330,
            "sponsor": 220,
            "city_country": 160,
            "status": 120,
            "phases": 110,
            "conditions": 260,
            "nct": 120,
            "source": 120,
        }
        for col in cols:
            self.tree.heading(col, text=headings[col])
            # Only the numeric score column is centered; others use the default.
            self.tree.column(col, width=widths[col], anchor="center" if col == "score" else "w")
        self.tree.bind("<Double-1>", self.on_open)
        self.tree.bind("<<TreeviewSelect>>", self.on_select)

        # Per-row mappings (Treeview item id -> url / raw study dict)
        self._url_by_item: Dict[str, str] = {}
        self._study_by_item: Dict[str, Dict[str, Any]] = {}
        self._current_rows: List[Dict[str, Any]] = []  # rows currently displayed (for export)

        # ----- Contacts and Locations panel -----
        infofrm = ttk.Labelframe(self, text="Contacts and Locations", padding=10)
        infofrm.pack(fill="both", expand=True, padx=10, pady=(0, 10))
        self.contacts_text = tk.Text(infofrm, height=12, wrap="word")
        self.contacts_text.config(state="disabled")
        scroll = ttk.Scrollbar(infofrm, orient="vertical", command=self.contacts_text.yview)
        self.contacts_text.configure(yscrollcommand=scroll.set)
        self.contacts_text.grid(row=0, column=0, sticky="nsew")
        scroll.grid(row=0, column=1, sticky="ns")
        infofrm.columnconfigure(0, weight=1)
        infofrm.rowconfigure(0, weight=1)

        # No automatic search on startup; the user must press Search explicitly.

    # ----- Portal helpers -----
    def _build_portal_query(self) -> str:
        """Return a URL-encoded query for the external UK portals.

        Uses the selected diagnosis unless it is "Other", in which case the
        keywords (or a generic fallback) are used instead.
        """
        diag = (self.diagnosis.get() or "").strip()
        if diag and diag != "Other":
            q = diag
        else:
            q = (self.keywords.get() or "").strip() or "brain tumour"
        return urllib.parse.quote_plus(q)

    def on_open_nihr(self):
        """Open the NIHR 'Be Part of Research' portal with the current query."""
        q = self._build_portal_query()
        base = "https://www.bepartofresearch.nihr.ac.uk/results/search-results"
        loc_txt = (self.uk_location.get() or "").strip()
        if loc_txt:
            loc = urllib.parse.quote_plus(loc_txt)
            url = f"{base}?query={q}&location={loc}"
        else:
            url = f"{base}?query={q}"
        webbrowser.open_new_tab(url)

    def on_open_isrctn(self):
        """Open the ISRCTN registry search (restricted to the UK)."""
        q = self._build_portal_query()
        url = f"https://www.isrctn.com/search?q={q}&countries=United%20Kingdom"
        webbrowser.open_new_tab(url)

    def on_open_cruk(self):
        """Open the Cancer Research UK trial-finder with the current query."""
        q = self._build_portal_query()
        url = f"https://find.cancerresearchuk.org/clinical-trials?q={q}"
        webbrowser.open_new_tab(url)

    # ----- Actions -----
    def on_open(self, event=None):
        """Open the first selected row's registry page (double-click handler)."""
        sel = self.tree.selection()
        if not sel:
            return
        for iid in sel:
            url = self._url_by_item.get(iid)
            if url:
                webbrowser.open_new_tab(url)
                break

    def on_select(self, event=None):
        """Show contacts/locations for the newly selected row."""
        sel = self.tree.selection()
        if not sel:
            return
        study = self._study_by_item.get(sel[0])
        if study:
            self._populate_contacts(study)

    def _read_intake(self) -> Dict[str, Any]:
        """Snapshot the intake widgets into a plain dict (main thread only)."""
        return {
            "age": self.age.get(),
            "kps": self.kps.get(),
            "prior_bev": self.prior_bev.get(),
            "setting": self.setting.get(),
            "keywords": self.keywords.get(),
            "diagnosis": self.diagnosis.get(),
        }

    @staticmethod
    def _fallback_city_country(base: Dict[str, Any], locs: List[Dict[str, Any]]) -> None:
        """Fill ``base['city_country']`` from the first location when missing."""
        if base.get("city_country") or not locs:
            return
        first = locs[0]
        parts = [p for p in [(first.get("locationCity") or "").strip(),
                             (first.get("locationCountry") or "").strip()] if p]
        if parts:
            base["city_country"] = ", ".join(parts)

    def on_search(self):
        """Search ClinicalTrials.gov with the current intake and render results.

        All Tk variables are read on the main thread before the worker starts;
        the worker only does network/CPU work and posts back via ``after``.
        """
        self.btn_search.configure(state=tk.DISABLED)
        self.btn_search_uk.configure(state=tk.DISABLED)
        self.status_lbl.configure(text="Fetching…")
        intake = self._read_intake()
        diagnosis = intake["diagnosis"]
        keywords = intake["keywords"]
        country = self.country.get().strip()
        require_country = self.require_country.get()

        def worker():
            try:
                terms = build_terms(diagnosis, keywords)
                # Fetch ClinicalTrials.gov results only (no EUCTR fetch here).
                studies = fetch_all_terms(terms, STATUSES, page_size=100, max_pages=5)
                rows: List[Dict[str, Any]] = []
                skipped = 0
                for s in studies:
                    try:
                        ps = (s.get("protocolSection", {}) or {})
                        clm = (ps.get("contactsLocationsModule", {}) or {})
                        locs = ensure_list(clm.get("locations"))
                        if country and require_country:
                            locs = [loc for loc in locs
                                    if country.lower() in (loc.get("locationCountry") or "").lower()]
                        if require_country and not locs:
                            continue
                        sc, reasons = score_trial(s, intake)
                        base = extract_row(s)
                        # BUGFIX: the score/reasons were computed but never stored,
                        # so every row previously sorted and rendered with score 0.
                        base["score"] = sc
                        base["reasons"] = "; ".join(reasons)
                        self._fallback_city_country(base, locs)
                        # This search path only queries CT.gov, so the source is fixed.
                        base["source"] = "CTGov"
                        base["url"] = (f"https://clinicaltrials.gov/study/{base['nct']}"
                                       if base.get("nct") else s.get("_source_url", ""))
                        base["study"] = s
                        rows.append(base)
                    except Exception:
                        skipped += 1
                        continue
                rows.sort(key=lambda r: -r.get("score", 0))
                self.after(0, self._render_rows, rows, skipped, len(studies))
            except Exception as e:
                self.after(0, self._show_error, e)

        threading.Thread(target=worker, daemon=True).start()

    def on_search_uk(self):
        """Search UK-specific sources via ``uk_sources.fetch_uk_trials``."""
        self.btn_search.configure(state=tk.DISABLED)
        self.btn_search_uk.configure(state=tk.DISABLED)
        self.status_lbl.configure(text="Fetching UK trials…")
        intake = self._read_intake()
        diagnosis = intake["diagnosis"]
        keywords = intake["keywords"]
        use_ctgov = self.uk_use_ctgov.get()

        def worker():
            try:
                rows, total_raw, skipped = fetch_uk_trials(
                    diagnosis, keywords, intake, include_ctgov=use_ctgov
                )
                self.after(0, self._render_rows, rows, skipped, total_raw)
            except Exception as e:
                self.after(0, self._show_error, e)

        threading.Thread(target=worker, daemon=True).start()

    def on_search_eu(self):
        """Run an EUCTR-only search and display results (separate button)."""
        self.btn_search.configure(state=tk.DISABLED)
        self.btn_search_uk.configure(state=tk.DISABLED)
        self.btn_search_eu.configure(state=tk.DISABLED)
        self.status_lbl.configure(text="Fetching EU trials…")
        diagnosis = self.diagnosis.get()
        keywords = self.keywords.get()
        country = self.country.get().strip()
        require_country = self.require_country.get()
        # BUGFIX: read Tk variables on the main thread — the previous version
        # called self.euctr_maxpages.get()/euctr_delay.get() from the worker
        # thread, and re-enabled buttons from it, which is unsafe in Tkinter.
        max_pages = self.euctr_maxpages.get()
        delay = self.euctr_delay.get()

        def worker_eu():
            try:
                terms = build_terms(diagnosis, keywords)
                try:
                    eu_studies = fetch_eu_trials(terms, STATUSES, page_size=50,
                                                 max_pages=max_pages, polite_delay=delay)
                except Exception:
                    # Best-effort: the EUCTR scrape is fragile; show no EU rows
                    # rather than failing the whole search.
                    eu_studies = []
                # EU records rarely carry full intake fields, so score with a
                # neutral intake (age/KPS unknown).
                intake = {
                    "age": None,
                    "kps": None,
                    "prior_bev": False,
                    "setting": "",
                    "keywords": keywords,
                    "diagnosis": diagnosis,
                }
                rows: List[Dict[str, Any]] = []
                skipped = 0
                for s in eu_studies:
                    try:
                        ps = (s.get("protocolSection", {}) or {})
                        clm = (ps.get("contactsLocationsModule", {}) or {})
                        locs = ensure_list(clm.get("locations"))
                        if country and require_country:
                            locs = [loc for loc in locs
                                    if country.lower() in (loc.get("locationCountry") or "").lower()]
                        if require_country and not locs:
                            continue
                        # Reuse score_trial where possible (may be incomplete for
                        # EU studies — fall back to zero on any failure).
                        try:
                            sc, reasons = score_trial(s, intake)
                        except Exception:
                            sc, reasons = 0, []
                        base = extract_row(s)
                        self._fallback_city_country(base, locs)
                        base["score"] = sc
                        base["reasons"] = "; ".join(reasons)
                        base["source"] = "EUCTR"
                        base["url"] = s.get("_source_url", "")
                        base["study"] = s
                        rows.append(base)
                    except Exception:
                        skipped += 1
                        continue
                rows.sort(key=lambda r: -r.get("score", 0))
                self.after(0, self._render_rows, rows, skipped, len(eu_studies))
            except Exception as e:
                self.after(0, self._show_error, e)
            # Buttons are re-enabled by _render_rows/_show_error on the Tk thread.

        threading.Thread(target=worker_eu, daemon=True).start()

    # ----- Rendering & details -----
    def _show_error(self, e: Exception):
        """Re-enable controls and surface a fetch failure to the user."""
        self.btn_search.configure(state=tk.NORMAL)
        self.btn_search_uk.configure(state=tk.NORMAL)
        self.btn_search_eu.configure(state=tk.NORMAL)
        self.status_lbl.configure(text="Error")
        messagebox.showerror("Error", f"Failed to fetch trials.\n{e}")

    def _render_rows(self, rows: List[Dict[str, Any]], skipped: int, total: int):
        """Replace the tree contents with *rows* and update the status line.

        Must run on the Tk main thread (workers schedule it via ``after``).
        """
        for iid in self.tree.get_children():
            self.tree.delete(iid)
        self._url_by_item.clear()
        self._study_by_item.clear()
        self._current_rows = rows[:]  # snapshot for PDF export

        for r in rows[:300]:  # cap rendering for responsiveness
            # Prefer showing the EU CT (EudraCT) number when the NCT is absent.
            nct_display = r.get("nct") or ""
            if not nct_display:
                study = r.get("study") or {}
                try:
                    nct_display = (study.get("protocolSection", {})
                                   .get("identificationModule", {})
                                   .get("eudractNumber")) or nct_display
                except Exception:
                    pass

            # Fall back to the trial's country list when city_country is missing.
            city_country = r.get("city_country") or ""
            if not city_country:
                study = r.get("study") or {}
                try:
                    locs = (study.get("protocolSection", {})
                            .get("contactsLocationsModule", {})
                            .get("locations")) or []
                    countries = [c for c in
                                 ((loc.get("locationCountry") or "").strip() for loc in locs)
                                 if c]
                    if countries:
                        city_country = ", ".join(countries)
                except Exception:
                    pass

            values = (
                r.get("score", 0),
                r.get("title", ""),
                r.get("sponsor", ""),
                city_country,
                r.get("status", ""),
                r.get("phases", ""),
                r.get("conditions", ""),
                nct_display,
                r.get("source", ""),
            )
            iid = self.tree.insert("", "end", values=values)
            if r.get("url"):
                self._url_by_item[iid] = r["url"]
            if r.get("study"):
                self._study_by_item[iid] = r["study"]

        txt = f"Fetched {total} trials; showing {len(rows)} after filters."
        if skipped:
            txt += f" Skipped {skipped}."
        self.status_lbl.configure(text=txt)
        self.btn_search.configure(state=tk.NORMAL)
        self.btn_search_uk.configure(state=tk.NORMAL)
        self.btn_search_eu.configure(state=tk.NORMAL)

    def _populate_contacts(self, study: Dict[str, Any]):
        """Render the study's contacts/officials/locations into the text panel."""
        ps = (study.get("protocolSection", {}) or {})
        clm = (ps.get("contactsLocationsModule", {}) or {})
        lines: List[str] = []

        # Central contacts
        centrals = ensure_list(clm.get("centralContacts"))
        if centrals:
            lines.append("Central Contacts:")
            for c in centrals:
                parts = [p for p in [(c.get("name") or "").strip(),
                                     (c.get("role") or "").strip(),
                                     (c.get("phone") or "").strip(),
                                     (c.get("email") or "").strip()] if p]
                if parts:
                    lines.append(" - " + " | ".join(parts))

        # Overall officials
        officials = ensure_list(clm.get("overallOfficials"))
        if officials:
            lines.append("Overall Officials:")
            for o in officials:
                parts = [p for p in [(o.get("name") or "").strip(),
                                     (o.get("role") or "").strip(),
                                     (o.get("affiliation") or "").strip()] if p]
                if parts:
                    lines.append(" - " + " | ".join(parts))

        # Locations (with per-location contacts when present)
        locs = ensure_list(clm.get("locations"))
        if locs:
            lines.append("Locations:")
            for loc in locs:
                facility = (loc.get("locationFacility") or "").strip()
                city = (loc.get("locationCity") or "").strip()
                state = (loc.get("locationState") or "").strip()
                country = (loc.get("locationCountry") or "").strip()
                status = (loc.get("status") or "").strip()
                site_line = ", ".join(p for p in [facility, city, state, country] if p)
                if site_line:
                    if status:
                        lines.append(f" - {site_line} (status: {status})")
                    else:
                        lines.append(f" - {site_line}")
                # Per-location contacts appear under either key depending on source.
                lcontacts = ensure_list(loc.get("contacts")) or ensure_list(loc.get("locationContacts"))
                for lc in lcontacts:
                    parts = [p for p in [(lc.get("name") or "").strip(),
                                         (lc.get("role") or "").strip(),
                                         (lc.get("phone") or "").strip(),
                                         (lc.get("email") or "").strip()] if p]
                    if parts:
                        lines.append("   • " + " | ".join(parts))

        if not lines:
            lines.append("No contacts/locations provided by sponsor at this time.")

        # The widget is kept read-only; toggle state only while writing.
        self.contacts_text.config(state="normal")
        self.contacts_text.delete("1.0", tk.END)
        self.contacts_text.insert(tk.END, "\n".join(lines))
        self.contacts_text.config(state="disabled")

    # ----- PDF export -----
    def on_save_pdf(self):
        """Prompt for a path and export the currently displayed rows as PDF."""
        if not self._current_rows:
            messagebox.showinfo("Save PDF", "No results to export. Perform a search first.")
            return
        path = filedialog.asksaveasfilename(
            title="Save PDF",
            defaultextension=".pdf",
            filetypes=[("PDF files", "*.pdf")],
            initialfile="brain_trials_results.pdf",
        )
        if not path:
            return
        try:
            self._export_pdf(self._current_rows, path)
            messagebox.showinfo("Save PDF", f"Saved: {path}")
        except Exception as e:
            messagebox.showerror("Save PDF", f"Failed to create PDF.\n{e}")

    def _export_pdf(self, rows: List[Dict[str, Any]], path: str):
        """Write *rows* to *path* as a simple flowing PDF report.

        reportlab is imported lazily so the app runs without it until the
        user actually exports.
        """
        from reportlab.lib.pagesizes import A4
        from reportlab.lib.styles import getSampleStyleSheet
        from reportlab.platypus import SimpleDocTemplate, Paragraph, Spacer
        from reportlab.lib.units import mm

        doc = SimpleDocTemplate(path, pagesize=A4, leftMargin=15 * mm, rightMargin=15 * mm,
                                topMargin=15 * mm, bottomMargin=15 * mm)
        styles = getSampleStyleSheet()
        story = []

        story.append(Paragraph("Brain Cancer Trials – Results", styles["Title"]))
        story.append(Spacer(1, 6))
        story.append(Paragraph(f"Total shown: {len(rows)}", styles["Normal"]))
        story.append(Spacer(1, 12))
        story.append(Paragraph(COPYRIGHT, styles["Normal"]))

        for r in rows:
            title = r.get("title", "")
            nct = r.get("nct", "")
            sponsor = r.get("sponsor", "")
            status = r.get("status", "")
            phases = r.get("phases", "")
            city_country = r.get("city_country", "")
            score = r.get("score", 0)
            url = r.get("url") or (f"https://clinicaltrials.gov/study/{nct}" if nct else "")
            story.append(Paragraph(f"<b>{title}</b>", styles["Heading4"]))
            meta = (
                f"NCT: {nct or '—'} | Sponsor: {sponsor or '—'} | City/Country: {city_country or '—'} | "
                f"Status: {status or '—'} | Phases: {phases or '—'} | Score: {score}"
            )
            story.append(Paragraph(meta, styles["Normal"]))
            if url:
                story.append(Paragraph(f"URL: <a href='{url}' color='blue'>{url}</a>", styles["Normal"]))
            source_txt = r.get("source", "")
            if source_txt:
                story.append(Paragraph(f"Source: {source_txt}", styles["Normal"]))
            story.append(Spacer(1, 8))

        doc.build(story)
603
+
604
+
605
if __name__ == "__main__":
    # Launch the desktop app and hand control to the Tk event loop.
    BrainTrialsApp().mainloop()
logo_precise.png ADDED

Git LFS Details

  • SHA256: 6cfa9f491d5d7b74224d627ac69d5093d45fd9a80f1e110f2de31f315e53db9e
  • Pointer size: 131 Bytes
  • Size of remote file: 145 kB
runtime.txt ADDED
@@ -0,0 +1 @@
 
 
1
+ python-3.10
streamlit_app.py ADDED
@@ -0,0 +1,12 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Streamlit Cloud entrypoint. We reuse your existing app as-is.
# Main requirement: keep the module name and path exactly as below so Linux (Streamlit Cloud) can import it.
#
# IMPORTANT: Do not call st.set_page_config here to avoid double configuration.
# The original module handles all Streamlit layout and rendering.
try:
    # Importing the module executes the Streamlit app defined at its top level.
    import GUI_CLinicalTrial  # noqa: F401
except Exception as exc:
    import streamlit as st

    st.error("Failed to import GUI_CLinicalTrial.py. Ensure the file exists and has no syntax errors.")
    st.exception(exc)
uk_sources.py ADDED
@@ -0,0 +1,80 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ # UK sources aggregator (initial: ClinicalTrials.gov UK filter)
2
+ from typing import List, Dict, Any, Tuple
3
+
4
+ from ctgov_client import (
5
+ build_terms,
6
+ fetch_all_terms,
7
+ score_trial,
8
+ extract_row,
9
+ )
10
+
11
+ STATUSES = ["RECRUITING", "NOT_YET_RECRUITING"]
12
+
13
+
14
+ def _normalize_key(row: Dict[str, Any]) -> str:
15
+ # Prefer identifiers; fallback to normalized title
16
+ nct = (row.get("nct") or "").strip()
17
+ if nct:
18
+ return f"NCT:{nct}"
19
+ title = (row.get("title") or "").lower().strip()
20
+ return f"TITLE:{title}"
21
+
22
+
23
def fetch_uk_trials(
    diagnosis: str,
    keywords: str,
    intake: Dict[str, Any],
    include_ctgov: bool = True,
) -> Tuple[List[Dict[str, Any]], int, int]:
    """
    Fetch UK trials across selected sources.
    Currently implemented: ClinicalTrials.gov with UK site filter.

    Args:
        diagnosis: Diagnosis label used to build the search terms.
        keywords: Comma-separated extra keywords.
        intake: Patient intake dict passed through to ``score_trial``.
        include_ctgov: Whether to query ClinicalTrials.gov (currently the only
            implemented source, so ``False`` yields empty results).

    Returns: (rows, total_raw, skipped)
      rows: list of standard rows with keys: title, nct, status, phases, conditions, site, score, reasons, url
      total_raw: number of raw studies fetched before filters
      skipped: number of studies skipped due to formatting issues
    """
    terms = build_terms(diagnosis, keywords)
    rows: List[Dict[str, Any]] = []
    skipped = 0
    total_raw = 0

    if include_ctgov:
        studies = fetch_all_terms(terms, STATUSES, page_size=100, max_pages=5)
        total_raw += len(studies)
        for s in studies:
            try:
                ps = (s.get("protocolSection", {}) or {})
                locs = ((ps.get("contactsLocationsModule", {}) or {}).get("locations") or [])
                # UK filter (case-insensitive contains)
                uk_locs = [loc for loc in locs
                           if "united kingdom" in (loc.get("locationCountry") or "").lower()]
                if not uk_locs:
                    continue
                sc, reasons = score_trial(s, intake)
                base = extract_row(s)
                # Describe the first UK site.  BUGFIX: skip empty components so
                # the site string no longer renders dangling commas such as
                # "Facility, , United Kingdom" when the city is missing.
                first_site = uk_locs[0]
                site_parts = [
                    (first_site.get("locationFacility") or "").strip(),
                    (first_site.get("locationCity") or "").strip(),
                    (first_site.get("locationCountry") or "").strip(),
                ]
                base["site"] = ", ".join(p for p in site_parts if p)
                base["score"] = sc
                base["reasons"] = "; ".join(reasons)
                base["url"] = f"https://clinicaltrials.gov/study/{base['nct']}" if base.get("nct") else ""
                rows.append(base)
            except Exception:
                # Malformed study records are counted but never abort the batch.
                skipped += 1
                continue

    # Deduplicate by registry ID / normalized title, keeping first occurrence.
    seen = set()
    deduped: List[Dict[str, Any]] = []
    for r in rows:
        k = _normalize_key(r)
        if k in seen:
            continue
        seen.add(k)
        deduped.append(r)

    # Highest score first.
    deduped.sort(key=lambda x: -x.get("score", 0))
    return deduped, total_raw, skipped
80
+