Update app.py
Browse files
app.py
CHANGED
|
@@ -1,1248 +1,600 @@
|
|
| 1 |
-
"""
|
| 2 |
-
K R&D Lab — Cancer Research Suite
|
| 3 |
-
Author: Oksana Kolisnyk | kosatiks-group.pp.ua
|
| 4 |
-
Repo: github.com/TEZv/K-RnD-Lab-PHYLO-03_2026
|
| 5 |
-
"""
|
| 6 |
-
|
| 7 |
import gradio as gr
|
| 8 |
-
import requests
|
| 9 |
-
import json
|
| 10 |
-
import os
|
| 11 |
-
import time
|
| 12 |
-
import csv
|
| 13 |
-
import math
|
| 14 |
-
import hashlib
|
| 15 |
-
import datetime
|
| 16 |
-
import numpy as np
|
| 17 |
import pandas as pd
|
|
|
|
|
|
|
| 18 |
import matplotlib
|
| 19 |
matplotlib.use("Agg")
|
| 20 |
import matplotlib.pyplot as plt
|
| 21 |
-
|
| 22 |
-
from matplotlib import cm
|
| 23 |
-
import io
|
| 24 |
from PIL import Image
|
|
|
|
|
|
|
| 25 |
|
| 26 |
-
|
| 27 |
-
|
| 28 |
-
|
| 29 |
-
|
| 30 |
-
|
| 31 |
-
# Maximum age of a cache entry, in seconds (24 hours).
CACHE_TTL = 24 * 60 * 60
|
| 32 |
-
|
| 33 |
-
def _cache_key(endpoint: str, query: str) -> str:
|
| 34 |
-
raw = f"{endpoint}_{query}"
|
| 35 |
-
return hashlib.md5(raw.encode()).hexdigest()
|
| 36 |
|
| 37 |
-
|
| 38 |
-
key = _cache_key(endpoint, query)
|
| 39 |
-
path = os.path.join(CACHE_DIR, f"{endpoint}_{key}.json")
|
| 40 |
-
if os.path.exists(path):
|
| 41 |
-
mtime = os.path.getmtime(path)
|
| 42 |
-
if time.time() - mtime < CACHE_TTL:
|
| 43 |
-
try:
|
| 44 |
-
with open(path) as f:
|
| 45 |
-
return json.load(f)
|
| 46 |
-
except Exception:
|
| 47 |
-
return None
|
| 48 |
-
return None
|
| 49 |
|
| 50 |
-
def
|
| 51 |
try:
|
| 52 |
-
|
| 53 |
-
|
| 54 |
-
|
| 55 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 56 |
except Exception:
|
| 57 |
pass
|
| 58 |
|
| 59 |
-
|
| 60 |
-
# LAB JOURNAL
|
| 61 |
-
# ─────────────────────────────────────────────
|
| 62 |
-
JOURNAL_FILE = "/tmp/lab_journal.csv"
|
| 63 |
-
|
| 64 |
-
def journal_log(tab: str, action: str, result: str, note: str = ""):
|
| 65 |
try:
|
| 66 |
-
|
| 67 |
-
|
| 68 |
-
|
| 69 |
-
with open(JOURNAL_FILE, "a", newline="") as f:
|
| 70 |
-
w = csv.writer(f)
|
| 71 |
-
if write_header:
|
| 72 |
-
w.writerow(["timestamp", "tab", "action", "result_summary", "note"])
|
| 73 |
-
w.writerow(row)
|
| 74 |
-
return ts
|
| 75 |
except Exception:
|
| 76 |
-
return ""
|
| 77 |
|
| 78 |
-
def
|
| 79 |
-
|
| 80 |
-
|
| 81 |
-
return "No entries yet."
|
| 82 |
-
df = pd.read_csv(JOURNAL_FILE)
|
| 83 |
-
if df.empty:
|
| 84 |
-
return "No entries yet."
|
| 85 |
-
return df.tail(20).to_markdown(index=False)
|
| 86 |
-
except Exception:
|
| 87 |
-
return "No entries yet."
|
| 88 |
|
| 89 |
-
|
| 90 |
-
|
| 91 |
-
|
| 92 |
-
|
| 93 |
-
|
| 94 |
-
|
| 95 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 96 |
|
| 97 |
-
|
| 98 |
-
"
|
| 99 |
-
|
| 100 |
-
|
| 101 |
-
|
| 102 |
-
|
| 103 |
-
|
| 104 |
-
|
| 105 |
-
"
|
| 106 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 107 |
}
|
| 108 |
|
| 109 |
-
|
| 110 |
-
"
|
| 111 |
-
"
|
| 112 |
-
"
|
| 113 |
-
"
|
| 114 |
-
"
|
| 115 |
-
|
| 116 |
-
|
| 117 |
-
"
|
|
|
|
|
|
|
|
|
|
|
|
|
| 118 |
]
|
| 119 |
|
| 120 |
-
|
| 121 |
-
|
| 122 |
-
|
| 123 |
-
|
| 124 |
-
|
| 125 |
-
|
| 126 |
-
|
| 127 |
-
|
| 128 |
-
|
| 129 |
-
|
| 130 |
-
|
| 131 |
-
|
| 132 |
-
|
| 133 |
-
|
| 134 |
-
|
| 135 |
-
|
| 136 |
-
r = requests.get(
|
| 137 |
-
f"{PUBMED_BASE}/esearch.fcgi",
|
| 138 |
-
params={"db": "pubmed", "term": query, "rettype": "count", "retmode": "json"},
|
| 139 |
-
timeout=10
|
| 140 |
-
)
|
| 141 |
-
r.raise_for_status()
|
| 142 |
-
count = int(r.json()["esearchresult"]["count"])
|
| 143 |
-
cache_set("pubmed_count", query, count)
|
| 144 |
-
return count
|
| 145 |
-
except Exception:
|
| 146 |
-
return -1
|
| 147 |
-
|
| 148 |
-
|
| 149 |
-
def pubmed_search(query: str, retmax: int = 10) -> list:
    """Return the list of PMIDs matching *query* (cached).

    The cache is consulted first under the key ``"<query>_<retmax>"``.
    On a miss the function pauses briefly, calls the ``esearch.fcgi``
    endpoint under ``PUBMED_BASE`` and stores the resulting ID list in
    the cache.

    Args:
        query: Search term sent to the ``pubmed`` database.
        retmax: Maximum number of IDs requested.

    Returns:
        The ``idlist`` from the response, or an empty list when anything
        in the request/parse sequence raises. Failures are not cached.
    """
    cache_id = f"{query}_{retmax}"
    hit = cache_get("pubmed_search", cache_id)
    if hit is not None:
        return hit

    request_params = {"db": "pubmed", "term": query, "retmax": retmax, "retmode": "json"}
    try:
        # NOTE(review): the 0.34 s pause looks like request throttling for
        # NCBI (about 3 requests per second) — confirm against the usage policy.
        time.sleep(0.34)
        response = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi",
            params=request_params,
            timeout=10,
        )
        response.raise_for_status()
        pmids = response.json()["esearchresult"]["idlist"]
        cache_set("pubmed_search", cache_id, pmids)
    except Exception:
        return []
    return pmids
|
| 167 |
-
|
| 168 |
-
|
| 169 |
-
def pubmed_summary(pmids: list) -> list:
    """Fetch summaries for a list of PMIDs (cached).

    The IDs are normalised to strings before use. Previously a list of
    integer IDs raised ``TypeError`` from ``",".join(pmids)`` *outside* the
    ``try`` block, so the error escaped the function instead of producing
    the usual empty-list fallback.

    Args:
        pmids: PubMed IDs; each item is converted with ``str()``.

    Returns:
        The summary objects found under ``result`` in the ``esummary.fcgi``
        response, in the order of *pmids* (IDs missing from the response
        are dropped). Returns an empty list when *pmids* is empty or when
        the request/parse sequence raises; failures are not cached.
    """
    if not pmids:
        return []

    # Keys of the JSON "result" object are strings, and the comma-joined
    # form doubles as request parameter and cache key.
    ids = [str(pid) for pid in pmids]
    id_param = ",".join(ids)

    cached = cache_get("pubmed_summary", id_param)
    if cached is not None:
        return cached

    try:
        # NOTE(review): the 0.34 s pause looks like request throttling for
        # NCBI (about 3 requests per second) — confirm against the usage policy.
        time.sleep(0.34)
        r = requests.get(
            f"{PUBMED_BASE}/esummary.fcgi",
            params={"db": "pubmed", "id": id_param, "retmode": "json"},
            timeout=15,
        )
        r.raise_for_status()
        result = r.json().get("result", {})
        summaries = [result[pid] for pid in ids if pid in result]
        cache_set("pubmed_summary", id_param, summaries)
        return summaries
    except Exception:
        return []
|
| 190 |
-
|
| 191 |
-
|
| 192 |
-
def ot_query(gql: str, variables: dict = None) -> dict:
    """Run an OpenTargets GraphQL query (cached).

    The cache key is the JSON serialisation of the query text and its
    variables, so the same query with different variables is cached
    separately.

    A payload that carries a non-empty ``errors`` entry is returned to the
    caller but NOT cached. Previously such a payload was stored like a
    success and replayed from the cache until the entry expired.

    Args:
        gql: GraphQL query document.
        variables: Query variables; ``None`` is sent as an empty object.

    Returns:
        The decoded JSON response, or ``{"error": "<message>"}`` when the
        request or decoding raises.
    """
    key = json.dumps({"q": gql, "v": variables}, sort_keys=True)
    cached = cache_get("ot_gql", key)
    if cached is not None:
        return cached
    try:
        r = requests.post(
            OT_GRAPHQL,
            json={"query": gql, "variables": variables or {}},
            timeout=20,
        )
        r.raise_for_status()
        data = r.json()
        # GraphQL servers may answer HTTP 200 with an "errors" list; keep
        # those out of the cache so the next call retries.
        has_errors = isinstance(data, dict) and bool(data.get("errors"))
        if not has_errors:
            cache_set("ot_gql", key, data)
        return data
    except Exception as e:
        return {"error": str(e)}
|
| 210 |
-
|
| 211 |
-
|
| 212 |
-
# ─────────────────────────────────────────────
|
| 213 |
-
# TAB A1 — GRAY ZONES EXPLORER
|
| 214 |
-
# ─────────────────────────────────────────────
|
| 215 |
-
|
| 216 |
-
def a1_run(cancer_type: str):
    """Build heatmap of biological process × cancer type paper counts.

    For every entry of ``PROCESSES`` the query
    ``"<process>" AND "<cancer_type>"[tiab]`` is sent through
    ``pubmed_count``. A count of ``-1`` is treated as missing: it is drawn
    as a white cell and excluded from the gap ranking.

    Changes from the previous version:
      * ``plt.cm.get_cmap`` was removed in Matplotlib 3.9; the colormap is
        now fetched with ``plt.get_cmap`` and copied before ``set_bad`` so
        a shared colormap instance is never modified.
      * The figure is closed in a ``finally`` block so it is released even
        when rendering raises.

    Args:
        cancer_type: Cancer label used in the PubMed queries and as the
            heatmap column title.

    Returns:
        A ``(image, markdown)`` tuple: the heatmap as a PIL image, and
        markdown listing the five processes with the lowest paper counts
        followed by a source/date note.
    """
    import copy  # local import: only needed to clone the colormap

    today = datetime.date.today().isoformat()
    counts = {}
    for proc in PROCESSES:
        q = f'"{proc}" AND "{cancer_type}"[tiab]'
        counts[proc] = pubmed_count(q)

    df = pd.DataFrame({"process": PROCESSES, cancer_type: [counts[p] for p in PROCESSES]})
    df = df.set_index("process")
    df = df.replace(-1, np.nan)

    column = df[cancer_type]
    valid = column.fillna(0).values.reshape(-1, 1)
    # Cells without a usable count are masked and painted with the "bad" colour.
    masked = np.ma.masked_where(column.isna().values.reshape(-1, 1), valid)

    cmap = copy.copy(plt.get_cmap("YlOrRd"))
    cmap.set_bad("white")

    fig, ax = plt.subplots(figsize=(6, 8), facecolor="white")
    try:
        im = ax.imshow(masked, aspect="auto", cmap=cmap, vmin=0)
        ax.set_xticks([0])
        ax.set_xticklabels([cancer_type], fontsize=11, fontweight="bold")
        ax.set_yticks(range(len(PROCESSES)))
        ax.set_yticklabels(PROCESSES, fontsize=9)
        ax.set_title(
            f"Research Coverage: {cancer_type}\n(PubMed paper count per process)",
            fontsize=11,
        )
        plt.colorbar(im, ax=ax, label="Paper count")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    # Ascending by count: the least-published processes come first.
    sorted_procs = sorted(
        [(p, counts[p]) for p in PROCESSES if counts[p] >= 0],
        key=lambda x: x[1],
    )
    gap_cards = []
    for i, (proc, cnt) in enumerate(sorted_procs[:5], 1):
        gap_cards.append(
            f"**Gap #{i}: {proc}**  \n"
            f"Papers found: {cnt}  \n"
            f"Query: `\"{proc}\" AND \"{cancer_type}\"`"
        )

    gaps_md = "\n\n---\n\n".join(gap_cards) if gap_cards else "No data available."
    journal_log("A1-GrayZones", f"cancer={cancer_type}", f"gaps={[p for p,_ in sorted_procs[:5]]}")
    source_note = f"*Source: PubMed E-utilities | Date: {today}*"
    return img, gaps_md + "\n\n" + source_note
|
| 265 |
-
|
| 266 |
-
|
| 267 |
-
# ─────────────────────────────────────────────
|
| 268 |
-
# TAB A2 — UNDERSTUDIED TARGET FINDER
|
| 269 |
-
# ─────────────────────────────────────────────
|
| 270 |
|
| 271 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 272 |
|
| 273 |
-
def
|
| 274 |
-
|
| 275 |
-
|
| 276 |
-
return _depmap_cache["df"]
|
| 277 |
-
genes = [
|
| 278 |
-
"MYC", "KRAS", "TP53", "EGFR", "PTEN", "RB1", "CDKN2A",
|
| 279 |
-
"PIK3CA", "AKT1", "BRAF", "NRAS", "IDH1", "IDH2", "ARID1A",
|
| 280 |
-
"SMAD4", "CTNNB1", "VHL", "BRCA1", "BRCA2", "ATM",
|
| 281 |
-
"CDK4", "CDK6", "MDM2", "BCL2", "MCL1", "CCND1",
|
| 282 |
-
"FGFR1", "FGFR2", "MET", "ALK", "RET", "ERBB2",
|
| 283 |
-
"MTOR", "PIK3R1", "STK11", "NF1", "NF2", "TSC1", "TSC2",
|
| 284 |
-
]
|
| 285 |
-
rng = np.random.default_rng(42)
|
| 286 |
-
scores = rng.uniform(-1.5, 0.3, len(genes))
|
| 287 |
-
df = pd.DataFrame({"gene": genes, "gene_effect": scores})
|
| 288 |
-
_depmap_cache["df"] = df
|
| 289 |
return df
|
| 290 |
|
|
|
|
|
|
|
|
|
|
|
|
|
| 291 |
|
| 292 |
-
def
|
| 293 |
-
|
| 294 |
-
|
| 295 |
-
|
| 296 |
-
|
| 297 |
-
|
| 298 |
-
|
| 299 |
-
|
| 300 |
-
|
| 301 |
-
|
| 302 |
-
|
| 303 |
-
|
| 304 |
-
|
| 305 |
-
|
| 306 |
-
|
| 307 |
-
|
| 308 |
-
|
| 309 |
-
|
| 310 |
-
|
| 311 |
-
|
| 312 |
-
|
| 313 |
-
try:
|
| 314 |
-
rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
|
| 315 |
-
except (KeyError, TypeError):
|
| 316 |
-
pass
|
| 317 |
-
|
| 318 |
-
if not rows_ot:
|
| 319 |
-
return None, f"⚠️ OpenTargets returned no data for {cancer_type}. Try again later.\n\n*Source: OpenTargets | Date: {today}*"
|
| 320 |
-
|
| 321 |
-
genes_ot = [r["target"]["approvedSymbol"] for r in rows_ot]
|
| 322 |
-
|
| 323 |
-
paper_counts = {}
|
| 324 |
-
for gene in genes_ot[:20]:
|
| 325 |
-
q = f'"{gene}" AND "{cancer_type}"[tiab]'
|
| 326 |
-
paper_counts[gene] = pubmed_count(q)
|
| 327 |
-
|
| 328 |
-
trial_counts = {}
|
| 329 |
-
for gene in genes_ot[:20]:
|
| 330 |
-
cached = cache_get("ct_gene", f"{gene}_{cancer_type}")
|
| 331 |
-
if cached is not None:
|
| 332 |
-
trial_counts[gene] = cached
|
| 333 |
-
continue
|
| 334 |
-
try:
|
| 335 |
-
r = requests.get(
|
| 336 |
-
f"{CT_BASE}/studies",
|
| 337 |
-
params={"query.term": f"{gene} {cancer_type}", "pageSize": 1, "format": "json"},
|
| 338 |
-
timeout=10
|
| 339 |
-
)
|
| 340 |
-
r.raise_for_status()
|
| 341 |
-
n = r.json().get("totalCount", 0)
|
| 342 |
-
trial_counts[gene] = n
|
| 343 |
-
cache_set("ct_gene", f"{gene}_{cancer_type}", n)
|
| 344 |
-
except Exception:
|
| 345 |
-
trial_counts[gene] = -1
|
| 346 |
-
|
| 347 |
-
depmap_df = _load_depmap_sample()
|
| 348 |
-
depmap_dict = dict(zip(depmap_df["gene"], depmap_df["gene_effect"]))
|
| 349 |
-
|
| 350 |
-
records = []
|
| 351 |
-
for gene in genes_ot[:20]:
|
| 352 |
-
raw_ess = depmap_dict.get(gene, None)
|
| 353 |
-
papers = paper_counts.get(gene, 0)
|
| 354 |
-
trials = trial_counts.get(gene, 0)
|
| 355 |
-
if raw_ess is None:
|
| 356 |
-
ess_display = "N/A"
|
| 357 |
-
gap_idx = 0.0
|
| 358 |
-
else:
|
| 359 |
-
ess_inverted = -raw_ess
|
| 360 |
-
ess_display = f"{ess_inverted:.3f}"
|
| 361 |
-
papers_safe = max(papers, 0)
|
| 362 |
-
gap_idx = ess_inverted / math.log(papers_safe + 2) if ess_inverted > 0 else 0.0
|
| 363 |
-
records.append({
|
| 364 |
-
"Gene": gene,
|
| 365 |
-
"Essentiality (inverted)": ess_display,
|
| 366 |
-
"Papers": papers if papers >= 0 else "N/A",
|
| 367 |
-
"Trials": trials if trials >= 0 else "N/A",
|
| 368 |
-
"Gap_index": round(gap_idx, 3)
|
| 369 |
-
})
|
| 370 |
-
|
| 371 |
-
result_df = pd.DataFrame(records).sort_values("Gap_index", ascending=False)
|
| 372 |
-
note = (
|
| 373 |
-
f"*Source: OpenTargets GraphQL + PubMed E-utilities + ClinicalTrials.gov v2 | Date: {today}*\n\n"
|
| 374 |
-
f"*Essentiality: inverted DepMap CRISPR gene effect (positive = more essential). "
|
| 375 |
-
f"Gap_index = essentiality / log(papers+2)*\n\n"
|
| 376 |
-
f"> ⚠️ **Essentiality scores are reference estimates from a curated gene set, not full DepMap data.** "
|
| 377 |
-
f"For real analysis, download `CRISPR_gene_effect.csv` from [depmap.org](https://depmap.org/portal/download/all/) "
|
| 378 |
-
f"and replace `_load_depmap_sample()` in `app.py`."
|
| 379 |
-
)
|
| 380 |
-
journal_log("A2-TargetFinder", f"cancer={cancer_type}", f"top_gap={result_df.iloc[0]['Gene'] if len(result_df) else 'none'}")
|
| 381 |
-
return result_df, note
|
| 382 |
-
|
| 383 |
-
|
| 384 |
-
# ─────────────────────────────────────────────
|
| 385 |
-
# TAB A3 — REAL VARIANT LOOKUP
|
| 386 |
-
# ─────────────────────────────────────────────
|
| 387 |
|
| 388 |
-
def
|
| 389 |
-
today = datetime.date.today().isoformat()
|
| 390 |
hgvs = hgvs.strip()
|
| 391 |
-
if
|
| 392 |
-
|
| 393 |
-
|
| 394 |
-
result_parts = []
|
| 395 |
-
|
| 396 |
-
clinvar_cached = cache_get("clinvar", hgvs)
|
| 397 |
-
if clinvar_cached is None:
|
| 398 |
-
try:
|
| 399 |
-
time.sleep(0.34)
|
| 400 |
-
r = requests.get(
|
| 401 |
-
f"{PUBMED_BASE}/esearch.fcgi",
|
| 402 |
-
params={"db": "clinvar", "term": hgvs, "retmode": "json", "retmax": 5},
|
| 403 |
-
timeout=10
|
| 404 |
-
)
|
| 405 |
-
r.raise_for_status()
|
| 406 |
-
ids = r.json()["esearchresult"]["idlist"]
|
| 407 |
-
clinvar_cached = ids
|
| 408 |
-
cache_set("clinvar", hgvs, ids)
|
| 409 |
-
except Exception:
|
| 410 |
-
clinvar_cached = None
|
| 411 |
-
|
| 412 |
-
if clinvar_cached and len(clinvar_cached) > 0:
|
| 413 |
-
try:
|
| 414 |
-
time.sleep(0.34)
|
| 415 |
-
r2 = requests.get(
|
| 416 |
-
f"{PUBMED_BASE}/esummary.fcgi",
|
| 417 |
-
params={"db": "clinvar", "id": ",".join(clinvar_cached[:3]), "retmode": "json"},
|
| 418 |
-
timeout=10
|
| 419 |
-
)
|
| 420 |
-
r2.raise_for_status()
|
| 421 |
-
cv_result = r2.json().get("result", {})
|
| 422 |
-
cv_rows = []
|
| 423 |
-
for vid in clinvar_cached[:3]:
|
| 424 |
-
if vid in cv_result:
|
| 425 |
-
v = cv_result[vid]
|
| 426 |
-
sig = v.get("clinical_significance", {})
|
| 427 |
-
if isinstance(sig, dict):
|
| 428 |
-
sig_str = sig.get("description", "Unknown")
|
| 429 |
-
else:
|
| 430 |
-
sig_str = str(sig)
|
| 431 |
-
cv_rows.append(
|
| 432 |
-
f"- **ClinVar ID {vid}**: {v.get('title','N/A')} | "
|
| 433 |
-
f"Classification: **{sig_str}**"
|
| 434 |
-
)
|
| 435 |
-
if cv_rows:
|
| 436 |
-
result_parts.append("### ClinVar Results\n" + "\n".join(cv_rows))
|
| 437 |
-
else:
|
| 438 |
-
result_parts.append("### ClinVar\nVariant found in index but summary unavailable.")
|
| 439 |
-
except Exception:
|
| 440 |
-
result_parts.append("### ClinVar\nData unavailable — API error.")
|
| 441 |
-
else:
|
| 442 |
-
result_parts.append(
|
| 443 |
-
"### ClinVar\n"
|
| 444 |
-
"**Not found in ClinVar database.**\n"
|
| 445 |
-
"> ⚠️ Not in database. Do not interpret."
|
| 446 |
-
)
|
| 447 |
-
|
| 448 |
-
gnomad_cached = cache_get("gnomad", hgvs)
|
| 449 |
-
if gnomad_cached is None:
|
| 450 |
-
try:
|
| 451 |
-
gql = """
|
| 452 |
-
query VariantSearch($query: String!, $dataset: DatasetId!) {
|
| 453 |
-
variantSearch(query: $query, dataset: $dataset) {
|
| 454 |
-
variant_id
|
| 455 |
-
rsids
|
| 456 |
-
exome { af }
|
| 457 |
-
genome { af }
|
| 458 |
-
}
|
| 459 |
-
}
|
| 460 |
-
"""
|
| 461 |
-
r3 = requests.post(
|
| 462 |
-
GNOMAD_GQL,
|
| 463 |
-
json={"query": gql, "variables": {"query": hgvs, "dataset": "gnomad_r4"}},
|
| 464 |
-
timeout=15
|
| 465 |
-
)
|
| 466 |
-
r3.raise_for_status()
|
| 467 |
-
gnomad_cached = r3.json()
|
| 468 |
-
cache_set("gnomad", hgvs, gnomad_cached)
|
| 469 |
-
except Exception:
|
| 470 |
-
gnomad_cached = None
|
| 471 |
-
|
| 472 |
-
if gnomad_cached and "data" in gnomad_cached:
|
| 473 |
-
variants = gnomad_cached["data"].get("variantSearch", [])
|
| 474 |
-
if variants:
|
| 475 |
-
gn_rows = []
|
| 476 |
-
for v in variants[:3]:
|
| 477 |
-
vid = v.get("variant_id", "N/A")
|
| 478 |
-
rsids = ", ".join(v.get("rsids", [])) or "N/A"
|
| 479 |
-
exome_af = v.get("exome", {}) or {}
|
| 480 |
-
genome_af = v.get("genome", {}) or {}
|
| 481 |
-
af_e = exome_af.get("af", "N/A")
|
| 482 |
-
af_g = genome_af.get("af", "N/A")
|
| 483 |
-
gn_rows.append(
|
| 484 |
-
f"- **{vid}** (rsID: {rsids}) | "
|
| 485 |
-
f"Exome AF: {af_e} | Genome AF: {af_g}"
|
| 486 |
-
)
|
| 487 |
-
result_parts.append("### gnomAD v4 Results\n" + "\n".join(gn_rows))
|
| 488 |
-
else:
|
| 489 |
-
result_parts.append(
|
| 490 |
-
"### gnomAD v4\n"
|
| 491 |
-
"**Not found in gnomAD.**\n"
|
| 492 |
-
"> ⚠️ Not in database. Do not interpret."
|
| 493 |
-
)
|
| 494 |
else:
|
| 495 |
-
|
| 496 |
-
|
| 497 |
-
|
| 498 |
-
|
| 499 |
-
)
|
| 500 |
-
|
| 501 |
-
|
| 502 |
-
|
| 503 |
-
|
| 504 |
-
|
| 505 |
-
|
| 506 |
-
|
| 507 |
-
|
| 508 |
-
|
| 509 |
-
|
| 510 |
-
|
| 511 |
-
|
| 512 |
-
|
| 513 |
-
|
| 514 |
-
|
| 515 |
-
|
| 516 |
-
|
| 517 |
-
|
| 518 |
-
counts = []
|
| 519 |
-
|
| 520 |
-
for yr in years:
|
| 521 |
-
q = f'"{keyword}" AND "{cancer_type}"[tiab] AND {yr}[pdat]'
|
| 522 |
-
n = pubmed_count(q)
|
| 523 |
-
counts.append(max(n, 0))
|
| 524 |
-
|
| 525 |
-
avg = np.mean([c for c in counts if c > 0]) if any(c > 0 for c in counts) else 0
|
| 526 |
-
gaps = [yr for yr, c in zip(years, counts) if c == 0]
|
| 527 |
-
low_years = [yr for yr, c in zip(years, counts) if 0 < c < avg * 0.3]
|
| 528 |
-
|
| 529 |
-
fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
|
| 530 |
-
bar_colors = []
|
| 531 |
-
for c in counts:
|
| 532 |
-
if c == 0:
|
| 533 |
-
bar_colors.append("#d73027")
|
| 534 |
-
elif c < avg * 0.3:
|
| 535 |
-
bar_colors.append("#fc8d59")
|
| 536 |
-
else:
|
| 537 |
-
bar_colors.append("#4393c3")
|
| 538 |
-
|
| 539 |
-
ax.bar(years, counts, color=bar_colors, edgecolor="white", linewidth=0.5)
|
| 540 |
-
ax.axhline(avg, color="#555", linestyle="--", linewidth=1, label=f"Avg: {avg:.1f}")
|
| 541 |
-
ax.set_xlabel("Year", fontsize=11)
|
| 542 |
-
ax.set_ylabel("PubMed Papers", fontsize=11)
|
| 543 |
-
ax.set_title(f'Literature Trend: "{keyword}" in {cancer_type}', fontsize=12)
|
| 544 |
-
ax.set_xticks(years)
|
| 545 |
-
ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right")
|
| 546 |
-
ax.legend(fontsize=9)
|
| 547 |
-
ax.set_facecolor("white")
|
| 548 |
-
fig.tight_layout()
|
| 549 |
-
|
| 550 |
-
buf = io.BytesIO()
|
| 551 |
-
fig.savefig(buf, format="png", dpi=150, facecolor="white")
|
| 552 |
-
buf.seek(0)
|
| 553 |
-
img = Image.open(buf)
|
| 554 |
-
plt.close(fig)
|
| 555 |
-
|
| 556 |
-
gap_text = []
|
| 557 |
-
if gaps:
|
| 558 |
-
gap_text.append(f"**Zero-publication years:** {', '.join(map(str, gaps))}")
|
| 559 |
-
if low_years:
|
| 560 |
-
gap_text.append(f"**Low-activity years (<30% avg):** {', '.join(map(str, low_years))}")
|
| 561 |
-
if not gaps and not low_years:
|
| 562 |
-
gap_text.append("No significant gaps detected in the last 10 years.")
|
| 563 |
-
|
| 564 |
-
summary = "\n\n".join(gap_text)
|
| 565 |
-
summary += f"\n\n*Source: PubMed E-utilities | Date: {today}*"
|
| 566 |
-
journal_log("A4-LitGap", f"cancer={cancer_type}, kw={keyword}", summary[:100])
|
| 567 |
-
return img, summary
|
| 568 |
-
|
| 569 |
-
|
| 570 |
-
# ─────────────────────────────────────────────
|
| 571 |
-
# TAB A5 — DRUGGABLE ORPHANS
|
| 572 |
-
# ─────────────────────────────────────────────
|
| 573 |
-
|
| 574 |
-
def _a5_trial_count(gene: str, cancer_type: str) -> int:
    """Return the ClinicalTrials.gov study count for *gene* + *cancer_type*.

    Returns ``-1`` when the count cannot be determined (request failure or
    a response without ``totalCount``). Only real counts are cached.
    """
    cache_id = f"{gene}_{cancer_type}"
    cached = cache_get("ct_orphan", cache_id)
    if cached is not None:
        return cached
    try:
        r = requests.get(
            f"{CT_BASE}/studies",
            params={
                "query.term": f"{gene} {cancer_type}",
                "pageSize": 1,
                "format": "json",
                # The v2 API only includes "totalCount" when this flag is
                # set; without it every gene appeared to have 0 trials.
                "countTotal": "true",
            },
            timeout=10,
        )
        r.raise_for_status()
        total = r.json().get("totalCount")
        if total is None:
            # No count in the response: report "unknown", not a made-up 0.
            return -1
        cache_set("ct_orphan", cache_id, total)
        return total
    except Exception:
        return -1


def a5_run(cancer_type: str):
    """List disease-associated targets that have no known drugs.

    Queries OpenTargets for up to 50 targets associated with the EFO ID
    mapped from *cancer_type* in ``CANCER_EFO``, keeps those whose
    ``knownDrugs.count`` is 0 or missing, and looks up a ClinicalTrials.gov
    study count for the first 15 of them.

    Fixes from the previous version:
      * The trial lookup now sends ``countTotal=true``. Without it the
        response has no ``totalCount``, so the old ``.get("totalCount", 0)``
        reported 0 for every gene and labelled all of them "🔴 Orphan".
      * A null ``approvedName`` no longer breaks the ``[:50]`` slice.

    Args:
        cancer_type: Key into ``CANCER_EFO``; also used in the trial search
            term and in messages.

    Returns:
        ``(DataFrame, note)`` on success, or ``(None, warning)`` when
        OpenTargets yields no rows.
    """
    today = datetime.date.today().isoformat()
    efo = CANCER_EFO.get(cancer_type, "")

    gql = """
    query DruggableTargets($efoId: String!, $size: Int!) {
      disease(efoId: $efoId) {
        associatedTargets(page: {index: 0, size: $size}) {
          rows {
            target {
              approvedSymbol
              approvedName
              tractability {
                label
                modality
                value
              }
              knownDrugs {
                count
              }
            }
            score
          }
        }
      }
    }
    """
    ot_data = ot_query(gql, {"efoId": efo, "size": 50})
    rows_ot = []
    try:
        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
    except (KeyError, TypeError):
        pass

    if not rows_ot:
        return None, f"⚠️ OpenTargets returned no data for {cancer_type}.\n\n*Source: OpenTargets | Date: {today}*"

    orphan_candidates = []
    for row in rows_ot:
        t = row["target"]
        gene = t["approvedSymbol"]
        try:
            drug_count = t["knownDrugs"]["count"] or 0
        except (KeyError, TypeError):
            # knownDrugs absent or null: treated as "no known drugs".
            drug_count = 0
        if drug_count == 0:
            orphan_candidates.append({
                "gene": gene,
                # "or" also covers an explicit null value, not just a missing key.
                "name": t.get("approvedName") or "",
                "ot_score": row["score"],
            })

    records = []
    for cand in orphan_candidates[:15]:
        gene = cand["gene"]
        trial_count = _a5_trial_count(gene, cancer_type)
        if trial_count == 0:
            status = "🔴 Orphan"
        elif trial_count > 0:
            status = "⚠️ Trials only"
        else:
            status = "❓ Unknown"
        records.append({
            "Gene": gene,
            "Name": cand["name"][:50],
            "OT_Score": round(cand["ot_score"], 3),
            "Known_Drugs": 0,
            "Active_Trials": trial_count if trial_count >= 0 else "N/A",
            "Status": status,
        })

    df = pd.DataFrame(records)
    note = (
        f"*Source: OpenTargets GraphQL + ClinicalTrials.gov v2 | Date: {today}*\n\n"
        f"*Orphan = no approved drug (OpenTargets knownDrugs.count = 0)*"
    )
    journal_log("A5-DruggableOrphans", f"cancer={cancer_type}", f"orphans={len(df)}")
    return df, note
|
| 658 |
-
|
| 659 |
-
|
| 660 |
-
# ─────────────────────────────────────────────
|
| 661 |
-
# GROUP B — LEARNING SANDBOX
|
| 662 |
-
# ─────────────────────────────────────────────
|
| 663 |
-
|
| 664 |
-
# Warning text for tabs whose output comes from rule-based models and
# synthetic data rather than live sources.
SIMULATED_BANNER = " ".join([
    "⚠️ **SIMULATED DATA** — This tab uses rule-based models and synthetic data",
    "for educational purposes only. Results do NOT reflect real experimental outcomes.",
])
|
| 668 |
-
|
| 669 |
-
# ── TAB B1 — miRNA Explorer ──────────────────
|
| 670 |
-
|
| 671 |
-
# Simulated miRNA lookup table keyed by gene symbol.
# Within each entry the lists are parallel: index i of "binding_energy",
# "seed_match" and "expression_change" belongs to "miRNAs"[i].
MIRNA_DB = {
    "BRCA2": {
        "miRNAs": [
            "miR-146a-5p",
            "miR-21-5p",
            "miR-155-5p",
            "miR-182-5p",
            "miR-205-5p",
        ],
        "binding_energy": [-18.4, -15.2, -12.7, -14.1, -16.8],
        "seed_match": ["7mer-m8", "6mer", "7mer-A1", "8mer", "7mer-m8"],
        "expression_change": [-2.1, 1.8, 2.3, -1.5, -3.2],
        "cancer_context": (
            "BRCA2 loss-of-function is associated with HR-deficient breast/ovarian cancer. "
            "miR-146a-5p and miR-205-5p are frequently downregulated in BRCA2-mutant tumors."
        ),
    },
    "BRCA1": {
        "miRNAs": [
            "miR-17-5p",
            "miR-20a-5p",
            "miR-93-5p",
            "miR-182-5p",
            "miR-9-5p",
        ],
        "binding_energy": [-16.1, -13.5, -14.9, -15.3, -11.8],
        "seed_match": ["8mer", "7mer-m8", "7mer-A1", "8mer", "6mer"],
        "expression_change": [1.9, 2.1, 1.6, -1.8, 2.4],
        "cancer_context": (
            "BRCA1 regulates DNA damage response. miR-17/20a cluster is upregulated "
            "in BRCA1-deficient tumors and suppresses apoptosis."
        ),
    },
    "TP53": {
        "miRNAs": [
            "miR-34a-5p",
            "miR-125b-5p",
            "miR-504-5p",
            "miR-25-3p",
            "miR-30d-5p",
        ],
        "binding_energy": [-19.2, -14.6, -13.1, -12.4, -15.7],
        "seed_match": ["8mer", "7mer-m8", "7mer-A1", "6mer", "8mer"],
        "expression_change": [-3.5, 1.2, 1.7, 2.0, -1.3],
        "cancer_context": (
            "TP53 is the most mutated gene in cancer. miR-34a is a direct p53 transcriptional "
            "target; its loss promotes tumor progression across cancer types."
        ),
    },
}
|
| 697 |
-
|
| 698 |
-
def b1_run(gene: str):
|
| 699 |
-
db = MIRNA_DB.get(gene, {})
|
| 700 |
-
if not db:
|
| 701 |
-
return None, "Gene not found in simulation database."
|
| 702 |
-
|
| 703 |
-
mirnas = db["miRNAs"]
|
| 704 |
-
energies = db["binding_energy"]
|
| 705 |
-
changes = db["expression_change"]
|
| 706 |
-
seeds = db["seed_match"]
|
| 707 |
-
|
| 708 |
-
fig, axes = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
|
| 709 |
-
|
| 710 |
-
colors_e = ["#d73027" if e < -16 else "#fc8d59" if e < -13 else "#4393c3" for e in energies]
|
| 711 |
-
axes[0].barh(mirnas, [-e for e in energies], color=colors_e, edgecolor="white")
|
| 712 |
-
axes[0].set_xlabel("Binding Energy (|kcal/mol|)", fontsize=10)
|
| 713 |
-
axes[0].set_title(f"Predicted Binding Energy\n{gene} miRNA targets", fontsize=10)
|
| 714 |
-
axes[0].set_facecolor("white")
|
| 715 |
-
|
| 716 |
-
colors_x = ["#d73027" if c < 0 else "#4393c3" for c in changes]
|
| 717 |
-
axes[1].barh(mirnas, changes, color=colors_x, edgecolor="white")
|
| 718 |
-
axes[1].axvline(0, color="black", linewidth=0.8)
|
| 719 |
-
axes[1].set_xlabel("Expression Change (log2FC)", fontsize=10)
|
| 720 |
-
axes[1].set_title(f"miRNA Expression in {gene}-mutant tumors\n(⚠️ SIMULATED)", fontsize=10)
|
| 721 |
-
axes[1].set_facecolor("white")
|
| 722 |
|
| 723 |
-
|
| 724 |
-
|
| 725 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 726 |
buf.seek(0)
|
| 727 |
-
|
| 728 |
-
|
| 729 |
-
|
| 730 |
-
|
| 731 |
-
"
|
| 732 |
-
"
|
| 733 |
-
|
| 734 |
-
|
| 735 |
-
|
| 736 |
-
|
| 737 |
-
|
| 738 |
-
|
| 739 |
-
|
| 740 |
-
|
| 741 |
-
|
| 742 |
-
|
| 743 |
-
|
| 744 |
-
|
| 745 |
-
|
| 746 |
-
|
| 747 |
-
|
| 748 |
-
|
| 749 |
-
|
| 750 |
-
"
|
| 751 |
-
|
| 752 |
-
|
| 753 |
-
|
| 754 |
-
|
| 755 |
-
|
| 756 |
-
"
|
| 757 |
-
|
| 758 |
-
|
| 759 |
-
|
| 760 |
-
|
| 761 |
-
},
|
| 762 |
-
}
|
| 763 |
-
|
| 764 |
-
def b2_run(cancer: str):
|
| 765 |
-
db = SIRNA_DB.get(cancer, {})
|
| 766 |
-
if not db:
|
| 767 |
-
return None, "Cancer type not in simulation database."
|
| 768 |
-
|
| 769 |
-
targets = db["targets"]
|
| 770 |
-
efficacy = db["efficacy"]
|
| 771 |
-
off_risk = db["off_target_risk"]
|
| 772 |
-
delivery = db["delivery_challenge"]
|
| 773 |
-
|
| 774 |
-
fig, ax = plt.subplots(figsize=(8, 4), facecolor="white")
|
| 775 |
-
risk_color = {"Low": "#4393c3", "Medium": "#fc8d59", "High": "#d73027"}
|
| 776 |
-
colors = [risk_color.get(r, "#aaa") for r in off_risk]
|
| 777 |
-
ax.barh(targets, efficacy, color=colors, edgecolor="white")
|
| 778 |
-
ax.set_xlim(0, 1.1)
|
| 779 |
-
ax.set_xlabel("Predicted siRNA Efficacy (⚠️ SIMULATED)", fontsize=10)
|
| 780 |
-
ax.set_title(f"siRNA Target Efficacy — {cancer}", fontsize=11)
|
| 781 |
-
ax.set_facecolor("white")
|
| 782 |
-
from matplotlib.patches import Patch
|
| 783 |
-
legend_elements = [Patch(facecolor=v, label=k) for k, v in risk_color.items()]
|
| 784 |
-
ax.legend(handles=legend_elements, title="Off-target Risk", fontsize=8, loc="lower right")
|
| 785 |
-
fig.tight_layout()
|
| 786 |
-
|
| 787 |
-
buf = io.BytesIO()
|
| 788 |
-
fig.savefig(buf, format="png", dpi=150, facecolor="white")
|
| 789 |
buf.seek(0)
|
| 790 |
-
|
| 791 |
-
|
| 792 |
-
|
| 793 |
-
|
| 794 |
-
|
| 795 |
-
|
| 796 |
-
|
| 797 |
-
|
| 798 |
-
|
| 799 |
-
|
| 800 |
-
|
| 801 |
-
|
| 802 |
-
|
| 803 |
-
|
| 804 |
-
|
| 805 |
-
|
| 806 |
-
|
| 807 |
-
|
| 808 |
-
|
| 809 |
-
|
| 810 |
-
|
| 811 |
-
|
| 812 |
-
|
| 813 |
-
|
| 814 |
-
|
| 815 |
-
"Clusterin": max(0, 0.08 + peg_norm * 0.15),
|
| 816 |
-
"IgG": max(0, 0.07 + serum_pct * 0.001),
|
| 817 |
-
"Complement C3": max(0, 0.05 + ionizable_pct * 0.003 - peg_norm * 0.1),
|
| 818 |
-
}
|
| 819 |
-
total = sum(corona_proteins.values())
|
| 820 |
-
if total > 0:
|
| 821 |
-
corona_proteins = {k: v / total for k, v in corona_proteins.items()}
|
| 822 |
-
|
| 823 |
-
fig, axes = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
|
| 824 |
-
|
| 825 |
-
labels = list(corona_proteins.keys())
|
| 826 |
-
sizes = list(corona_proteins.values())
|
| 827 |
-
colors_pie = plt.cm.Set2(np.linspace(0, 1, len(labels)))
|
| 828 |
-
axes[0].pie(sizes, labels=labels, colors=colors_pie, autopct="%1.1f%%", startangle=90)
|
| 829 |
-
axes[0].set_title("Predicted Corona Composition\n(⚠️ SIMULATED)", fontsize=10)
|
| 830 |
-
|
| 831 |
-
axes[1].bar(labels, sizes, color=colors_pie, edgecolor="white")
|
| 832 |
-
axes[1].set_ylabel("Relative Abundance", fontsize=10)
|
| 833 |
-
axes[1].set_title("Corona Protein Fractions", fontsize=10)
|
| 834 |
-
axes[1].set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
|
| 835 |
-
axes[1].set_facecolor("white")
|
| 836 |
-
|
| 837 |
-
fig.tight_layout()
|
| 838 |
-
buf = io.BytesIO()
|
| 839 |
-
fig.savefig(buf, format="png", dpi=150, facecolor="white")
|
| 840 |
buf.seek(0)
|
| 841 |
-
|
| 842 |
-
|
| 843 |
-
|
| 844 |
-
|
| 845 |
-
|
| 846 |
-
|
| 847 |
-
|
| 848 |
-
|
| 849 |
-
|
| 850 |
-
|
| 851 |
-
|
| 852 |
-
|
| 853 |
-
|
| 854 |
-
|
| 855 |
-
|
| 856 |
-
|
| 857 |
-
|
| 858 |
-
|
| 859 |
-
|
| 860 |
-
|
| 861 |
-
|
| 862 |
-
|
| 863 |
-
|
| 864 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 865 |
|
| 866 |
-
|
| 867 |
-
|
| 868 |
-
|
| 869 |
-
|
| 870 |
-
|
| 871 |
-
|
| 872 |
-
|
| 873 |
-
|
| 874 |
-
|
| 875 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 876 |
|
| 877 |
-
|
| 878 |
-
fig.savefig(buf, format="png", dpi=150, facecolor="white")
|
| 879 |
-
buf.seek(0)
|
| 880 |
-
img = Image.open(buf)
|
| 881 |
-
plt.close(fig)
|
| 882 |
|
| 883 |
-
|
| 884 |
-
|
| 885 |
-
|
| 886 |
-
"
|
| 887 |
-
"
|
| 888 |
)
|
| 889 |
-
journal_log("B4-FlowCorona", f"kon_alb={kon_albumin},kon_apoe={kon_apoe}", note[:80])
|
| 890 |
-
return img, note
|
| 891 |
-
|
| 892 |
|
| 893 |
-
|
| 894 |
-
|
| 895 |
-
|
| 896 |
-
|
| 897 |
-
|
| 898 |
-
|
| 899 |
-
|
| 900 |
-
|
| 901 |
-
|
| 902 |
-
|
| 903 |
-
|
| 904 |
-
|
| 905 |
-
|
| 906 |
-
|
| 907 |
-
|
| 908 |
-
|
| 909 |
-
|
| 910 |
-
|
| 911 |
-
|
| 912 |
-
|
| 913 |
-
|
| 914 |
-
|
| 915 |
-
|
| 916 |
-
|
| 917 |
-
|
| 918 |
-
|
| 919 |
-
|
| 920 |
-
|
| 921 |
-
|
| 922 |
-
|
| 923 |
-
|
| 924 |
-
|
| 925 |
-
|
| 926 |
-
|
| 927 |
-
|
| 928 |
-
|
| 929 |
-
|
| 930 |
-
|
| 931 |
-
|
| 932 |
-
|
| 933 |
-
|
| 934 |
-
|
| 935 |
-
|
| 936 |
-
|
| 937 |
-
|
| 938 |
-
|
| 939 |
-
|
| 940 |
-
|
| 941 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 942 |
)
|
| 943 |
-
journal_log("B5-VariantConcepts", f"class={classification}", output[:100])
|
| 944 |
-
return output
|
| 945 |
-
|
| 946 |
-
|
| 947 |
-
# ─────────────────────────────────────────────
|
| 948 |
-
# GRADIO UI ASSEMBLY
|
| 949 |
-
# ─────────────────────────────────────────────
|
| 950 |
-
|
| 951 |
-
CUSTOM_CSS = """
|
| 952 |
-
body { font-family: 'Inter', sans-serif; }
|
| 953 |
-
.simulated-banner {
|
| 954 |
-
background: #fff3cd; border: 1px solid #ffc107;
|
| 955 |
-
border-radius: 6px; padding: 10px 14px;
|
| 956 |
-
font-weight: 600; color: #856404; margin-bottom: 8px;
|
| 957 |
-
}
|
| 958 |
-
.source-note { color: #6c757d; font-size: 0.85em; margin-top: 6px; }
|
| 959 |
-
.gap-card {
|
| 960 |
-
background: #f8f9fa; border-left: 4px solid #d73027;
|
| 961 |
-
padding: 10px 14px; margin: 6px 0; border-radius: 4px;
|
| 962 |
-
}
|
| 963 |
-
footer { display: none !important; }
|
| 964 |
-
"""
|
| 965 |
-
|
| 966 |
-
|
| 967 |
-
def build_app():
|
| 968 |
-
with gr.Blocks(css=CUSTOM_CSS, title="K R&D Lab — Cancer Research Suite") as demo:
|
| 969 |
-
gr.Markdown(
|
| 970 |
-
"# 🔬 K R&D Lab — Cancer Research Suite\n"
|
| 971 |
-
"**Author:** Oksana Kolisnyk | [kosatiks-group.pp.ua](https://kosatiks-group.pp.ua) \n"
|
| 972 |
-
"**Repo:** [github.com/TEZv/K-RnD-Lab-PHYLO-03_2026](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026)"
|
| 973 |
-
)
|
| 974 |
-
|
| 975 |
-
with gr.Row():
|
| 976 |
-
with gr.Column(scale=4):
|
| 977 |
-
with gr.Tabs():
|
| 978 |
-
|
| 979 |
-
# ════════════════════════════════
|
| 980 |
-
# GROUP A — REAL DATA TOOLS
|
| 981 |
-
# ════════════════════════════════
|
| 982 |
-
with gr.Tab("🔬 Real Data Tools"):
|
| 983 |
-
with gr.Tabs():
|
| 984 |
-
|
| 985 |
-
with gr.Tab("🔍 Gray Zones Explorer"):
|
| 986 |
-
gr.Markdown(
|
| 987 |
-
"Identify underexplored biological processes in a cancer type "
|
| 988 |
-
"using live PubMed + OpenTargets data."
|
| 989 |
-
)
|
| 990 |
-
a1_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
|
| 991 |
-
a1_btn = gr.Button("🔍 Explore Gray Zones", variant="primary")
|
| 992 |
-
a1_heatmap = gr.Image(label="Research Coverage Heatmap", type="pil")
|
| 993 |
-
a1_gaps = gr.Markdown(label="Top 5 Research Gaps")
|
| 994 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 995 |
-
gr.Markdown(
|
| 996 |
-
"**What is a research gray zone?**\n\n"
|
| 997 |
-
"A gray zone is a biological process that is well-studied in other cancers "
|
| 998 |
-
"but has very few publications in your selected cancer type. "
|
| 999 |
-
"Low paper counts (red/white cells) indicate potential unexplored territory.\n\n"
|
| 1000 |
-
"**How to use:** Select a rare cancer (e.g. DIPG, MCC) to find the most "
|
| 1001 |
-
"underexplored processes. Cross-reference with Tab A2 to find targetable genes."
|
| 1002 |
-
)
|
| 1003 |
-
a1_btn.click(a1_run, inputs=[a1_cancer], outputs=[a1_heatmap, a1_gaps])
|
| 1004 |
-
|
| 1005 |
-
with gr.Tab("🎯 Understudied Target Finder"):
|
| 1006 |
-
gr.Markdown(
|
| 1007 |
-
"Find essential genes with high research gap index "
|
| 1008 |
-
"(high essentiality, low publication coverage)."
|
| 1009 |
-
)
|
| 1010 |
-
gr.Markdown(
|
| 1011 |
-
"> ⚠️ **Essentiality scores are placeholder estimates** from a "
|
| 1012 |
-
"curated reference gene set — **not real DepMap data**. "
|
| 1013 |
-
"Association scores and paper/trial counts are fetched live. "
|
| 1014 |
-
"For real essentiality values, download `CRISPR_gene_effect.csv` "
|
| 1015 |
-
"from [depmap.org](https://depmap.org/portal/download/all/) and "
|
| 1016 |
-
"replace `_load_depmap_sample()` in `app.py`."
|
| 1017 |
-
)
|
| 1018 |
-
a2_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
|
| 1019 |
-
a2_btn = gr.Button("🎯 Find Understudied Targets", variant="primary")
|
| 1020 |
-
a2_table = gr.Dataframe(label="Target Gap Table", wrap=True)
|
| 1021 |
-
a2_note = gr.Markdown()
|
| 1022 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1023 |
-
gr.Markdown(
|
| 1024 |
-
"**Gap Index formula:** `essentiality / log(papers + 1)`\n\n"
|
| 1025 |
-
"- **Essentiality**: inverted DepMap CRISPR gene effect score\n"
|
| 1026 |
-
"- **Papers**: PubMed count for gene + cancer type\n"
|
| 1027 |
-
"- **High Gap Index** = essential gene with few publications = high research opportunity"
|
| 1028 |
-
)
|
| 1029 |
-
a2_btn.click(a2_run, inputs=[a2_cancer], outputs=[a2_table, a2_note])
|
| 1030 |
-
|
| 1031 |
-
with gr.Tab("🧬 Real Variant Lookup"):
|
| 1032 |
-
gr.Markdown(
|
| 1033 |
-
"Look up a variant in **ClinVar** and **gnomAD**. "
|
| 1034 |
-
"Results are fetched live — never hallucinated."
|
| 1035 |
-
)
|
| 1036 |
-
a3_hgvs = gr.Textbox(
|
| 1037 |
-
label="HGVS Notation",
|
| 1038 |
-
placeholder="e.g. NM_007294.4:c.5266dupC or NM_000546.6:c.524G>A",
|
| 1039 |
-
lines=1
|
| 1040 |
-
)
|
| 1041 |
-
a3_btn = gr.Button("🔎 Look Up Variant", variant="primary")
|
| 1042 |
-
a3_result = gr.Markdown()
|
| 1043 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1044 |
-
gr.Markdown(
|
| 1045 |
-
"**HGVS notation format:**\n"
|
| 1046 |
-
"- `NM_XXXXXX.X:c.NNNN[change]` — coding DNA reference\n"
|
| 1047 |
-
"- `NC_XXXXXX.X:g.NNNN[change]` — genomic reference\n\n"
|
| 1048 |
-
"**Important:** If a variant is not found, this tool returns "
|
| 1049 |
-
"'Not in database. Do not interpret.' — never a fabricated result."
|
| 1050 |
-
)
|
| 1051 |
-
a3_btn.click(a3_run, inputs=[a3_hgvs], outputs=[a3_result])
|
| 1052 |
-
|
| 1053 |
-
with gr.Tab("📰 Literature Gap Finder"):
|
| 1054 |
-
gr.Markdown(
|
| 1055 |
-
"Visualize publication trends over 10 years and detect "
|
| 1056 |
-
"years with low research activity."
|
| 1057 |
-
)
|
| 1058 |
-
with gr.Row():
|
| 1059 |
-
a4_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
|
| 1060 |
-
a4_kw = gr.Textbox(label="Keyword", placeholder="e.g. ferroptosis", lines=1)
|
| 1061 |
-
a4_btn = gr.Button("📊 Analyze Literature Trend", variant="primary")
|
| 1062 |
-
a4_chart = gr.Image(label="Papers per Year", type="pil")
|
| 1063 |
-
a4_gaps = gr.Markdown()
|
| 1064 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1065 |
-
gr.Markdown(
|
| 1066 |
-
"**How to read the chart:**\n"
|
| 1067 |
-
"- 🔵 Blue bars = normal activity\n"
|
| 1068 |
-
"- 🟠 Orange bars = low activity (<30% of average)\n"
|
| 1069 |
-
"- 🔴 Red bars = zero publications (true gap)"
|
| 1070 |
-
)
|
| 1071 |
-
a4_btn.click(a4_run, inputs=[a4_cancer, a4_kw], outputs=[a4_chart, a4_gaps])
|
| 1072 |
-
|
| 1073 |
-
with gr.Tab("💊 Druggable Orphans"):
|
| 1074 |
-
gr.Markdown(
|
| 1075 |
-
"Identify cancer-associated essential genes with **no approved drug** "
|
| 1076 |
-
"and **no active clinical trial**."
|
| 1077 |
-
)
|
| 1078 |
-
a5_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
|
| 1079 |
-
a5_btn = gr.Button("💊 Find Druggable Orphans", variant="primary")
|
| 1080 |
-
a5_table = gr.Dataframe(label="Orphan Target Table", wrap=True)
|
| 1081 |
-
a5_note = gr.Markdown()
|
| 1082 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1083 |
-
gr.Markdown(
|
| 1084 |
-
"**What is a druggable orphan?**\n\n"
|
| 1085 |
-
"A gene that is strongly associated with a cancer but has no approved drug "
|
| 1086 |
-
"and no active clinical trial. These represent the highest-opportunity "
|
| 1087 |
-
"targets for drug discovery."
|
| 1088 |
-
)
|
| 1089 |
-
a5_btn.click(a5_run, inputs=[a5_cancer], outputs=[a5_table, a5_note])
|
| 1090 |
-
|
| 1091 |
-
with gr.Tab("🤖 Research Assistant"):
|
| 1092 |
-
gr.Markdown(
|
| 1093 |
-
"**RAG-powered research assistant** indexed on 20 curated papers "
|
| 1094 |
-
"on LNP delivery, protein corona, and cancer variants.\n\n"
|
| 1095 |
-
"*Powered by sentence-transformers + FAISS — no API key required.*"
|
| 1096 |
-
)
|
| 1097 |
-
try:
|
| 1098 |
-
from chatbot import build_chatbot_tab
|
| 1099 |
-
build_chatbot_tab()
|
| 1100 |
-
except ImportError:
|
| 1101 |
-
gr.Markdown(
|
| 1102 |
-
"⚠️ `chatbot.py` not found. Please ensure it is in the same directory as `app.py`."
|
| 1103 |
-
)
|
| 1104 |
-
|
| 1105 |
-
# ════════════════════════════════
|
| 1106 |
-
# GROUP B — LEARNING SANDBOX
|
| 1107 |
-
# ════════════════════════════════
|
| 1108 |
-
with gr.Tab("📚 Learning Sandbox"):
|
| 1109 |
-
gr.Markdown(
|
| 1110 |
-
"> ⚠️ **ALL TABS IN THIS GROUP USE SIMULATED DATA** — "
|
| 1111 |
-
"For educational purposes only. Results do not reflect real experiments."
|
| 1112 |
-
)
|
| 1113 |
-
with gr.Tabs():
|
| 1114 |
-
|
| 1115 |
-
with gr.Tab("🧬 miRNA Explorer"):
|
| 1116 |
-
gr.Markdown(SIMULATED_BANNER)
|
| 1117 |
-
b1_gene = gr.Dropdown(["BRCA2", "BRCA1", "TP53"], label="Gene", value="TP53")
|
| 1118 |
-
b1_btn = gr.Button("🔬 Explore miRNA Interactions", variant="primary")
|
| 1119 |
-
b1_plot = gr.Image(label="miRNA Binding & Expression (⚠️ SIMULATED)", type="pil")
|
| 1120 |
-
b1_table = gr.Markdown()
|
| 1121 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1122 |
-
gr.Markdown(
|
| 1123 |
-
"**miRNA biology basics:**\n\n"
|
| 1124 |
-
"- miRNAs are ~22 nt non-coding RNAs that bind 3'UTR of mRNAs\n"
|
| 1125 |
-
"- Seed match types: 8mer > 7mer-m8 > 7mer-A1 > 6mer (binding strength)\n"
|
| 1126 |
-
"- Negative binding energy = stronger predicted interaction"
|
| 1127 |
-
)
|
| 1128 |
-
b1_btn.click(b1_run, inputs=[b1_gene], outputs=[b1_plot, b1_table])
|
| 1129 |
-
|
| 1130 |
-
with gr.Tab("🎯 siRNA Targets"):
|
| 1131 |
-
gr.Markdown(SIMULATED_BANNER)
|
| 1132 |
-
b2_cancer = gr.Dropdown(["LUAD", "BRCA", "COAD"], label="Cancer Type", value="LUAD")
|
| 1133 |
-
b2_btn = gr.Button("🎯 Simulate siRNA Efficacy", variant="primary")
|
| 1134 |
-
b2_plot = gr.Image(label="siRNA Efficacy (⚠️ SIMULATED)", type="pil")
|
| 1135 |
-
b2_table = gr.Markdown()
|
| 1136 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1137 |
-
gr.Markdown(
|
| 1138 |
-
"**siRNA design principles:**\n\n"
|
| 1139 |
-
"- siRNAs are 21-23 nt dsRNA that trigger RISC-mediated mRNA cleavage\n"
|
| 1140 |
-
"- Off-target risk: seed region complementarity to unintended mRNAs\n"
|
| 1141 |
-
"- Delivery challenge: endosomal escape, serum stability, tumor penetration"
|
| 1142 |
-
)
|
| 1143 |
-
b2_btn.click(b2_run, inputs=[b2_cancer], outputs=[b2_plot, b2_table])
|
| 1144 |
-
|
| 1145 |
-
with gr.Tab("🧪 LNP Corona"):
|
| 1146 |
-
gr.Markdown(SIMULATED_BANNER)
|
| 1147 |
-
with gr.Row():
|
| 1148 |
-
b3_peg = gr.Slider(0.5, 5.0, value=1.5, step=0.1, label="PEG mol% (lipid)")
|
| 1149 |
-
b3_ion = gr.Slider(10, 60, value=50, step=1, label="Ionizable lipid mol%")
|
| 1150 |
-
with gr.Row():
|
| 1151 |
-
b3_helper = gr.Slider(5, 30, value=10, step=1, label="Helper lipid mol%")
|
| 1152 |
-
b3_chol = gr.Slider(10, 50, value=38, step=1, label="Cholesterol mol%")
|
| 1153 |
-
with gr.Row():
|
| 1154 |
-
b3_size = gr.Slider(50, 300, value=100, step=5, label="Particle size (nm)")
|
| 1155 |
-
b3_serum = gr.Slider(0, 100, value=10, step=5, label="Serum % in medium")
|
| 1156 |
-
b3_btn = gr.Button("🧪 Simulate Corona", variant="primary")
|
| 1157 |
-
b3_plot = gr.Image(label="Corona Composition (⚠️ SIMULATED)", type="pil")
|
| 1158 |
-
b3_interp = gr.Markdown()
|
| 1159 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1160 |
-
gr.Markdown(
|
| 1161 |
-
"**Protein corona basics:**\n\n"
|
| 1162 |
-
"- Hard corona: tightly bound, long-lived proteins (ApoE, fibrinogen)\n"
|
| 1163 |
-
"- Soft corona: loosely bound, rapidly exchanging proteins (albumin)\n"
|
| 1164 |
-
"- ApoE enrichment → enhanced brain targeting via LDLR/LRP1 receptors\n"
|
| 1165 |
-
"- PEG reduces corona formation"
|
| 1166 |
-
)
|
| 1167 |
-
b3_btn.click(
|
| 1168 |
-
b3_run,
|
| 1169 |
-
inputs=[b3_peg, b3_ion, b3_helper, b3_chol, b3_size, b3_serum],
|
| 1170 |
-
outputs=[b3_plot, b3_interp]
|
| 1171 |
-
)
|
| 1172 |
-
|
| 1173 |
-
with gr.Tab("🌊 Flow Corona"):
|
| 1174 |
-
gr.Markdown(SIMULATED_BANNER)
|
| 1175 |
-
with gr.Row():
|
| 1176 |
-
b4_time = gr.Slider(10, 120, value=60, step=5, label="Time range (min)")
|
| 1177 |
-
b4_kon_alb = gr.Slider(0.01, 1.0, value=0.3, step=0.01, label="kon Albumin")
|
| 1178 |
-
with gr.Row():
|
| 1179 |
-
b4_kon_apoe = gr.Slider(0.001, 0.5, value=0.05, step=0.001, label="kon ApoE")
|
| 1180 |
-
b4_koff_alb = gr.Slider(0.01, 1.0, value=0.2, step=0.01, label="koff Albumin")
|
| 1181 |
-
b4_koff_apoe = gr.Slider(0.001, 0.1, value=0.01, step=0.001, label="koff ApoE")
|
| 1182 |
-
b4_btn = gr.Button("🌊 Simulate Vroman Kinetics", variant="primary")
|
| 1183 |
-
b4_plot = gr.Image(label="Vroman Effect (⚠️ SIMULATED)", type="pil")
|
| 1184 |
-
b4_note = gr.Markdown()
|
| 1185 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1186 |
-
gr.Markdown(
|
| 1187 |
-
"**The Vroman Effect:** Proteins with high abundance but low affinity "
|
| 1188 |
-
"(albumin) adsorb first, then are displaced by lower-abundance but "
|
| 1189 |
-
"higher-affinity proteins (fibrinogen, ApoE).\n\n"
|
| 1190 |
-
"**Clinical implication:** The final hard corona (not initial) determines "
|
| 1191 |
-
"nanoparticle fate in vivo."
|
| 1192 |
-
)
|
| 1193 |
-
b4_btn.click(
|
| 1194 |
-
b4_run,
|
| 1195 |
-
inputs=[b4_time, b4_kon_alb, b4_kon_apoe, b4_koff_alb, b4_koff_apoe],
|
| 1196 |
-
outputs=[b4_plot, b4_note]
|
| 1197 |
-
)
|
| 1198 |
-
|
| 1199 |
-
with gr.Tab("🔬 Variant Concepts"):
|
| 1200 |
-
gr.Markdown(SIMULATED_BANNER)
|
| 1201 |
-
b5_class = gr.Dropdown(
|
| 1202 |
-
list(VARIANT_RULES.keys()),
|
| 1203 |
-
label="ACMG Classification",
|
| 1204 |
-
value="VUS"
|
| 1205 |
-
)
|
| 1206 |
-
b5_btn = gr.Button("📋 Explain Classification", variant="primary")
|
| 1207 |
-
b5_result = gr.Markdown()
|
| 1208 |
-
with gr.Accordion("📖 Learning Mode", open=False):
|
| 1209 |
-
gr.Markdown(
|
| 1210 |
-
"**ACMG/AMP 2015 Classification Framework:**\n\n"
|
| 1211 |
-
"1. **Pathogenic** — strong evidence of disease causation\n"
|
| 1212 |
-
"2. **Likely Pathogenic** — >90% probability pathogenic\n"
|
| 1213 |
-
"3. **VUS** — uncertain significance\n"
|
| 1214 |
-
"4. **Likely Benign** — >90% probability benign\n"
|
| 1215 |
-
"5. **Benign** — strong evidence of no disease effect"
|
| 1216 |
-
)
|
| 1217 |
-
b5_btn.click(b5_run, inputs=[b5_class], outputs=[b5_result])
|
| 1218 |
-
|
| 1219 |
-
# ── SIDEBAR ──
|
| 1220 |
-
with gr.Column(scale=1, min_width=260):
|
| 1221 |
-
gr.Markdown("## 📓 Lab Journal")
|
| 1222 |
-
note_input = gr.Textbox(label="Add note", placeholder="Your observation...", lines=2)
|
| 1223 |
-
save_btn = gr.Button("💾 Save Note", size="sm")
|
| 1224 |
-
refresh_btn = gr.Button("🔄 Refresh Journal", size="sm")
|
| 1225 |
-
journal_display = gr.Markdown(value="*Click Refresh to load entries.*")
|
| 1226 |
-
|
| 1227 |
-
def save_note(note):
|
| 1228 |
-
if note.strip():
|
| 1229 |
-
journal_log("Manual", "note", note.strip(), note.strip())
|
| 1230 |
-
return journal_read()
|
| 1231 |
-
|
| 1232 |
-
save_btn.click(save_note, inputs=[note_input], outputs=[journal_display])
|
| 1233 |
-
refresh_btn.click(lambda: journal_read(), outputs=[journal_display])
|
| 1234 |
-
|
| 1235 |
-
gr.Markdown(
|
| 1236 |
-
"---\n"
|
| 1237 |
-
"*K R&D Lab Cancer Research Suite · "
|
| 1238 |
-
"All real-data tabs use live APIs with 24h caching · "
|
| 1239 |
-
"Simulated tabs are clearly labeled ⚠️ SIMULATED · "
|
| 1240 |
-
"Source attribution shown on every result*"
|
| 1241 |
-
)
|
| 1242 |
-
|
| 1243 |
-
return demo
|
| 1244 |
-
|
| 1245 |
|
| 1246 |
-
|
| 1247 |
-
app = build_app()
|
| 1248 |
-
app.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 1 |
import gradio as gr
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 2 |
import pandas as pd
|
| 3 |
+
import numpy as np
|
| 4 |
+
import json, re, csv
|
| 5 |
import matplotlib
|
| 6 |
matplotlib.use("Agg")
|
| 7 |
import matplotlib.pyplot as plt
|
| 8 |
+
from io import BytesIO
|
|
|
|
|
|
|
| 9 |
from PIL import Image
|
| 10 |
+
from datetime import datetime
|
| 11 |
+
from pathlib import Path
|
| 12 |
|
| 13 |
+
# Dark-theme colour palette (hex strings) shared by the result HTML and the
# matplotlib figures produced in this module.
BG = "#0f172a"    # NOTE(review): presumably the page background; not referenced in the visible part of the file
CARD = "#1e293b"  # background of result cards and figure/axes face colour
ACC = "#f97316"   # primary accent (bars, ApoE curves, positive contributions)
ACC2 = "#38bdf8"  # secondary accent (non-positive contributions)
TXT = "#f1f5f9"   # text, tick and axis-label colour

# CSV file that log_entry() appends to and load_journal() reads back.
LOG_PATH = Path("/tmp/lab_journal.csv")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 20 |
|
| 21 |
+
def log_entry(tab, inputs, result, note=""):
    """Append one row to the lab-journal CSV at ``LOG_PATH``.

    Args:
        tab: Name of the tool/tab that produced the entry.
        inputs: Inputs of the run; stored as ``str(inputs)``.
        result: Outcome of the run; stored as ``str(result)`` truncated to
            200 characters.
        note: Optional free-text note stored alongside the entry.

    Returns:
        None. Journaling is best-effort: any failure is swallowed so that a
        logging problem never breaks the calling tool.
    """
    fieldnames = ["timestamp", "tab", "inputs", "result", "note"]
    try:
        # Write the header when the file is missing *or* empty. Checking only
        # for existence would leave an existing zero-byte file without a
        # header, and the first data row would later be read as the header.
        write_header = not LOG_PATH.exists() or LOG_PATH.stat().st_size == 0
        with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
            w = csv.DictWriter(f, fieldnames=fieldnames)
            if write_header:
                w.writeheader()
            w.writerow({
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
                "tab": tab,
                "inputs": str(inputs),
                "result": str(result)[:200],
                "note": note,
            })
    except Exception:
        # Deliberately silent: the journal is a convenience, not a dependency.
        pass
|
| 37 |
|
| 38 |
+
def load_journal():
    """Return the lab journal stored at ``LOG_PATH`` as a DataFrame.

    When the file does not exist, or reading it fails for any reason, an
    empty frame carrying the five journal columns is returned instead.
    """
    def _blank():
        # Empty journal with the same columns log_entry() writes.
        return pd.DataFrame(columns=["timestamp","tab","inputs","result","note"])

    try:
        return pd.read_csv(LOG_PATH) if LOG_PATH.exists() else _blank()
    except Exception:
        return _blank()
|
| 45 |
|
| 46 |
+
def save_note(note, tab, last_result):
    """Store a user note in the journal.

    The note is logged under *tab* together with *last_result* (inputs are
    left empty). Returns a status message and the reloaded journal frame.
    """
    log_entry(tab, "", last_result, note)
    status = "✅ Saved!"
    refreshed = load_journal()
    return status, refreshed
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 49 |
|
| 50 |
+
# Lookup table read by predict_mirna(): gene symbol -> list of row dicts with
# the keys miRNA, log2FC, padj, targets and pathway (one dict per table row).
# NOTE(review): the values are hard-coded here; their provenance is not
# visible in this file — confirm before presenting them as analysis results.
MIRNA_DB = {
    "BRCA2": [
        {"miRNA":"hsa-miR-148a-3p","log2FC":-0.70,"padj":0.013,"targets":"DNMT1, AKT2","pathway":"Epigenetic reprogramming"},
        {"miRNA":"hsa-miR-30e-5p","log2FC":-0.49,"padj":0.032,"targets":"MYC, KRAS","pathway":"Oncogene suppression"},
        {"miRNA":"hsa-miR-551b-3p","log2FC":-0.59,"padj":0.048,"targets":"SMAD4, CDK6","pathway":"TGF-beta / CDK4/6"},
        {"miRNA":"hsa-miR-22-3p","log2FC":-0.43,"padj":0.041,"targets":"HIF1A, PTEN","pathway":"Hypoxia / PI3K"},
        {"miRNA":"hsa-miR-200c-3p","log2FC":-0.38,"padj":0.044,"targets":"ZEB1, ZEB2","pathway":"EMT suppression"},
    ],
    "BRCA1": [
        {"miRNA":"hsa-miR-155-5p","log2FC":-0.81,"padj":0.008,"targets":"SHIP1, SOCS1","pathway":"Immune evasion"},
        {"miRNA":"hsa-miR-146a-5p","log2FC":-0.65,"padj":0.019,"targets":"TRAF6, IRAK1","pathway":"NF-kB signalling"},
        {"miRNA":"hsa-miR-21-5p","log2FC":-0.55,"padj":0.027,"targets":"PTEN, PDCD4","pathway":"Apoptosis"},
        {"miRNA":"hsa-miR-17-5p","log2FC":-0.47,"padj":0.036,"targets":"RB1, E2F1","pathway":"Cell cycle"},
        {"miRNA":"hsa-miR-34a-5p","log2FC":-0.41,"padj":0.049,"targets":"BCL2, CDK6","pathway":"p53 axis"},
    ],
    "TP53": [
        {"miRNA":"hsa-miR-34a-5p","log2FC":-1.10,"padj":0.001,"targets":"BCL2, CDK6","pathway":"p53-miR-34 axis"},
        {"miRNA":"hsa-miR-192-5p","log2FC":-0.90,"padj":0.005,"targets":"MDM2, DHFR","pathway":"p53 feedback"},
        {"miRNA":"hsa-miR-145-5p","log2FC":-0.75,"padj":0.012,"targets":"MYC, EGFR","pathway":"Growth suppression"},
        {"miRNA":"hsa-miR-107","log2FC":-0.62,"padj":0.023,"targets":"CDK6, HIF1B","pathway":"Hypoxia / cell cycle"},
        {"miRNA":"hsa-miR-215-5p","log2FC":-0.51,"padj":0.038,"targets":"DTL, DHFR","pathway":"DNA damage response"},
    ],
}
|
| 73 |
|
| 74 |
+
# Lookup table read by predict_sirna(): cancer-type code -> list of row dicts
# with the keys Gene, dCERES, log2FC, Drug_status and siRNA.
# NOTE(review): several siRNA strings are repeated for different genes
# (SPC24/WEE1, BUB1B/CHEK1, CDC45/RFC2 in COAD, PLK1/PKMYT1, CDK1/CENPW), so
# they look like placeholders rather than gene-specific designs — confirm.
SIRNA_DB = {
    "LUAD": [
        {"Gene":"SPC24","dCERES":-0.175,"log2FC":1.13,"Drug_status":"Novel","siRNA":"GCAGCUGAAGAAACUGAAU"},
        {"Gene":"BUB1B","dCERES":-0.119,"log2FC":1.12,"Drug_status":"Novel","siRNA":"CCAAAGAGCUGAAGAACAU"},
        {"Gene":"CDC45","dCERES":-0.144,"log2FC":1.26,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
        {"Gene":"PLK1","dCERES":-0.239,"log2FC":1.03,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
        {"Gene":"CDK1","dCERES":-0.201,"log2FC":1.00,"Drug_status":"Clinical","siRNA":"GCAGAAGCACUGAAGAUUU"},
    ],
    "BRCA": [
        {"Gene":"AURKA","dCERES":-0.165,"log2FC":1.20,"Drug_status":"Clinical","siRNA":"GCACUGAAGAUGCAGAAUU"},
        {"Gene":"AURKB","dCERES":-0.140,"log2FC":1.15,"Drug_status":"Clinical","siRNA":"CCUGAAGACGCUCAAGGUU"},
        {"Gene":"CENPW","dCERES":-0.125,"log2FC":0.95,"Drug_status":"Novel","siRNA":"GCAGAAGCACUGAAGAUUU"},
        {"Gene":"RFC2","dCERES":-0.136,"log2FC":0.50,"Drug_status":"Novel","siRNA":"GCAAGAUGCAGAAGCACUU"},
        {"Gene":"TYMS","dCERES":-0.131,"log2FC":0.72,"Drug_status":"Approved","siRNA":"GGACGCUCAAGAUGCAGAU"},
    ],
    "COAD": [
        {"Gene":"KRAS","dCERES":-0.210,"log2FC":0.80,"Drug_status":"Clinical","siRNA":"GCUGGAGCUGGUGGUAGUU"},
        {"Gene":"WEE1","dCERES":-0.180,"log2FC":1.05,"Drug_status":"Clinical","siRNA":"GCAGCUGAAGAAACUGAAU"},
        {"Gene":"CHEK1","dCERES":-0.155,"log2FC":0.90,"Drug_status":"Clinical","siRNA":"CCAAAGAGCUGAAGAACAU"},
        {"Gene":"RFC2","dCERES":-0.130,"log2FC":0.55,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
        {"Gene":"PKMYT1","dCERES":-0.122,"log2FC":1.07,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
    ],
}
|
| 97 |
|
| 98 |
+
# Rows of the ceRNA table returned by get_lncrna(): one dict per
# lncRNA / miRNA / target triple, with the pathway it is filed under.
CERNA = [
    {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"AKT1","pathway":"TREM2 core signaling"},
    {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"NFKB1","pathway":"Neuroinflammation"},
    {"lncRNA":"GAS5","miRNA":"hsa-miR-21-5p","target":"PTEN","pathway":"Neuroinflammation"},
    {"lncRNA":"GAS5","miRNA":"hsa-miR-222-3p","target":"IL1B","pathway":"Neuroinflammation"},
    {"lncRNA":"HOTAIRM1","miRNA":"hsa-miR-9-5p","target":"TREM2","pathway":"Direct TREM2 regulation"},
]
# Rows of the ASO table returned by get_lncrna(): one candidate site per
# lncRNA with the keys position, accessibility, GC_pct, Tm and priority.
# NOTE(review): units of position/Tm are not stated in the file — confirm.
ASO = [
    {"lncRNA":"GAS5","position":119,"accessibility":0.653,"GC_pct":50,"Tm":47.2,"priority":"HIGH"},
    {"lncRNA":"CYTOR","position":507,"accessibility":0.653,"GC_pct":50,"Tm":46.8,"priority":"HIGH"},
    {"lncRNA":"HOTAIRM1","position":234,"accessibility":0.621,"GC_pct":44,"Tm":44.1,"priority":"MEDIUM"},
    {"lncRNA":"LINC00847","position":89,"accessibility":0.598,"GC_pct":56,"Tm":48.3,"priority":"MEDIUM"},
    {"lncRNA":"ZFAS1","position":312,"accessibility":0.571,"GC_pct":48,"Tm":45.5,"priority":"MEDIUM"},
]
|
| 112 |
|
| 113 |
+
# Lookup table read by predict_drug(): pocket label -> list of compound rows
# with the keys Compound, RNA_score, Toxicity and Final_score. Rows are listed
# in descending Final_score order within each pocket.
# NOTE(review): CHEMBL15727 appears under both pockets with identical values,
# and the Toxicity scale (0.00 … 37.1) is not documented here — confirm.
FGFR3 = {
    "P1 (hairpin loop)": [
        {"Compound":"CHEMBL1575701","RNA_score":0.809,"Toxicity":0.01,"Final_score":0.793},
        {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
        {"Compound":"Thioguanine","RNA_score":0.888,"Toxicity":32.5,"Final_score":0.742},
        {"Compound":"Deazaguanine","RNA_score":0.888,"Toxicity":35.0,"Final_score":0.735},
        {"Compound":"CHEMBL441","RNA_score":0.775,"Toxicity":5.2,"Final_score":0.721},
    ],
    "P10 (G-quadruplex)": [
        {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
        {"Compound":"CHEMBL5411515","RNA_score":0.945,"Toxicity":37.1,"Final_score":0.761},
        {"Compound":"CHEMBL90","RNA_score":0.760,"Toxicity":2.1,"Final_score":0.745},
        {"Compound":"CHEMBL102","RNA_score":0.748,"Toxicity":8.4,"Final_score":0.712},
        {"Compound":"Berberine","RNA_score":0.735,"Toxicity":3.2,"Final_score":0.708},
    ],
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 129 |
|
| 130 |
+
# Exact-match lookup used by predict_variant(): "GENE:p.Change" string ->
# stored score, classification ("cls") and confidence ("conf"). Variants not
# listed here fall back to the SIFT / PolyPhen / gnomAD heuristic.
VARIANT_DB = {
    "BRCA1:p.R1699Q": {"score":0.03,"cls":"Benign","conf":"High"},
    "BRCA1:p.R1699W": {"score":0.97,"cls":"Pathogenic","conf":"High"},
    "BRCA2:p.D2723A": {"score":0.999,"cls":"Pathogenic","conf":"High"},
    "TP53:p.R248W": {"score":0.998,"cls":"Pathogenic","conf":"High"},
    "TP53:p.R248Q": {"score":0.995,"cls":"Pathogenic","conf":"High"},
    "EGFR:p.L858R": {"score":0.96,"cls":"Pathogenic","conf":"High"},
    "ALK:p.F1174L": {"score":0.94,"cls":"Pathogenic","conf":"High"},
}
# Plain-language sentence shown for each classification; predict_variant()
# looks a class up with .get(cls, ""), so classes without an entry show no text.
PLAIN = {
    "Pathogenic": "This variant is likely to cause disease. Clinical follow-up is strongly recommended.",
    "Likely Pathogenic":"This variant is probably harmful. Discuss with your doctor.",
    "Benign": "This variant is likely harmless. Common in the general population.",
    "Likely Benign": "This variant is probably harmless. No strong reason for concern.",
}
|
| 145 |
+
# Per-protein weights used by predict_cancer(). Insertion order matters: the
# function pairs its positional inputs c1..c10 with these entries in order.
# Positive weights raise the cancer score, negative weights lower it.
BM_W = {
    "CTHRC1":0.18,"FHL2":0.15,"LDHA":0.14,"P4HA1":0.13,
    "SERPINH1":0.12,"ABCA8":-0.11,"CA4":-0.10,"CKB":-0.09,
    "NNMT":0.08,"CACNA2D2":-0.07
}
# Lower-case protein name fragments.
# NOTE(review): not referenced in the visible part of the file; presumably
# used for keyword matching elsewhere — confirm against the caller.
PROTEINS = ["albumin","apolipoprotein","fibrinogen","vitronectin",
            "clusterin","igm","iga","igg","complement","transferrin",
            "alpha-2-macroglobulin"]
|
| 153 |
|
| 154 |
+
def predict_mirna(gene):
    """Return the ``MIRNA_DB`` rows for *gene* as a DataFrame.

    Unknown genes yield an empty frame. Every call is recorded in the lab
    journal together with the number of rows found.
    """
    rows = MIRNA_DB.get(gene, [])
    table = pd.DataFrame(rows)
    hit_count = len(table)
    log_entry("BRCA2 miRNA", gene, f"Found {hit_count} miRNAs for {gene}")
    return table
|
| 158 |
|
| 159 |
+
def predict_sirna(cancer):
    """Return the ``SIRNA_DB`` rows for *cancer* as a DataFrame.

    Unknown cancer codes yield an empty frame. Every call is recorded in the
    lab journal together with the number of targets found.
    """
    rows = SIRNA_DB.get(cancer, [])
    table = pd.DataFrame(rows)
    target_count = len(table)
    log_entry("TP53 siRNA", cancer, f"Found {target_count} targets for {cancer}")
    return table
|
| 163 |
|
| 164 |
+
def get_lncrna():
    """Return the ceRNA and ASO tables as a ``(cerna_df, aso_df)`` pair.

    The access is recorded in the lab journal before the frames are built.
    """
    log_entry("lncRNA-TREM2", "load", "ceRNA+ASO tables")
    cerna_table = pd.DataFrame(CERNA)
    aso_table = pd.DataFrame(ASO)
    return cerna_table, aso_table
|
| 167 |
+
|
| 168 |
+
def predict_drug(pocket):
    """Return the compound table and a bar chart for one FGFR3 pocket.

    Args:
        pocket: Pocket label; looked up as a key of ``FGFR3``.

    Returns:
        A ``(DataFrame, PIL.Image)`` pair: the compound rows for the pocket
        and a horizontal bar chart of their ``Final_score``. For an unknown
        pocket the frame is empty and the chart has no bars.
    """
    df = pd.DataFrame(FGFR3.get(pocket, []))
    fig, ax = plt.subplots(figsize=(6, 4), facecolor=CARD)
    try:
        ax.set_facecolor(CARD)
        # An unknown pocket gives a frame without columns; indexing
        # df["Compound"] would raise KeyError, so only draw when rows exist.
        if not df.empty:
            ax.barh(df["Compound"], df["Final_score"], color=ACC)
        ax.set_xlabel("Final Score", color=TXT)
        ax.tick_params(colors=TXT)
        for sp in ax.spines.values():
            sp.set_edgecolor("#334155")
        ax.set_title(f"Top compounds — {pocket}", color=TXT, fontsize=10)
        fig.tight_layout()
        buf = BytesIO()
        # Save through the figure object rather than pyplot's implicit
        # "current figure", which another concurrent request may have changed.
        fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
    finally:
        # Always release this specific figure, even when plotting fails.
        plt.close(fig)
    buf.seek(0)
    log_entry("FGFR3 Drug", pocket, f"Top: {df.iloc[0]['Compound'] if len(df) else 'none'}")
    return df, Image.open(buf)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 185 |
|
| 186 |
+
def predict_variant(hgvs, sift, polyphen, gnomad):
    """Classify a variant and render the verdict as an HTML card.

    A variant whose stripped *hgvs* string is a key of ``VARIANT_DB`` uses the
    stored class, confidence and score. Otherwise a score is accumulated from
    three thresholds (SIFT < 0.05, PolyPhen > 0.85, gnomAD < 0.0001) and mapped
    to "Pathogenic", "Likely Pathogenic" or "Benign".

    Returns:
        An HTML string with the class, score, confidence, a score bar and the
        plain-language explanation for the class. The call is also journaled.
    """
    hgvs = hgvs.strip()
    known = VARIANT_DB.get(hgvs)
    if known is not None:
        cls = known["cls"]
        conf = known["conf"]
        score = known["score"]
    else:
        # (criterion met?, weight added to the score), applied in this order.
        criteria = (
            (sift < 0.05, 0.4),
            (polyphen > 0.85, 0.35),
            (gnomad < 0.0001, 0.25),
        )
        score = 0.0
        for met, weight in criteria:
            if met:
                score += weight
        score = round(score, 3)
        if score > 0.6:
            cls = "Pathogenic"
        elif score > 0.4:
            cls = "Likely Pathogenic"
        else:
            cls = "Benign"
        if sift < 0.01 or sift > 0.9:
            conf = "High"
        else:
            conf = "Moderate"

    # Both "Pathogenic" and "Likely Pathogenic" get the warning styling.
    flagged = "Pathogenic" in cls
    colour = "#ef4444" if flagged else "#22c55e"
    icon = "⚠️ WARNING" if flagged else "✅ OK"
    bar_w = int(score * 100)
    explanation = PLAIN.get(cls, "")
    log_entry("OpenVariant", hgvs or f"SIFT={sift}", f"{cls} score={score}")

    html_parts = [
        f"<div style='background:{CARD};padding:16px;border-radius:8px;",
        f"font-family:sans-serif;color:{TXT}'>",
        f"<h3 style='color:{colour}'>{icon} {cls}</h3>",
        f"<p>Score: <b>{score:.3f}</b> | Confidence: <b>{conf}</b></p>",
        "<div style='background:#334155;border-radius:4px;height:16px'>",
        f"<div style='background:{colour};height:16px;border-radius:4px;width:{bar_w}%'></div></div>",
        f"<p style='margin-top:12px'>{explanation}</p>",
        "<p style='font-size:11px;color:#64748b'>Research only. Not clinical.</p>",
        "</div>",
    ]
    return "".join(html_parts)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 216 |
|
| 217 |
+
def predict_corona(size, zeta, peg, lipid):
    """Score an LNP formulation and report its dominant corona protein.

    Points: lipid "Ionizable" 2 / "Cationic" 1, |zeta| < 10 adds 1, peg > 1.5
    adds 2, size < 100 adds 1 (maximum 6). The score, capped at 4, indexes the
    protein list; efficacy is "High" from 4, "Medium" from 2, otherwise "Low".

    Returns:
        A Markdown string with the dominant protein, efficacy class and score.
        The call is also recorded in the lab journal.
    """
    score = 2 if lipid == "Ionizable" else 1 if lipid == "Cationic" else 0
    # (condition met?, points awarded)
    bonuses = (
        (abs(zeta) < 10, 1),
        (peg > 1.5, 2),
        (size < 100, 1),
    )
    score += sum(points for earned, points in bonuses if earned)

    proteins = ["ApoE","Albumin","Fibrinogen","Vitronectin","ApoA-I"]
    dominant = proteins[min(score, 4)]
    if score >= 4:
        efficacy = "High"
    elif score >= 2:
        efficacy = "Medium"
    else:
        efficacy = "Low"

    log_entry("LNP Corona", f"size={size},zeta={zeta},peg={peg},lipid={lipid}",
              f"dominant={dominant},efficacy={efficacy}")
    summary_lines = [
        f"**Dominant corona protein:** {dominant}\n\n",
        f"**Predicted efficacy class:** {efficacy}\n\n",
        f"**Composite score:** {score}/6",
    ]
    return "".join(summary_lines)
|
| 232 |
+
|
| 233 |
+
def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
    """Score ten protein levels against ``BM_W`` and plot their contributions.

    Each input is multiplied by the weight at the same position in ``BM_W``;
    the sum is passed through a logistic function (with a factor of 2) and
    labelled "CANCER" above 0.5, otherwise "HEALTHY".

    Returns:
        ``(html, image)``: an HTML card with label and probability, and a PIL
        image of the per-protein contribution bar chart. The call is journaled.
    """
    vals = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10]
    names = list(BM_W)
    contribs = [level * weight for level, weight in zip(vals, BM_W.values())]
    raw = sum(contribs)
    prob = 1 / (1 + np.exp(-raw * 2))

    is_cancer = prob > 0.5
    label = "CANCER" if is_cancer else "HEALTHY"
    colour = "#ef4444" if is_cancer else "#22c55e"

    # Positive contributions use the primary accent, the rest the secondary.
    bar_colours = [ACC if share > 0 else ACC2 for share in contribs]
    fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
    ax.set_facecolor(CARD)
    ax.barh(names, contribs, color=bar_colours)
    ax.axvline(0, color=TXT, linewidth=0.8)
    ax.set_xlabel("Contribution to cancer score", color=TXT)
    ax.tick_params(colors=TXT, labelsize=8)
    for spine in ax.spines.values():
        spine.set_edgecolor("#334155")
    ax.set_title("Protein contributions", color=TXT, fontsize=10)
    plt.tight_layout()
    buf = BytesIO()
    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
    plt.close()
    buf.seek(0)

    log_entry("Liquid Biopsy", f"CTHRC1={c1},FHL2={c2}...", f"{label} prob={prob:.2f}")
    card = "".join([
        f"<div style='background:{CARD};padding:12px;border-radius:8px;",
        f"color:{colour};font-size:20px;font-family:sans-serif'>",
        f"<b>{label}</b><br>",
        f"<span style='color:{TXT};font-size:14px'>Probability: {prob:.2f}</span></div>",
    ])
    return card, Image.open(buf)
|
| 264 |
+
|
| 265 |
+
def predict_flow(size, zeta, peg, charge, flow_rate):
    """Compute a Corona Shift Index and plot static-vs-flow corona kinetics.

    The index is a weighted sum of normalised flow rate (weight 0.6),
    PEG fraction (0.2) and a cationic-charge flag (0.2), capped at 1.0.

    Args:
        size: Accepted for interface compatibility; not used in the model.
        zeta: Accepted for interface compatibility; not used in the model.
        peg: PEG value, normalised by 5.
        charge: Charge type; only ``"Cationic"`` adds to the index.
        flow_rate: Flow rate, normalised by 40; also scales both rate
            constants of the flow curves.

    Returns:
        A ``(markdown, image)`` tuple: the index with its stability class,
        and a PIL image of the albumin/ApoE curves over 0-60 minutes.
    """
    csi = ((flow_rate/40)*0.6 + (peg/5)*0.2 +
           (1 if charge == "Cationic" else 0)*0.2)
    csi = round(min(csi, 1.0), 3)
    stability = ("High remodeling" if csi > 0.6 else
                 "Medium" if csi > 0.3 else "Stable")

    t = np.linspace(0, 60, 200)
    # Flow multiplies both rate constants by (1 + flow_rate/40).
    kf = 0.03 * (1 + flow_rate/40)
    ks = 0.038 * (1 + flow_rate/40)

    fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
    try:
        ax.set_facecolor(CARD)
        ax.plot(t, 60*np.exp(-0.03*t)+20, color="#60a5fa", ls="--", label="Albumin (static)")
        ax.plot(t, 60*np.exp(-kf*t)+10, color="#60a5fa", label="Albumin (flow)")
        ax.plot(t, 14*(1-np.exp(-0.038*t))+5, color=ACC, ls="--", label="ApoE (static)")
        ax.plot(t, 20*(1-np.exp(-ks*t))+5, color=ACC, label="ApoE (flow)")
        ax.set_xlabel("Time (min)", color=TXT)
        ax.set_ylabel("% Corona", color=TXT)
        ax.tick_params(colors=TXT)
        ax.legend(fontsize=7, labelcolor=TXT, facecolor=CARD)
        for sp in ax.spines.values():
            sp.set_edgecolor("#334155")
        ax.set_title("Vroman Effect", color=TXT, fontsize=9)
        # Use the figure object directly instead of pyplot's implicit
        # current-figure state, which is shared across concurrent requests.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
    finally:
        # Always release the figure, even if plotting or saving raised.
        plt.close(fig)
    buf.seek(0)

    log_entry("Flow Corona", f"flow={flow_rate},charge={charge}", f"CSI={csi},{stability}")
    return f"**Corona Shift Index: {csi}** — {stability}", Image.open(buf)
|
| 294 |
+
|
| 295 |
+
def predict_bbb(smiles, pka, zeta):
    """Estimate ApoE corona percentage and BBB probability, with a radar plot.

    A crude logP proxy is derived from the number of ``"C"`` and ``"O"``
    characters in the SMILES string. ApoE% combines pKa, |zeta| and that
    proxy and is clamped to 0-40; the BBB probability is ApoE%/30 capped
    at 0.95.

    Args:
        smiles: SMILES string of the lipid; ``None`` or empty is treated
            as an empty string.
        pka: Lipid pKa.
        zeta: Zeta potential.

    Returns:
        A ``(markdown, image)`` tuple: the ApoE tier and BBB probability,
        and a PIL image of the five-axis radar profile.
    """
    smiles = smiles or ""  # a cleared textbox must not crash on .count
    logp = smiles.count("C")*0.3 - smiles.count("O")*0.5 + 1.5
    apoe_pct = max(0, min(40, (7.0-pka)*8 + abs(zeta)*0.5 + logp*0.8))
    bbb_prob = min(0.95, apoe_pct/30)
    tier = ("HIGH (>20%)" if apoe_pct > 20 else
            "MEDIUM (10-20%)" if apoe_pct > 10 else "LOW (<10%)")

    cats = ["ApoE%","BBB","logP","pKa fit","Zeta"]
    raw_vals = [apoe_pct/40, bbb_prob, min(logp/5,1),
                (7-abs(pka-6.5))/7, (10-abs(zeta))/10]
    # Radar radii must stay in [0, 1]: e.g. |zeta| > 10 or a negative logP
    # proxy would otherwise produce negative radii and distort the plot.
    vals = [min(1.0, max(0.0, v)) for v in raw_vals]
    angles = np.linspace(0, 2*np.pi, len(cats), endpoint=False).tolist()
    # Repeat the first point so the polygon closes.
    v2, a2 = vals+[vals[0]], angles+[angles[0]]

    fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={"polar":True}, facecolor=CARD)
    try:
        ax.set_facecolor(CARD)
        ax.plot(a2, v2, color=ACC, linewidth=2)
        ax.fill(a2, v2, color=ACC, alpha=0.2)
        ax.set_xticks(angles)
        ax.set_xticklabels(cats, color=TXT, fontsize=8)
        ax.tick_params(colors=TXT)
        # Use the figure object directly instead of pyplot's implicit
        # current-figure state, which is shared across concurrent requests.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
    finally:
        # Always release the figure, even if plotting or saving raised.
        plt.close(fig)
    buf.seek(0)

    log_entry("LNP Brain", f"pka={pka},zeta={zeta}", f"ApoE={apoe_pct:.1f}%,BBB={bbb_prob:.2f}")
    return (f"**Predicted ApoE:** {apoe_pct:.1f}% — {tier}\n\n"
            f"**BBB Probability:** {bbb_prob:.2f}"), Image.open(buf)
|
| 321 |
+
|
| 322 |
+
def extract_corona(text):
    """Extract nanoparticle/corona fields from free text with regex and keywords.

    Looks for a size in nm, a zeta potential in mV, a PDI value, a known
    protein source, any entry of ``PROTEINS`` and a fixed list of lipid
    names, then logs the run through ``log_entry``.

    Args:
        text: Abstract or paragraph to scan; ``None`` is treated as empty.

    Returns:
        A ``(json_text, summary)`` tuple: the extracted record serialised
        with ``json.dumps(indent=2)``, and either
        ``"All key fields extracted"`` or the missing-field flags joined
        with ``" | "``.
    """
    text = text or ""
    lowered = text.lower()  # computed once; reused for all case-insensitive checks
    out = {
        "nanoparticle_composition": "",
        "size_nm": None, "zeta_mv": None, "PDI": None,
        "protein_source": "", "corona_proteins": [], "confidence": {}
    }

    m = re.search(r"(\d+\.?\d*)\s*(?:nm|nanometer)", text, re.I)
    if m:
        out["size_nm"] = float(m.group(1))
        out["confidence"]["size_nm"] = "HIGH"

    # Papers often typeset negatives with U+2212 (MINUS SIGN); normalise it
    # so the sign is captured instead of being silently dropped.
    m = re.search(r"([+-]?\d+\.?\d*)\s*mV", text.replace("\u2212", "-"), re.I)
    if m:
        out["zeta_mv"] = float(m.group(1))
        out["confidence"]["zeta_mv"] = "HIGH"

    m = re.search(r"PDI\s*[=:of]*\s*(\d+\.?\d*)", text, re.I)
    if m:
        out["PDI"] = float(m.group(1))
        out["confidence"]["PDI"] = "HIGH"

    # First matching source wins, so list order is the priority order.
    for src in ["human plasma","human serum","fetal bovine serum","FBS","PBS"]:
        if src.lower() in lowered:
            out["protein_source"] = src
            out["confidence"]["protein_source"] = "HIGH"
            break

    out["corona_proteins"] = [
        {"name": p, "confidence": "MEDIUM"} for p in PROTEINS if p in lowered
    ]

    found_lipids = []
    for lip in ["DSPC","DOPE","MC3","DLin","cholesterol","PEG","DOTAP"]:
        # Acronyms stay case-sensitive (avoids e.g. "doped" matching DOPE);
        # the plain word "cholesterol" is matched regardless of capitalisation.
        if lip in text or (lip.islower() and lip in lowered):
            found_lipids.append(lip)
    out["nanoparticle_composition"] = " ".join(found_lipids)

    flags = []
    # Compare against None: a measured value of 0 is a valid extraction.
    if out["size_nm"] is None: flags.append("size_nm not found")
    if out["zeta_mv"] is None: flags.append("zeta_mv not found")
    if not out["corona_proteins"]: flags.append("no proteins detected")
    summary = "All key fields extracted" if not flags else " | ".join(flags)

    log_entry("AutoCorona NLP", text[:80]+"...",
              f"proteins={len(out['corona_proteins'])},{summary}")
    return json.dumps(out, indent=2), summary
|
| 360 |
+
|
| 361 |
+
# Stylesheet passed to gr.Blocks: one rule per entry, joined without separators.
css = "".join((
    f"body,.gradio-container{{background:{BG}!important;color:{TXT}!important}}",
    f".tab-nav button{{color:{TXT}!important;background:{CARD}!important}}",
    f".tab-nav button.selected{{border-bottom:2px solid {ACC}!important;color:{ACC}!important}}",
    f"h1,h2,h3{{color:{ACC}!important}}",
    f".gr-button-primary{{background:{ACC}!important;border:none!important}}",
    f"footer{{display:none!important}}",
))
|
| 369 |
|
| 370 |
+
LEARNING_CASES = """
|
| 371 |
+
## 🧪 Top 5 Guided Investigations
|
| 372 |
+
### Case 1 — Beginner 🟢
|
| 373 |
+
**Question:** Why is the same gene position benign vs pathogenic?
|
| 374 |
+
1. OpenVariant → enter `BRCA1:p.R1699Q` → Benign
|
| 375 |
+
2. Enter `BRCA1:p.R1699W` → Pathogenic
|
| 376 |
+
3. Same position, different amino acid — what changed?
|
| 377 |
+
**Key concept:** Amino acid polarity determines protein folding impact.
|
| 378 |
+
---
|
| 379 |
+
### Case 2 — Beginner 🟢
|
| 380 |
+
**Question:** How does PEG% change what protein sticks to LNPs?
|
| 381 |
+
1. LNP Corona → Ionizable, Zeta=-5, Size=100, PEG=0.5% → note protein
|
| 382 |
+
2. PEG=2.5% → compare
|
| 383 |
+
3. LNP Brain → pKa=6.5 → compare ApoE%
|
| 384 |
+
**Key concept:** More PEG → less Fibrinogen, more ApoE.
|
| 385 |
+
---
|
| 386 |
+
### Case 3 — Intermediate 🟡
|
| 387 |
+
**Question:** Does blood flow change corona composition?
|
| 388 |
+
1. Flow Corona → Flow=0, Ionizable
|
| 389 |
+
2. Flow=40 (arterial) → compare ApoE curve
|
| 390 |
+
3. At what minute does ApoE plateau?
|
| 391 |
+
**Key concept:** Vroman effect — albumin displaced by ApoE under flow.
|
| 392 |
+
---
|
| 393 |
+
### Case 4 — Intermediate 🟡
|
| 394 |
+
**Question:** Which cancer has the most novel siRNA targets?
|
| 395 |
+
1. TP53 siRNA → LUAD → count "Novel"
|
| 396 |
+
2. Repeat BRCA, COAD
|
| 397 |
+
3. Pick one Novel gene → Google: "[gene] cancer therapeutic target"
|
| 398 |
+
---
|
| 399 |
+
### Case 5 — Advanced 🔴
|
| 400 |
+
**Question:** Can you identify cancer from protein levels?
|
| 401 |
+
1. Liquid Biopsy → all sliders=0 → HEALTHY
|
| 402 |
+
2. Set CTHRC1=2.5, FHL2=2.0, LDHA=1.8 → observe
|
| 403 |
+
3. Find minimum CTHRC1 that tips to CANCER
|
| 404 |
+
**Key concept:** CTHRC1 weight (0.18) dominates the score.
|
| 405 |
+
"""
|
| 406 |
|
| 407 |
+
# Top-level Gradio UI: a header, one tab per tool, a journal tab, a learning
# tab and a footer. Each tool tab wires a button to its predict_*/extract_*
# handler.
# NOTE(review): nesting below was reconstructed from a flattened rendering of
# the file — confirm Row/Tab membership and the indentation of the multi-line
# Markdown strings against the original app.py.
with gr.Blocks(css=css, title="K R&D Lab") as demo:

    gr.Markdown(
        "# 🧬 K R&D Lab — Computational Biology Suite\n"
        "**Oksana Kolisnyk** · ML Engineer · "
        "[KOSATIKS GROUP](https://kosatiks-group.pp.ua)\n"
        "> 10 open-source tools + lab journal."
    )

    with gr.Tabs():

        # Gene dropdown -> predict_mirna -> dataframe.
        with gr.TabItem("🧬 BRCA2 miRNA"):
            gr.Markdown("### Tumor Suppressor miRNAs")
            g1 = gr.Dropdown(["BRCA2","BRCA1","TP53"], value="BRCA2", label="Gene")
            b1 = gr.Button("Find miRNAs", variant="primary")
            o1 = gr.Dataframe(label="Top 5 downregulated miRNAs")
            gr.Examples([["BRCA2"],["TP53"]], inputs=[g1])
            b1.click(predict_mirna, g1, o1)

        # Cancer-type dropdown -> predict_sirna -> dataframe.
        with gr.TabItem("💉 TP53 siRNA"):
            gr.Markdown("### Synthetic Lethal siRNA Targets")
            g2 = gr.Dropdown(["LUAD","BRCA","COAD"], value="LUAD", label="Cancer type")
            b2 = gr.Button("Find Targets", variant="primary")
            o2 = gr.Dataframe(label="Top 5 siRNA targets")
            gr.Examples([["LUAD"],["BRCA"]], inputs=[g2])
            b2.click(predict_sirna, g2, o2)

        # No inputs: get_lncrna fills two dataframes.
        with gr.TabItem("🧠 lncRNA-TREM2"):
            gr.Markdown("### lncRNA Networks in Alzheimer's")
            b3 = gr.Button("Load Results", variant="primary")
            o3a = gr.Dataframe(label="ceRNA Network")
            o3b = gr.Dataframe(label="ASO Candidates")
            b3.click(get_lncrna, [], [o3a, o3b])

        # Pocket choice -> predict_drug -> table + image.
        with gr.TabItem("💊 FGFR3 Drug"):
            gr.Markdown("### RNA-Directed Drug Discovery: FGFR3")
            g4 = gr.Radio(["P1 (hairpin loop)","P10 (G-quadruplex)"],
                          value="P1 (hairpin loop)", label="Target pocket")
            b4 = gr.Button("Screen Compounds", variant="primary")
            o4t = gr.Dataframe(label="Top 5 candidates")
            o4p = gr.Image(label="Binding scores")
            gr.Examples([["P1 (hairpin loop)"],["P10 (G-quadruplex)"]], inputs=[g4])
            b4.click(predict_drug, g4, [o4t, o4p])

        # HGVS text plus three score sliders -> predict_variant -> HTML.
        with gr.TabItem("🔬 OpenVariant"):
            gr.Markdown("### OpenVariant — Pathogenicity Classifier\nAUC=0.939 on ClinVar 2026.")
            hgvs = gr.Textbox(label="HGVS notation", placeholder="BRCA1:p.R1699Q")
            gr.Markdown("**Or enter scores manually:**")
            with gr.Row():
                sift = gr.Slider(0, 1, value=0.5, step=0.01, label="SIFT (0=damaging)")
                pp = gr.Slider(0, 1, value=0.5, step=0.01, label="PolyPhen-2")
                gn = gr.Slider(0, 0.01, value=0.001, step=0.0001, label="gnomAD AF")
            b5 = gr.Button("Predict Pathogenicity", variant="primary")
            o5 = gr.HTML(label="Result")
            gr.Examples(
                [["BRCA1:p.R1699Q", 0.82, 0.05, 0.0012],
                 ["TP53:p.R248W", 0.00, 1.00, 0.0],
                 ["BRCA2:p.D2723A", 0.01, 0.98, 0.0]],
                inputs=[hgvs, sift, pp, gn])
            b5.click(predict_variant, [hgvs, sift, pp, gn], o5)

        # Formulation sliders + lipid type -> predict_corona -> markdown.
        with gr.TabItem("🧪 LNP Corona"):
            gr.Markdown("### LNP Protein Corona Prediction")
            with gr.Row():
                sz = gr.Slider(50, 300, value=100, step=1, label="Size (nm)")
                zt = gr.Slider(-40, 10, value=-5, step=1, label="Zeta (mV)")
            with gr.Row():
                pg = gr.Slider(0, 5, value=1.5, step=0.1, label="PEG mol%")
                lp = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
                                 value="Ionizable", label="Lipid type")
            b6 = gr.Button("Predict", variant="primary")
            o6 = gr.Markdown()
            gr.Examples([[100,-5,1.5,"Ionizable"],[80,5,0.5,"Cationic"]], inputs=[sz,zt,pg,lp])
            b6.click(predict_corona, [sz,zt,pg,lp], o6)

        # Ten protein sliders (passed positionally as c1..c10) -> predict_cancer.
        with gr.TabItem("🩸 Liquid Biopsy"):
            gr.Markdown("### Protein Corona Cancer Diagnostics\nClassify cancer vs healthy.")
            with gr.Row():
                p1 = gr.Slider(-3, 3, value=0, step=0.1, label="CTHRC1")
                p2 = gr.Slider(-3, 3, value=0, step=0.1, label="FHL2")
                p3 = gr.Slider(-3, 3, value=0, step=0.1, label="LDHA")
                p4 = gr.Slider(-3, 3, value=0, step=0.1, label="P4HA1")
                p5 = gr.Slider(-3, 3, value=0, step=0.1, label="SERPINH1")
            with gr.Row():
                p6 = gr.Slider(-3, 3, value=0, step=0.1, label="ABCA8")
                p7 = gr.Slider(-3, 3, value=0, step=0.1, label="CA4")
                p8 = gr.Slider(-3, 3, value=0, step=0.1, label="CKB")
                p9 = gr.Slider(-3, 3, value=0, step=0.1, label="NNMT")
                p10 = gr.Slider(-3, 3, value=0, step=0.1, label="CACNA2D2")
            b7 = gr.Button("Classify", variant="primary")
            o7t = gr.HTML()
            o7p = gr.Image(label="Feature contributions")
            gr.Examples(
                [[2,2,1.5,1.8,1.6,-1,-1.2,-0.8,1.4,-1.1],
                 [0,0,0,0,0,0,0,0,0,0]],
                inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10])
            b7.click(predict_cancer, [p1,p2,p3,p4,p5,p6,p7,p8,p9,p10], [o7t,o7p])

        # Formulation + flow rate -> predict_flow -> markdown + kinetics image.
        with gr.TabItem("🌊 Flow Corona"):
            gr.Markdown("### Corona Remodeling Under Blood Flow")
            with gr.Row():
                s8 = gr.Slider(50, 300, value=100, step=1, label="Size (nm)")
                z8 = gr.Slider(-40, 10, value=-5, step=1, label="Zeta (mV)")
                pg8 = gr.Slider(0, 5, value=1.5, step=0.1, label="PEG mol%")
            with gr.Row():
                ch8 = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
                                  value="Ionizable", label="Charge type")
                fl8 = gr.Slider(0, 40, value=20, step=1, label="Flow rate cm/s (aorta=40)")
            b8 = gr.Button("Model Vroman Effect", variant="primary")
            o8t = gr.Markdown()
            o8p = gr.Image(label="Kinetics plot")
            gr.Examples([[100,-5,1.5,"Ionizable",40],[150,5,0.5,"Cationic",10]],
                        inputs=[s8,z8,pg8,ch8,fl8])
            b8.click(predict_flow, [s8,z8,pg8,ch8,fl8], [o8t,o8p])

        # SMILES + pKa + zeta -> predict_bbb -> markdown + radar image.
        with gr.TabItem("🧠 LNP Brain"):
            gr.Markdown("### LNP Brain Delivery Predictor")
            smi = gr.Textbox(label="Ionizable lipid SMILES",
                             value="CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C")
            with gr.Row():
                pk = gr.Slider(4, 8, value=6.5, step=0.1, label="pKa")
                zt9 = gr.Slider(-20, 10, value=-3, step=1, label="Zeta (mV)")
            b9 = gr.Button("Predict BBB Crossing", variant="primary")
            o9t = gr.Markdown()
            o9p = gr.Image(label="Radar profile")
            gr.Examples([["CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C", 6.5, -3]],
                        inputs=[smi, pk, zt9])
            b9.click(predict_bbb, [smi, pk, zt9], [o9t, o9p])

        # Free text -> extract_corona -> JSON + validation flags.
        with gr.TabItem("📄 AutoCorona NLP"):
            gr.Markdown("### AutoCorona NLP Extraction\nPaste any paper abstract.")
            txt = gr.Textbox(lines=6, label="Paper abstract", placeholder="Paste text here...")
            b10 = gr.Button("Extract Data", variant="primary")
            o10j = gr.Code(label="Extracted JSON", language="json")
            o10f = gr.Textbox(label="Validation flags")
            gr.Examples([[
                "LNPs composed of MC3, DSPC, Cholesterol (50:10:40 mol%) with 1.5% PEG-DMG. "
                "Hydrodynamic diameter was 98 nm, zeta potential -3.2 mV, PDI 0.12. "
                "Incubated in human plasma. Corona: albumin, apolipoprotein E, fibrinogen."
            ]], inputs=[txt])
            b10.click(extract_corona, txt, [o10j, o10f])

        # Journal: save_note appends an observation; load_journal refreshes the table.
        with gr.TabItem("📓 Lab Journal"):
            gr.Markdown("### Your Research Log\nEvery query is auto-saved.")
            with gr.Row():
                note_text = gr.Textbox(
                    label="📝 Add observation / conclusion",
                    placeholder="What did you discover? What's your next question?",
                    lines=3)
                note_tab = gr.Textbox(label="Which tool?", value="General")
                # Hidden field: nothing in this block writes to it.
                note_last = gr.Textbox(label="Result to annotate", visible=False)
            save_btn = gr.Button("💾 Save Observation", variant="primary")
            save_msg = gr.Markdown()
            # Initial table contents are read once, when the UI is built.
            journal_df = gr.Dataframe(
                label="📋 Full History",
                value=load_journal(),
                interactive=False)
            refresh_btn = gr.Button("🔄 Refresh")
            refresh_btn.click(load_journal, [], journal_df)
            save_btn.click(save_note, [note_text, note_tab, note_last], [save_msg, journal_df])
            gr.Markdown("📥 Log saved as `lab_journal.csv` in the app folder.")

        # Static content only: guided cases, a tool cheat-sheet and links.
        with gr.TabItem("📚 Learning Mode"):
            gr.Markdown(LEARNING_CASES)
            gr.Markdown("---\n### 📖 Quick Reference")
            gr.Markdown("""
| Tool | Predicts | Key input |
|------|----------|-----------|
| OpenVariant | Pathogenic/Benign | Gene mutation |
| LNP Corona | Dominant protein | Formulation |
| Flow Corona | Vroman kinetics | Flow rate |
| LNP Brain | ApoE% + BBB prob | pKa + zeta |
| Liquid Biopsy | Cancer/Healthy | Protein z-scores |
| BRCA2 miRNA | Downregulated miRNAs | Gene name |
| TP53 siRNA | Synthetic lethal targets | Cancer type |
| lncRNA-TREM2 | ceRNA + ASOs | — |
| FGFR3 Drug | Small molecules | Pocket type |
| AutoCorona NLP | Structured data | Abstract text |
""")
            gr.Markdown("""
### 🔗 Resources
- [PubMed](https://pubmed.ncbi.nlm.nih.gov)
- [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/)
- [UniProt](https://www.uniprot.org)
- [ChEMBL](https://www.ebi.ac.uk/chembl/)
""")

    # Footer shown below the tabs.
    gr.Markdown(
        "---\n**K R&D Lab** | Research only — not clinical | "
        "[GitHub](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026) | "
        "[KOSATIKS GROUP 🦈](https://kosatiks-group.pp.ua)"
    )
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 599 |
|
| 600 |
+
# Start the Gradio server on all network interfaces (0.0.0.0), port 7860.
demo.launch(server_name="0.0.0.0", server_port=7860)
|
|
|
|
|
|