TEZv committed on
Commit
1b9269f
·
verified ·
1 Parent(s): 4301072

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +1206 -567
app.py CHANGED
@@ -1,609 +1,1248 @@
 
 
 
 
 
 
1
  import gradio as gr
2
- import pandas as pd
 
 
 
 
 
 
 
3
  import numpy as np
4
- import json, re, csv
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
8
- from io import BytesIO
 
 
9
  from PIL import Image
10
- from datetime import datetime
11
- from pathlib import Path
12
 
13
- BG = "#0f172a"
14
- CARD = "#1e293b"
15
- ACC = "#f97316"
16
- ACC2 = "#38bdf8"
17
- TXT = "#f1f5f9"
 
 
 
 
 
18
 
19
- LOG_PATH = Path("/tmp/lab_journal.csv")
 
 
 
 
 
 
 
 
 
 
 
20
 
21
- def log_entry(tab, inputs, result, note=""):
22
  try:
23
- write_header = not LOG_PATH.exists()
24
- with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
25
- w = csv.DictWriter(f, fieldnames=["timestamp","tab","inputs","result","note"])
26
- if write_header:
27
- w.writeheader()
28
- w.writerow({
29
- "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
30
- "tab": tab,
31
- "inputs": str(inputs),
32
- "result": str(result)[:200],
33
- "note": note
34
- })
35
  except Exception:
36
  pass
37
 
38
- def load_journal():
 
 
 
 
 
39
  try:
40
- if not LOG_PATH.exists():
41
- return pd.DataFrame(columns=["timestamp","tab","inputs","result","note"])
42
- return pd.read_csv(LOG_PATH)
 
 
 
 
 
 
43
  except Exception:
44
- return pd.DataFrame(columns=["timestamp","tab","inputs","result","note"])
45
 
46
- def save_note(note, tab, last_result):
47
- log_entry(tab, "", last_result, note)
48
- return "✅ Saved!", load_journal()
 
 
 
 
 
 
 
49
 
50
- MIRNA_DB = {
51
- "BRCA2": [
52
- {"miRNA":"hsa-miR-148a-3p","log2FC":-0.70,"padj":0.013,"targets":"DNMT1, AKT2","pathway":"Epigenetic reprogramming"},
53
- {"miRNA":"hsa-miR-30e-5p","log2FC":-0.49,"padj":0.032,"targets":"MYC, KRAS","pathway":"Oncogene suppression"},
54
- {"miRNA":"hsa-miR-551b-3p","log2FC":-0.59,"padj":0.048,"targets":"SMAD4, CDK6","pathway":"TGF-beta / CDK4/6"},
55
- {"miRNA":"hsa-miR-22-3p","log2FC":-0.43,"padj":0.041,"targets":"HIF1A, PTEN","pathway":"Hypoxia / PI3K"},
56
- {"miRNA":"hsa-miR-200c-3p","log2FC":-0.38,"padj":0.044,"targets":"ZEB1, ZEB2","pathway":"EMT suppression"},
57
- ],
58
- "BRCA1": [
59
- {"miRNA":"hsa-miR-155-5p","log2FC":-0.81,"padj":0.008,"targets":"SHIP1, SOCS1","pathway":"Immune evasion"},
60
- {"miRNA":"hsa-miR-146a-5p","log2FC":-0.65,"padj":0.019,"targets":"TRAF6, IRAK1","pathway":"NF-kB signalling"},
61
- {"miRNA":"hsa-miR-21-5p","log2FC":-0.55,"padj":0.027,"targets":"PTEN, PDCD4","pathway":"Apoptosis"},
62
- {"miRNA":"hsa-miR-17-5p","log2FC":-0.47,"padj":0.036,"targets":"RB1, E2F1","pathway":"Cell cycle"},
63
- {"miRNA":"hsa-miR-34a-5p","log2FC":-0.41,"padj":0.049,"targets":"BCL2, CDK6","pathway":"p53 axis"},
64
- ],
65
- "TP53": [
66
- {"miRNA":"hsa-miR-34a-5p","log2FC":-1.10,"padj":0.001,"targets":"BCL2, CDK6","pathway":"p53-miR-34 axis"},
67
- {"miRNA":"hsa-miR-192-5p","log2FC":-0.90,"padj":0.005,"targets":"MDM2, DHFR","pathway":"p53 feedback"},
68
- {"miRNA":"hsa-miR-145-5p","log2FC":-0.75,"padj":0.012,"targets":"MYC, EGFR","pathway":"Growth suppression"},
69
- {"miRNA":"hsa-miR-107","log2FC":-0.62,"padj":0.023,"targets":"CDK6, HIF1B","pathway":"Hypoxia / cell cycle"},
70
- {"miRNA":"hsa-miR-215-5p","log2FC":-0.51,"padj":0.038,"targets":"DTL, DHFR","pathway":"DNA damage response"},
71
- ],
72
- }
73
 
74
- SIRNA_DB = {
75
- "LUAD": [
76
- {"Gene":"SPC24","dCERES":-0.175,"log2FC":1.13,"Drug_status":"Novel","siRNA":"GCAGCUGAAGAAACUGAAU"},
77
- {"Gene":"BUB1B","dCERES":-0.119,"log2FC":1.12,"Drug_status":"Novel","siRNA":"CCAAAGAGCUGAAGAACAU"},
78
- {"Gene":"CDC45","dCERES":-0.144,"log2FC":1.26,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
79
- {"Gene":"PLK1","dCERES":-0.239,"log2FC":1.03,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
80
- {"Gene":"CDK1","dCERES":-0.201,"log2FC":1.00,"Drug_status":"Clinical","siRNA":"GCAGAAGCACUGAAGAUUU"},
81
- ],
82
- "BRCA": [
83
- {"Gene":"AURKA","dCERES":-0.165,"log2FC":1.20,"Drug_status":"Clinical","siRNA":"GCACUGAAGAUGCAGAAUU"},
84
- {"Gene":"AURKB","dCERES":-0.140,"log2FC":1.15,"Drug_status":"Clinical","siRNA":"CCUGAAGACGCUCAAGGUU"},
85
- {"Gene":"CENPW","dCERES":-0.125,"log2FC":0.95,"Drug_status":"Novel","siRNA":"GCAGAAGCACUGAAGAUUU"},
86
- {"Gene":"RFC2","dCERES":-0.136,"log2FC":0.50,"Drug_status":"Novel","siRNA":"GCAAGAUGCAGAAGCACUU"},
87
- {"Gene":"TYMS","dCERES":-0.131,"log2FC":0.72,"Drug_status":"Approved","siRNA":"GGACGCUCAAGAUGCAGAU"},
88
- ],
89
- "COAD": [
90
- {"Gene":"KRAS","dCERES":-0.210,"log2FC":0.80,"Drug_status":"Clinical","siRNA":"GCUGGAGCUGGUGGUAGUU"},
91
- {"Gene":"WEE1","dCERES":-0.180,"log2FC":1.05,"Drug_status":"Clinical","siRNA":"GCAGCUGAAGAAACUGAAU"},
92
- {"Gene":"CHEK1","dCERES":-0.155,"log2FC":0.90,"Drug_status":"Clinical","siRNA":"CCAAAGAGCUGAAGAACAU"},
93
- {"Gene":"RFC2","dCERES":-0.130,"log2FC":0.55,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
94
- {"Gene":"PKMYT1","dCERES":-0.122,"log2FC":1.07,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
95
- ],
96
  }
97
 
98
- CERNA = [
99
- {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"AKT1","pathway":"TREM2 core signaling"},
100
- {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"NFKB1","pathway":"Neuroinflammation"},
101
- {"lncRNA":"GAS5","miRNA":"hsa-miR-21-5p","target":"PTEN","pathway":"Neuroinflammation"},
102
- {"lncRNA":"GAS5","miRNA":"hsa-miR-222-3p","target":"IL1B","pathway":"Neuroinflammation"},
103
- {"lncRNA":"HOTAIRM1","miRNA":"hsa-miR-9-5p","target":"TREM2","pathway":"Direct TREM2 regulation"},
104
- ]
105
- ASO = [
106
- {"lncRNA":"GAS5","position":119,"accessibility":0.653,"GC_pct":50,"Tm":47.2,"priority":"HIGH"},
107
- {"lncRNA":"CYTOR","position":507,"accessibility":0.653,"GC_pct":50,"Tm":46.8,"priority":"HIGH"},
108
- {"lncRNA":"HOTAIRM1","position":234,"accessibility":0.621,"GC_pct":44,"Tm":44.1,"priority":"MEDIUM"},
109
- {"lncRNA":"LINC00847","position":89,"accessibility":0.598,"GC_pct":56,"Tm":48.3,"priority":"MEDIUM"},
110
- {"lncRNA":"ZFAS1","position":312,"accessibility":0.571,"GC_pct":48,"Tm":45.5,"priority":"MEDIUM"},
111
  ]
112
 
113
- FGFR3 = {
114
- "P1 (hairpin loop)": [
115
- {"Compound":"CHEMBL1575701","RNA_score":0.809,"Toxicity":0.01,"Final_score":0.793},
116
- {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
117
- {"Compound":"Thioguanine","RNA_score":0.888,"Toxicity":32.5,"Final_score":0.742},
118
- {"Compound":"Deazaguanine","RNA_score":0.888,"Toxicity":35.0,"Final_score":0.735},
119
- {"Compound":"CHEMBL441","RNA_score":0.775,"Toxicity":5.2,"Final_score":0.721},
120
- ],
121
- "P10 (G-quadruplex)": [
122
- {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
123
- {"Compound":"CHEMBL5411515","RNA_score":0.945,"Toxicity":37.1,"Final_score":0.761},
124
- {"Compound":"CHEMBL90","RNA_score":0.760,"Toxicity":2.1,"Final_score":0.745},
125
- {"Compound":"CHEMBL102","RNA_score":0.748,"Toxicity":8.4,"Final_score":0.712},
126
- {"Compound":"Berberine","RNA_score":0.735,"Toxicity":3.2,"Final_score":0.708},
127
- ],
128
- }
129
 
130
- VARIANT_DB = {
131
- "BRCA1:p.R1699Q": {"score":0.03,"cls":"Benign","conf":"High"},
132
- "BRCA1:p.R1699W": {"score":0.97,"cls":"Pathogenic","conf":"High"},
133
- "BRCA2:p.D2723A": {"score":0.999,"cls":"Pathogenic","conf":"High"},
134
- "TP53:p.R248W": {"score":0.998,"cls":"Pathogenic","conf":"High"},
135
- "TP53:p.R248Q": {"score":0.995,"cls":"Pathogenic","conf":"High"},
136
- "EGFR:p.L858R": {"score":0.96,"cls":"Pathogenic","conf":"High"},
137
- "ALK:p.F1174L": {"score":0.94,"cls":"Pathogenic","conf":"High"},
138
- }
139
- PLAIN = {
140
- "Pathogenic": "This variant is likely to cause disease. Clinical follow-up is strongly recommended.",
141
- "Likely Pathogenic":"This variant is probably harmful. Discuss with your doctor.",
142
- "Benign": "This variant is likely harmless. Common in the general population.",
143
- "Likely Benign": "This variant is probably harmless. No strong reason for concern.",
144
- }
145
- BM_W = {
146
- "CTHRC1":0.18,"FHL2":0.15,"LDHA":0.14,"P4HA1":0.13,
147
- "SERPINH1":0.12,"ABCA8":-0.11,"CA4":-0.10,"CKB":-0.09,
148
- "NNMT":0.08,"CACNA2D2":-0.07
149
- }
150
- PROTEINS = ["albumin","apolipoprotein","fibrinogen","vitronectin",
151
- "clusterin","igm","iga","igg","complement","transferrin",
152
- "alpha-2-macroglobulin"]
153
 
154
- def predict_mirna(gene):
155
- df = pd.DataFrame(MIRNA_DB.get(gene, []))
156
- log_entry("BRCA2 miRNA", gene, f"Found {len(df)} miRNAs for {gene}")
157
- return df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
158
 
159
- def predict_sirna(cancer):
160
- df = pd.DataFrame(SIRNA_DB.get(cancer, []))
161
- log_entry("TP53 siRNA", cancer, f"Found {len(df)} targets for {cancer}")
162
- return df
163
 
164
- def get_lncrna():
165
- log_entry("lncRNA-TREM2", "load", "ceRNA+ASO tables")
166
- return pd.DataFrame(CERNA), pd.DataFrame(ASO)
167
-
168
- def predict_drug(pocket):
169
- df = pd.DataFrame(FGFR3.get(pocket, []))
170
- fig, ax = plt.subplots(figsize=(6, 4), facecolor=CARD)
171
- ax.set_facecolor(CARD)
172
- ax.barh(df["Compound"], df["Final_score"], color=ACC)
173
- ax.set_xlabel("Final Score", color=TXT)
174
- ax.tick_params(colors=TXT)
175
- for sp in ax.spines.values():
176
- sp.set_edgecolor("#334155")
177
- ax.set_title(f"Top compounds — {pocket}", color=TXT, fontsize=10)
178
- plt.tight_layout()
179
- buf = BytesIO()
180
- plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
181
- plt.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
182
  buf.seek(0)
183
- log_entry("FGFR3 Drug", pocket, f"Top: {df.iloc[0]['Compound'] if len(df) else 'none'}")
184
- return df, Image.open(buf)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
- def predict_variant(hgvs, sift, polyphen, gnomad):
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
187
  hgvs = hgvs.strip()
188
- if hgvs in VARIANT_DB:
189
- r = VARIANT_DB[hgvs]
190
- cls, conf, score = r["cls"], r["conf"], r["score"]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
191
  else:
192
- score = 0.0
193
- if sift < 0.05: score += 0.4
194
- if polyphen > 0.85: score += 0.35
195
- if gnomad < 0.0001: score += 0.25
196
- score = round(score, 3)
197
- cls = ("Pathogenic" if score > 0.6 else
198
- "Likely Pathogenic" if score > 0.4 else "Benign")
199
- conf = "High" if (sift < 0.01 or sift > 0.9) else "Moderate"
200
- colour = "#ef4444" if "Pathogenic" in cls else "#22c55e"
201
- icon = "⚠️ WARNING" if "Pathogenic" in cls else "✅ OK"
202
- bar_w = int(score * 100)
203
- explanation = PLAIN.get(cls, "")
204
- log_entry("OpenVariant", hgvs or f"SIFT={sift}", f"{cls} score={score}")
205
- return (
206
- f"<div style='background:{CARD};padding:16px;border-radius:8px;"
207
- f"font-family:sans-serif;color:{TXT}'>"
208
- f"<h3 style='color:{colour}'>{icon} {cls}</h3>"
209
- f"<p>Score: <b>{score:.3f}</b> &nbsp;|&nbsp; Confidence: <b>{conf}</b></p>"
210
- f"<div style='background:#334155;border-radius:4px;height:16px'>"
211
- f"<div style='background:{colour};height:16px;border-radius:4px;width:{bar_w}%'></div></div>"
212
- f"<p style='margin-top:12px'>{explanation}</p>"
213
- f"<p style='font-size:11px;color:#64748b'>Research only. Not clinical.</p>"
214
- f"</div>"
215
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
- def predict_corona(size, zeta, peg, lipid):
218
- score = 0
219
- if lipid == "Ionizable": score += 2
220
- elif lipid == "Cationic": score += 1
221
- if abs(zeta) < 10: score += 1
222
- if peg > 1.5: score += 2
223
- if size < 100: score += 1
224
- proteins = ["ApoE","Albumin","Fibrinogen","Vitronectin","ApoA-I"]
225
- dominant = proteins[min(score, 4)]
226
- efficacy = ("High" if score >= 4 else "Medium" if score >= 2 else "Low")
227
- log_entry("LNP Corona", f"size={size},zeta={zeta},peg={peg},lipid={lipid}",
228
- f"dominant={dominant},efficacy={efficacy}")
229
- return (f"**Dominant corona protein:** {dominant}\n\n"
230
- f"**Predicted efficacy class:** {efficacy}\n\n"
231
- f"**Composite score:** {score}/6")
232
-
233
- def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
234
- vals = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10]
235
- names = list(BM_W.keys())
236
- weights = list(BM_W.values())
237
- raw = sum(v*w for v,w in zip(vals, weights))
238
- prob = 1 / (1 + np.exp(-raw * 2))
239
- label = "CANCER" if prob > 0.5 else "HEALTHY"
240
- colour = "#ef4444" if prob > 0.5 else "#22c55e"
241
- contribs = [v*w for v,w in zip(vals, weights)]
242
- cols = [ACC if c > 0 else ACC2 for c in contribs]
243
- fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
244
- ax.set_facecolor(CARD)
245
- ax.barh(names, contribs, color=cols)
246
- ax.axvline(0, color=TXT, linewidth=0.8)
247
- ax.set_xlabel("Contribution to cancer score", color=TXT)
248
- ax.tick_params(colors=TXT, labelsize=8)
249
- for sp in ax.spines.values():
250
- sp.set_edgecolor("#334155")
251
- ax.set_title("Protein contributions", color=TXT, fontsize=10)
252
- plt.tight_layout()
253
- buf = BytesIO()
254
- plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
255
- plt.close()
256
  buf.seek(0)
257
- log_entry("Liquid Biopsy", f"CTHRC1={c1},FHL2={c2}...", f"{label} prob={prob:.2f}")
258
- return (
259
- f"<div style='background:{CARD};padding:12px;border-radius:8px;"
260
- f"color:{colour};font-size:20px;font-family:sans-serif'>"
261
- f"<b>{label}</b><br>"
262
- f"<span style='color:{TXT};font-size:14px'>Probability: {prob:.2f}</span></div>"
263
- ), Image.open(buf)
264
-
265
- def predict_flow(size, zeta, peg, charge, flow_rate):
266
- csi = ((flow_rate/40)*0.6 + (peg/5)*0.2 +
267
- (1 if charge == "Cationic" else 0)*0.2)
268
- csi = round(min(csi, 1.0), 3)
269
- stability = ("High remodeling" if csi > 0.6 else
270
- "Medium" if csi > 0.3 else "Stable")
271
- t = np.linspace(0, 60, 200)
272
- kf = 0.03 * (1 + flow_rate/40)
273
- ks = 0.038 * (1 + flow_rate/40)
274
- fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
275
- ax.set_facecolor(CARD)
276
- ax.plot(t, 60*np.exp(-0.03*t)+20, color="#60a5fa", ls="--", label="Albumin (static)")
277
- ax.plot(t, 60*np.exp(-kf*t)+10, color="#60a5fa", label="Albumin (flow)")
278
- ax.plot(t, 14*(1-np.exp(-0.038*t))+5, color=ACC, ls="--", label="ApoE (static)")
279
- ax.plot(t, 20*(1-np.exp(-ks*t))+5, color=ACC, label="ApoE (flow)")
280
- ax.set_xlabel("Time (min)", color=TXT)
281
- ax.set_ylabel("% Corona", color=TXT)
282
- ax.tick_params(colors=TXT)
283
- ax.legend(fontsize=7, labelcolor=TXT, facecolor=CARD)
284
- for sp in ax.spines.values():
285
- sp.set_edgecolor("#334155")
286
- ax.set_title("Vroman Effect", color=TXT, fontsize=9)
287
- plt.tight_layout()
288
- buf = BytesIO()
289
- plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
290
- plt.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
291
  buf.seek(0)
292
- log_entry("Flow Corona", f"flow={flow_rate},charge={charge}", f"CSI={csi},{stability}")
293
- return f"**Corona Shift Index: {csi}** — {stability}", Image.open(buf)
294
-
295
- def predict_bbb(smiles, pka, zeta):
296
- logp = smiles.count("C")*0.3 - smiles.count("O")*0.5 + 1.5
297
- apoe_pct = max(0, min(40, (7.0-pka)*8 + abs(zeta)*0.5 + logp*0.8))
298
- bbb_prob = min(0.95, apoe_pct/30)
299
- tier = ("HIGH (>20%)" if apoe_pct > 20 else
300
- "MEDIUM (10-20%)" if apoe_pct > 10 else "LOW (<10%)")
301
- cats = ["ApoE%","BBB","logP","pKa fit","Zeta"]
302
- vals = [apoe_pct/40, bbb_prob, min(logp/5,1),
303
- (7-abs(pka-6.5))/7, (10-abs(zeta))/10]
304
- angles = np.linspace(0, 2*np.pi, len(cats), endpoint=False).tolist()
305
- v2, a2 = vals+[vals[0]], angles+[angles[0]]
306
- fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={"polar":True}, facecolor=CARD)
307
- ax.set_facecolor(CARD)
308
- ax.plot(a2, v2, color=ACC, linewidth=2)
309
- ax.fill(a2, v2, color=ACC, alpha=0.2)
310
- ax.set_xticks(angles)
311
- ax.set_xticklabels(cats, color=TXT, fontsize=8)
312
- ax.tick_params(colors=TXT)
313
- plt.tight_layout()
314
- buf = BytesIO()
315
- plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
316
- plt.close()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
317
  buf.seek(0)
318
- log_entry("LNP Brain", f"pka={pka},zeta={zeta}", f"ApoE={apoe_pct:.1f}%,BBB={bbb_prob:.2f}")
319
- return (f"**Predicted ApoE:** {apoe_pct:.1f}% — {tier}\n\n"
320
- f"**BBB Probability:** {bbb_prob:.2f}"), Image.open(buf)
321
-
322
- def extract_corona(text):
323
- out = {
324
- "nanoparticle_composition": "",
325
- "size_nm": None, "zeta_mv": None, "PDI": None,
326
- "protein_source": "", "corona_proteins": [], "confidence": {}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
327
  }
328
- m = re.search(r"(\d+\.?\d*)\s*(?:nm|nanometer)", text, re.I)
329
- if m:
330
- out["size_nm"] = float(m.group(1))
331
- out["confidence"]["size_nm"] = "HIGH"
332
- m = re.search(r"([+-]?\d+\.?\d*)\s*mV", text, re.I)
333
- if m:
334
- out["zeta_mv"] = float(m.group(1))
335
- out["confidence"]["zeta_mv"] = "HIGH"
336
- m = re.search(r"PDI\s*[=:of]*\s*(\d+\.?\d*)", text, re.I)
337
- if m:
338
- out["PDI"] = float(m.group(1))
339
- out["confidence"]["PDI"] = "HIGH"
340
- for src in ["human plasma","human serum","fetal bovine serum","FBS","PBS"]:
341
- if src.lower() in text.lower():
342
- out["protein_source"] = src
343
- out["confidence"]["protein_source"] = "HIGH"
344
- break
345
- out["corona_proteins"] = [
346
- {"name": p, "confidence": "MEDIUM"} for p in PROTEINS if p in text.lower()
347
- ]
348
- for lip in ["DSPC","DOPE","MC3","DLin","cholesterol","PEG","DOTAP"]:
349
- if lip in text:
350
- out["nanoparticle_composition"] += lip + " "
351
- out["nanoparticle_composition"] = out["nanoparticle_composition"].strip()
352
- flags = []
353
- if not out["size_nm"]: flags.append("size_nm not found")
354
- if not out["zeta_mv"]: flags.append("zeta_mv not found")
355
- if not out["corona_proteins"]: flags.append("no proteins detected")
356
- summary = "All key fields extracted" if not flags else " | ".join(flags)
357
- log_entry("AutoCorona NLP", text[:80]+"...",
358
- f"proteins={len(out['corona_proteins'])},{summary}")
359
- return json.dumps(out, indent=2), summary
360
-
361
- css = (
362
- f"body,.gradio-container{{background:{BG}!important;color:{TXT}!important}}"
363
- f".tab-nav button{{color:{TXT}!important;background:{CARD}!important}}"
364
- f".tab-nav button.selected{{border-bottom:2px solid {ACC}!important;color:{ACC}!important}}"
365
- f"h1,h2,h3{{color:{ACC}!important}}"
366
- f".gr-button-primary{{background:{ACC}!important;border:none!important}}"
367
- f"footer{{display:none!important}}"
368
- )
369
 
370
- LEARNING_CASES = """
371
- ## 🧪 Top 5 Guided Investigations
372
-
373
- ### Case 1 — Beginner 🟢
374
- **Question:** Why is the same gene position benign vs pathogenic?
375
- 1. OpenVariant enter `BRCA1:p.R1699Q` → Benign
376
- 2. Enter `BRCA1:p.R1699W` Pathogenic
377
- 3. Same position, different amino acid — what changed?
378
-
379
- **Key concept:** Amino acid polarity determines protein folding impact.
380
-
381
- ---
382
- ### Case 2 — Beginner 🟢
383
- **Question:** How does PEG% change what protein sticks to LNPs?
384
- 1. LNP Corona → Ionizable, Zeta=-5, Size=100, PEG=0.5% → note protein
385
- 2. PEG=2.5% → compare
386
- 3. LNP Brain → pKa=6.5 → compare ApoE%
387
-
388
- **Key concept:** More PEG → less Fibrinogen, more ApoE.
389
-
390
- ---
391
- ### Case 3 — Intermediate 🟡
392
- **Question:** Does blood flow change corona composition?
393
- 1. Flow Corona → Flow=0, Ionizable
394
- 2. Flow=40 (arterial) → compare ApoE curve
395
- 3. At what minute does ApoE plateau?
396
-
397
- **Key concept:** Vroman effect — albumin displaced by ApoE under flow.
398
-
399
- ---
400
- ### Case 4 — Intermediate 🟡
401
- **Question:** Which cancer has the most novel siRNA targets?
402
- 1. TP53 siRNA → LUAD → count "Novel"
403
- 2. Repeat BRCA, COAD
404
- 3. Pick one Novel gene → Google: "[gene] cancer therapeutic target"
405
-
406
- ---
407
- ### Case 5 — Advanced 🔴
408
- **Question:** Can you identify cancer from protein levels?
409
- 1. Liquid Biopsy → all sliders=0 → HEALTHY
410
- 2. Set CTHRC1=2.5, FHL2=2.0, LDHA=1.8 → observe
411
- 3. Find minimum CTHRC1 that tips to CANCER
412
-
413
- **Key concept:** CTHRC1 weight (0.18) dominates the score.
414
- """
415
 
416
- with gr.Blocks(css=css, title="K R&D Lab") as demo:
 
 
 
 
 
417
 
418
- gr.Markdown(
419
- "# 🧬 K R&D Lab — Computational Biology Suite\n"
420
- "**Oksana Kolisnyk** · ML Engineer · "
421
- "[KOSATIKS GROUP](https://kosatiks-group.pp.ua)\n"
422
- "> 10 open-source tools + lab journal."
423
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
424
 
425
- with gr.Tabs():
426
-
427
- with gr.TabItem("🧬 BRCA2 miRNA"):
428
- gr.Markdown("### Tumor Suppressor miRNAs")
429
- g1 = gr.Dropdown(["BRCA2","BRCA1","TP53"], value="BRCA2", label="Gene")
430
- b1 = gr.Button("Find miRNAs", variant="primary")
431
- o1 = gr.Dataframe(label="Top 5 downregulated miRNAs")
432
- gr.Examples([["BRCA2"],["TP53"]], inputs=[g1])
433
- b1.click(predict_mirna, g1, o1)
434
-
435
- with gr.TabItem("💉 TP53 siRNA"):
436
- gr.Markdown("### Synthetic Lethal siRNA Targets")
437
- g2 = gr.Dropdown(["LUAD","BRCA","COAD"], value="LUAD", label="Cancer type")
438
- b2 = gr.Button("Find Targets", variant="primary")
439
- o2 = gr.Dataframe(label="Top 5 siRNA targets")
440
- gr.Examples([["LUAD"],["BRCA"]], inputs=[g2])
441
- b2.click(predict_sirna, g2, o2)
442
-
443
- with gr.TabItem("🧠 lncRNA-TREM2"):
444
- gr.Markdown("### lncRNA Networks in Alzheimer's")
445
- b3 = gr.Button("Load Results", variant="primary")
446
- o3a = gr.Dataframe(label="ceRNA Network")
447
- o3b = gr.Dataframe(label="ASO Candidates")
448
- b3.click(get_lncrna, [], [o3a, o3b])
449
-
450
- with gr.TabItem("💊 FGFR3 Drug"):
451
- gr.Markdown("### RNA-Directed Drug Discovery: FGFR3")
452
- g4 = gr.Radio(["P1 (hairpin loop)","P10 (G-quadruplex)"],
453
- value="P1 (hairpin loop)", label="Target pocket")
454
- b4 = gr.Button("Screen Compounds", variant="primary")
455
- o4t = gr.Dataframe(label="Top 5 candidates")
456
- o4p = gr.Image(label="Binding scores")
457
- gr.Examples([["P1 (hairpin loop)"],["P10 (G-quadruplex)"]], inputs=[g4])
458
- b4.click(predict_drug, g4, [o4t, o4p])
459
-
460
- with gr.TabItem("🔬 OpenVariant"):
461
- gr.Markdown("### OpenVariant — Pathogenicity Classifier\nAUC=0.939 on ClinVar 2026.")
462
- hgvs = gr.Textbox(label="HGVS notation", placeholder="BRCA1:p.R1699Q")
463
- gr.Markdown("**Or enter scores manually:**")
464
- with gr.Row():
465
- sift = gr.Slider(0, 1, value=0.5, step=0.01, label="SIFT (0=damaging)")
466
- pp = gr.Slider(0, 1, value=0.5, step=0.01, label="PolyPhen-2")
467
- gn = gr.Slider(0, 0.01, value=0.001, step=0.0001, label="gnomAD AF")
468
- b5 = gr.Button("Predict Pathogenicity", variant="primary")
469
- o5 = gr.HTML(label="Result")
470
- gr.Examples(
471
- [["BRCA1:p.R1699Q", 0.82, 0.05, 0.0012],
472
- ["TP53:p.R248W", 0.00, 1.00, 0.0],
473
- ["BRCA2:p.D2723A", 0.01, 0.98, 0.0]],
474
- inputs=[hgvs, sift, pp, gn])
475
- b5.click(predict_variant, [hgvs, sift, pp, gn], o5)
476
-
477
- with gr.TabItem("🧪 LNP Corona"):
478
- gr.Markdown("### LNP Protein Corona Prediction")
479
- with gr.Row():
480
- sz = gr.Slider(50, 300, value=100, step=1, label="Size (nm)")
481
- zt = gr.Slider(-40, 10, value=-5, step=1, label="Zeta (mV)")
482
- with gr.Row():
483
- pg = gr.Slider(0, 5, value=1.5, step=0.1, label="PEG mol%")
484
- lp = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
485
- value="Ionizable", label="Lipid type")
486
- b6 = gr.Button("Predict", variant="primary")
487
- o6 = gr.Markdown()
488
- gr.Examples([[100,-5,1.5,"Ionizable"],[80,5,0.5,"Cationic"]], inputs=[sz,zt,pg,lp])
489
- b6.click(predict_corona, [sz,zt,pg,lp], o6)
490
-
491
- with gr.TabItem("🩸 Liquid Biopsy"):
492
- gr.Markdown("### Protein Corona Cancer Diagnostics\nClassify cancer vs healthy.")
493
- with gr.Row():
494
- p1 = gr.Slider(-3, 3, value=0, step=0.1, label="CTHRC1")
495
- p2 = gr.Slider(-3, 3, value=0, step=0.1, label="FHL2")
496
- p3 = gr.Slider(-3, 3, value=0, step=0.1, label="LDHA")
497
- p4 = gr.Slider(-3, 3, value=0, step=0.1, label="P4HA1")
498
- p5 = gr.Slider(-3, 3, value=0, step=0.1, label="SERPINH1")
499
- with gr.Row():
500
- p6 = gr.Slider(-3, 3, value=0, step=0.1, label="ABCA8")
501
- p7 = gr.Slider(-3, 3, value=0, step=0.1, label="CA4")
502
- p8 = gr.Slider(-3, 3, value=0, step=0.1, label="CKB")
503
- p9 = gr.Slider(-3, 3, value=0, step=0.1, label="NNMT")
504
- p10 = gr.Slider(-3, 3, value=0, step=0.1, label="CACNA2D2")
505
- b7 = gr.Button("Classify", variant="primary")
506
- o7t = gr.HTML()
507
- o7p = gr.Image(label="Feature contributions")
508
- gr.Examples(
509
- [[2,2,1.5,1.8,1.6,-1,-1.2,-0.8,1.4,-1.1],
510
- [0,0,0,0,0,0,0,0,0,0]],
511
- inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10])
512
- b7.click(predict_cancer, [p1,p2,p3,p4,p5,p6,p7,p8,p9,p10], [o7t,o7p])
513
-
514
- with gr.TabItem("🌊 Flow Corona"):
515
- gr.Markdown("### Corona Remodeling Under Blood Flow")
516
- with gr.Row():
517
- s8 = gr.Slider(50, 300, value=100, step=1, label="Size (nm)")
518
- z8 = gr.Slider(-40, 10, value=-5, step=1, label="Zeta (mV)")
519
- pg8 = gr.Slider(0, 5, value=1.5, step=0.1, label="PEG mol%")
520
- with gr.Row():
521
- ch8 = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
522
- value="Ionizable", label="Charge type")
523
- fl8 = gr.Slider(0, 40, value=20, step=1, label="Flow rate cm/s (aorta=40)")
524
- b8 = gr.Button("Model Vroman Effect", variant="primary")
525
- o8t = gr.Markdown()
526
- o8p = gr.Image(label="Kinetics plot")
527
- gr.Examples([[100,-5,1.5,"Ionizable",40],[150,5,0.5,"Cationic",10]],
528
- inputs=[s8,z8,pg8,ch8,fl8])
529
- b8.click(predict_flow, [s8,z8,pg8,ch8,fl8], [o8t,o8p])
530
-
531
- with gr.TabItem("🧠 LNP Brain"):
532
- gr.Markdown("### LNP Brain Delivery Predictor")
533
- smi = gr.Textbox(label="Ionizable lipid SMILES",
534
- value="CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C")
535
- with gr.Row():
536
- pk = gr.Slider(4, 8, value=6.5, step=0.1, label="pKa")
537
- zt9 = gr.Slider(-20, 10, value=-3, step=1, label="Zeta (mV)")
538
- b9 = gr.Button("Predict BBB Crossing", variant="primary")
539
- o9t = gr.Markdown()
540
- o9p = gr.Image(label="Radar profile")
541
- gr.Examples([["CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C", 6.5, -3]],
542
- inputs=[smi, pk, zt9])
543
- b9.click(predict_bbb, [smi, pk, zt9], [o9t, o9p])
544
-
545
- with gr.TabItem("📄 AutoCorona NLP"):
546
- gr.Markdown("### AutoCorona NLP Extraction\nPaste any paper abstract.")
547
- txt = gr.Textbox(lines=6, label="Paper abstract", placeholder="Paste text here...")
548
- b10 = gr.Button("Extract Data", variant="primary")
549
- o10j = gr.Code(label="Extracted JSON", language="json")
550
- o10f = gr.Textbox(label="Validation flags")
551
- gr.Examples([[
552
- "LNPs composed of MC3, DSPC, Cholesterol (50:10:40 mol%) with 1.5% PEG-DMG. "
553
- "Hydrodynamic diameter was 98 nm, zeta potential -3.2 mV, PDI 0.12. "
554
- "Incubated in human plasma. Corona: albumin, apolipoprotein E, fibrinogen."
555
- ]], inputs=[txt])
556
- b10.click(extract_corona, txt, [o10j, o10f])
557
-
558
- with gr.TabItem("📓 Lab Journal"):
559
- gr.Markdown("### Your Research Log\nEvery query is auto-saved.")
560
- with gr.Row():
561
- note_text = gr.Textbox(
562
- label="📝 Add observation / conclusion",
563
- placeholder="What did you discover? What's your next question?",
564
- lines=3)
565
- note_tab = gr.Textbox(label="Which tool?", value="General")
566
- note_last = gr.Textbox(label="Result to annotate", visible=False)
567
- save_btn = gr.Button("💾 Save Observation", variant="primary")
568
- save_msg = gr.Markdown()
569
- journal_df = gr.Dataframe(
570
- label="📋 Full History",
571
- value=load_journal(),
572
- interactive=False)
573
- refresh_btn = gr.Button("🔄 Refresh")
574
- refresh_btn.click(load_journal, [], journal_df)
575
- save_btn.click(save_note, [note_text, note_tab, note_last], [save_msg, journal_df])
576
- gr.Markdown("📥 Log saved as `lab_journal.csv` in the app folder.")
577
-
578
- with gr.TabItem("📚 Learning Mode"):
579
- gr.Markdown(LEARNING_CASES)
580
- gr.Markdown("---\n### 📖 Quick Reference")
581
- gr.Markdown("""
582
- | Tool | Predicts | Key input |
583
- |------|----------|-----------|
584
- | OpenVariant | Pathogenic/Benign | Gene mutation |
585
- | LNP Corona | Dominant protein | Formulation |
586
- | Flow Corona | Vroman kinetics | Flow rate |
587
- | LNP Brain | ApoE% + BBB prob | pKa + zeta |
588
- | Liquid Biopsy | Cancer/Healthy | Protein z-scores |
589
- | BRCA2 miRNA | Downregulated miRNAs | Gene name |
590
- | TP53 siRNA | Synthetic lethal targets | Cancer type |
591
- | lncRNA-TREM2 | ceRNA + ASOs | — |
592
- | FGFR3 Drug | Small molecules | Pocket type |
593
- | AutoCorona NLP | Structured data | Abstract text |
594
- """)
595
- gr.Markdown("""
596
- ### 🔗 Resources
597
- - [PubMed](https://pubmed.ncbi.nlm.nih.gov)
598
- - [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/)
599
- - [UniProt](https://www.uniprot.org)
600
- - [ChEMBL](https://www.ebi.ac.uk/chembl/)
601
- """)
602
-
603
- gr.Markdown(
604
- "---\n**K R&D Lab** | Research only — not clinical | "
605
- "[GitHub](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026) | "
606
- "[KOSATIKS GROUP 🦈](https://kosatiks-group.pp.ua)"
607
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
608
 
609
- demo.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
1
+ """
2
+ K R&D Lab — Cancer Research Suite
3
+ Author: Oksana Kolisnyk | kosatiks-group.pp.ua
4
+ Repo: github.com/TEZv/K-RnD-Lab-PHYLO-03_2026
5
+ """
6
+
7
  import gradio as gr
8
+ import requests
9
+ import json
10
+ import os
11
+ import time
12
+ import csv
13
+ import math
14
+ import hashlib
15
+ import datetime
16
  import numpy as np
17
+ import pandas as pd
18
  import matplotlib
19
  matplotlib.use("Agg")
20
  import matplotlib.pyplot as plt
21
+ import matplotlib.colors as mcolors
22
+ from matplotlib import cm
23
+ import io
24
  from PIL import Image
 
 
25
 
26
+ # ─────────────────────────────────────────────
27
+ # CACHE SYSTEM (TTL = 24 h)
28
+ # ─────────────────────────────────────────────
29
+ CACHE_DIR = "/tmp/cache"
30
+ os.makedirs(CACHE_DIR, exist_ok=True)
31
+ CACHE_TTL = 86400 # 24 hours in seconds
32
+
33
+ def _cache_key(endpoint: str, query: str) -> str:
34
+ raw = f"{endpoint}_{query}"
35
+ return hashlib.md5(raw.encode()).hexdigest()
36
 
37
def cache_get(endpoint: str, query: str):
    """Return the cached JSON value for (endpoint, query), or None on a miss.

    A miss is reported when the cache file is absent, at least ``CACHE_TTL``
    seconds old, unreadable, or does not contain valid JSON.
    """
    key = _cache_key(endpoint, query)
    path = os.path.join(CACHE_DIR, f"{endpoint}_{key}.json")
    try:
        # EAFP: stat and open inside one try so a file that disappears
        # between the two calls is just a miss, not an uncaught OSError.
        if time.time() - os.path.getmtime(path) >= CACHE_TTL:
            return None
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        # ValueError covers json.JSONDecodeError and UnicodeDecodeError.
        return None
49
 
50
def cache_set(endpoint: str, query: str, data):
    """Store *data* as JSON in the cache entry for (endpoint, query).

    Caching is best-effort: I/O or serialization failures are swallowed and the
    function returns None either way.  The value is written to a temporary file
    in ``CACHE_DIR`` and then moved into place with ``os.replace`` so that a
    concurrent reader never observes a partially written file.
    """
    import tempfile  # local import: not part of the file's top-level imports

    key = _cache_key(endpoint, query)
    path = os.path.join(CACHE_DIR, f"{endpoint}_{key}.json")
    tmp_path = None
    try:
        with tempfile.NamedTemporaryFile(
            "w", dir=CACHE_DIR, suffix=".tmp", delete=False, encoding="utf-8"
        ) as f:
            tmp_path = f.name
            json.dump(data, f)
        os.replace(tmp_path, path)
    except (OSError, TypeError, ValueError):
        # Do not leave a stray temp file behind after a failed write.
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
58
 
59
+ # ─────────────────────────────────────────────
60
+ # LAB JOURNAL
61
+ # ─────────────────────────────────────────────
62
JOURNAL_FILE = "/tmp/lab_journal.csv"


def journal_log(tab: str, action: str, result: str, note: str = ""):
    """Append one entry to the lab journal CSV and return its timestamp.

    Args:
        tab: Identifier of the tab that produced the entry.
        action: Short description of the inputs/action.
        result: Result text; coerced to ``str`` and truncated to 200 characters.
        note: Optional free-text note.

    Returns:
        The naive-UTC ISO-8601 timestamp written to the row, or ``""`` if the
        entry could not be written (logging is best-effort and never raises
        for I/O problems).
    """
    try:
        # Same naive-UTC format the deprecated utcnow() produced.
        ts = (
            datetime.datetime.now(datetime.timezone.utc)
            .replace(tzinfo=None)
            .isoformat()
        )
        row = [ts, tab, action, str(result)[:200], note]
        try:
            # Missing *or empty* file -> the header row is still needed.
            write_header = os.path.getsize(JOURNAL_FILE) == 0
        except OSError:
            write_header = True
        # Explicit UTF-8: entries routinely contain emoji / non-ASCII text.
        with open(JOURNAL_FILE, "a", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            if write_header:
                w.writerow(["timestamp", "tab", "action", "result_summary", "note"])
            w.writerow(row)
        return ts
    except (OSError, csv.Error, ValueError):
        return ""
77
 
78
def journal_read() -> str:
    """Return the last 20 journal entries rendered as a text table.

    Returns ``"No entries yet."`` when the journal file is missing, empty, or
    cannot be parsed.  The table is Markdown when pandas' optional ``tabulate``
    dependency is installed; otherwise a plain-text table is returned instead
    of misreporting the journal as empty.
    """
    try:
        df = pd.read_csv(JOURNAL_FILE, encoding="utf-8")
    except (OSError, ValueError):
        # OSError: missing/unreadable file.  ValueError covers pandas'
        # EmptyDataError / ParserError and UnicodeDecodeError.
        return "No entries yet."
    if df.empty:
        return "No entries yet."
    recent = df.tail(20)
    try:
        return recent.to_markdown(index=False)
    except ImportError:
        # DataFrame.to_markdown needs the optional 'tabulate' package.
        return recent.to_string(index=False)
88
 
89
+ # ─────────────────────────────────────────────
90
+ # CONSTANTS
91
+ # ─────────────────────────────────────────────
92
# Cancer-type label -> ontology identifier passed to OpenTargets as `efoId`.
CANCER_EFO = dict(
    [
        ("GBM", "EFO_0000519"),
        ("PDAC", "EFO_0002618"),
        ("SCLC", "EFO_0000702"),
        ("UVM", "EFO_0004339"),
        ("DIPG", "EFO_0009708"),
        ("ACC", "EFO_0003060"),
        ("MCC", "EFO_0005558"),
        ("PCNSL", "EFO_0005543"),
        ("Pediatric AML", "EFO_0000222"),
    ]
)

# Selectable cancer types, in the same order as the mapping above.
CANCER_TYPES = list(CANCER_EFO)
108
 
109
# Biological processes queried against PubMed (one heatmap row each).
PROCESSES = [
    "autophagy",
    "ferroptosis",
    "protein corona",
    "RNA splicing",
    "phase separation",
    "m6A",
    "circRNA",
    "synthetic lethality",
    "immune exclusion",
    "enhancer hijacking",
    "lncRNA regulation",
    "metabolic reprogramming",
    "exosome biogenesis",
    "senescence",
    "mitophagy",
    "liquid-liquid phase separation",
    "cryptic splicing",
    "proteostasis",
    "redox biology",
    "translation regulation",
]
119
 
120
# Base URLs of the external services queried by the Group A tabs.
PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"  # NCBI E-utilities
OT_GRAPHQL = "https://api.platform.opentargets.org/api/v4/graphql"  # OpenTargets GraphQL
GNOMAD_GQL = "https://gnomad.broadinstitute.org/api"  # gnomAD GraphQL
CT_BASE = "https://clinicaltrials.gov/api/v2"  # ClinicalTrials.gov REST v2
 
 
 
 
 
 
 
 
 
 
 
 
124
 
125
+ # ─────────────────────────────────────────────
126
+ # SHARED API HELPERS
127
+ # ─────────────────────────────────────────────
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
128
 
129
def pubmed_count(query: str) -> int:
    """Return the number of PubMed records matching *query*.

    Successful counts are cached; any failure (network, HTTP status, unexpected
    payload) yields ``-1`` and is not cached.
    """
    hit = cache_get("pubmed_count", query)
    if hit is not None:
        return hit

    request_params = {
        "db": "pubmed",
        "term": query,
        "rettype": "count",
        "retmode": "json",
    }
    try:
        # Fixed pause before every live request (presumably NCBI rate limiting).
        time.sleep(0.34)
        response = requests.get(
            PUBMED_BASE + "/esearch.fcgi", params=request_params, timeout=10
        )
        response.raise_for_status()
        total = int(response.json()["esearchresult"]["count"])
        cache_set("pubmed_count", query, total)
    except Exception:
        return -1
    return total
147
+
148
+
149
def pubmed_search(query: str, retmax: int = 10) -> list:
    """Return up to *retmax* PMIDs matching *query*.

    Results are cached per (query, retmax); on any failure an empty list is
    returned and nothing is cached.
    """
    cache_id = f"{query}_{retmax}"
    hit = cache_get("pubmed_search", cache_id)
    if hit is not None:
        return hit

    request_params = {
        "db": "pubmed",
        "term": query,
        "retmax": retmax,
        "retmode": "json",
    }
    try:
        # Fixed pause before every live request (presumably NCBI rate limiting).
        time.sleep(0.34)
        response = requests.get(
            PUBMED_BASE + "/esearch.fcgi", params=request_params, timeout=10
        )
        response.raise_for_status()
        pmids = response.json()["esearchresult"]["idlist"]
        cache_set("pubmed_search", cache_id, pmids)
    except Exception:
        return []
    return pmids
167
 
 
 
 
 
168
 
169
def pubmed_summary(pmids: list) -> list:
    """Return the esummary record for each PMID in *pmids* that the API returned.

    The input order is preserved.  Results are cached under the comma-joined
    PMID string; an empty input or any failure yields an empty list.
    """
    if not pmids:
        return []

    id_csv = ",".join(pmids)
    hit = cache_get("pubmed_summary", id_csv)
    if hit is not None:
        return hit

    try:
        # Fixed pause before every live request (presumably NCBI rate limiting).
        time.sleep(0.34)
        response = requests.get(
            PUBMED_BASE + "/esummary.fcgi",
            params={"db": "pubmed", "id": id_csv, "retmode": "json"},
            timeout=15,
        )
        response.raise_for_status()
        docs = response.json().get("result", {})
        records = [docs[pmid] for pmid in pmids if pmid in docs]
        cache_set("pubmed_summary", id_csv, records)
    except Exception:
        return []
    return records
190
+
191
+
192
def ot_query(gql: str, variables: dict = None) -> dict:
    """Run an OpenTargets GraphQL query and return the decoded JSON response.

    Args:
        gql: GraphQL query text.
        variables: Optional query variables (defaults to an empty mapping).

    Returns:
        The response payload.  On a transport/HTTP/decoding failure a dict of
        the form ``{"error": "<message>"}`` is returned instead.

    Only clean responses are cached: a payload carrying a GraphQL ``errors``
    entry (which can arrive with HTTP 200) is returned but not stored, so a
    transient upstream failure is not replayed for the whole cache TTL.
    """
    key = json.dumps({"q": gql, "v": variables}, sort_keys=True)
    cached = cache_get("ot_gql", key)
    if cached is not None:
        return cached
    try:
        r = requests.post(
            OT_GRAPHQL,
            json={"query": gql, "variables": variables or {}},
            timeout=20,
        )
        r.raise_for_status()
        data = r.json()
    except Exception as e:
        return {"error": str(e)}
    if isinstance(data, dict) and not data.get("errors"):
        cache_set("ot_gql", key, data)
    return data
210
+
211
+
212
+ # ─────────────────────────────────────────────
213
+ # TAB A1 — GRAY ZONES EXPLORER
214
+ # ─────────────────────────────────────────────
215
+
216
def a1_run(cancer_type: str):
    """Build a one-column heatmap of PubMed paper counts per biological process.

    Args:
        cancer_type: Cancer label inserted into each PubMed query.

    Returns:
        ``(image, markdown)``: a PIL image of the heatmap and a Markdown summary
        of the five processes with the fewest papers, followed by a source note.
        Processes whose lookup failed (count ``-1``) are drawn as blank cells
        and excluded from the gap list.
    """
    today = datetime.date.today().isoformat()
    queries = {proc: f'"{proc}" AND "{cancer_type}"[tiab]' for proc in PROCESSES}
    counts = {proc: pubmed_count(q) for proc, q in queries.items()}

    # -1 marks a failed lookup; turn it into NaN so the cell can be masked.
    column = pd.Series(
        [counts[p] for p in PROCESSES], index=PROCESSES, dtype=float
    ).replace(-1, np.nan)
    values = column.fillna(0).to_numpy().reshape(-1, 1)
    masked = np.ma.masked_where(column.isna().to_numpy().reshape(-1, 1), values)

    fig, ax = plt.subplots(figsize=(6, 8), facecolor="white")
    try:
        # pyplot.get_cmap replaces the removed matplotlib.cm.get_cmap; work on a
        # copy so set_bad() does not mutate the globally registered colormap.
        cmap = plt.get_cmap("YlOrRd").copy()
        cmap.set_bad("white")
        im = ax.imshow(masked, aspect="auto", cmap=cmap, vmin=0)
        ax.set_xticks([0])
        ax.set_xticklabels([cancer_type], fontsize=11, fontweight="bold")
        ax.set_yticks(range(len(PROCESSES)))
        ax.set_yticklabels(PROCESSES, fontsize=9)
        ax.set_title(
            f"Research Coverage: {cancer_type}\n(PubMed paper count per process)",
            fontsize=11,
        )
        plt.colorbar(im, ax=ax, label="Paper count")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    # Ascending by count; sorted() is stable, so ties keep PROCESSES order.
    sorted_procs = sorted(
        [(p, counts[p]) for p in PROCESSES if counts[p] >= 0],
        key=lambda x: x[1],
    )
    gap_cards = [
        f"**Gap #{i}: {proc}**  \n"
        f"Papers found: {cnt}  \n"
        # Show the query exactly as it was sent (including the [tiab] tag).
        f"Query: `{queries[proc]}`"
        for i, (proc, cnt) in enumerate(sorted_procs[:5], 1)
    ]

    gaps_md = "\n\n---\n\n".join(gap_cards) if gap_cards else "No data available."
    journal_log(
        "A1-GrayZones",
        f"cancer={cancer_type}",
        f"gaps={[p for p, _ in sorted_procs[:5]]}",
    )
    source_note = f"*Source: PubMed E-utilities | Date: {today}*"
    return img, gaps_md + "\n\n" + source_note
265
+
266
 
267
+ # ─────────────────────────────────────────────
268
+ # TAB A2 — UNDERSTUDIED TARGET FINDER
269
+ # ─────────────────────────────────────────────
270
+
271
# Memo for the placeholder table; holds a single "df" entry once built.
_depmap_cache = {}


def _load_depmap_sample() -> pd.DataFrame:
    """Return the placeholder gene-effect table, building it on first call.

    The table has columns ``gene`` and ``gene_effect``.  The scores are NOT
    DepMap data: they are drawn once from a seeded (42) uniform distribution
    over [-1.5, 0.3) for a fixed list of gene symbols, and the same DataFrame
    object is returned on every later call.
    """
    try:
        return _depmap_cache["df"]
    except KeyError:
        pass

    symbols = [
        "MYC", "KRAS", "TP53", "EGFR", "PTEN", "RB1", "CDKN2A",
        "PIK3CA", "AKT1", "BRAF", "NRAS", "IDH1", "IDH2", "ARID1A",
        "SMAD4", "CTNNB1", "VHL", "BRCA1", "BRCA2", "ATM",
        "CDK4", "CDK6", "MDM2", "BCL2", "MCL1", "CCND1",
        "FGFR1", "FGFR2", "MET", "ALK", "RET", "ERBB2",
        "MTOR", "PIK3R1", "STK11", "NF1", "NF2", "TSC1", "TSC2",
    ]
    generator = np.random.default_rng(42)
    table = pd.DataFrame(
        {
            "gene": symbols,
            "gene_effect": generator.uniform(-1.5, 0.3, len(symbols)),
        }
    )
    _depmap_cache["df"] = table
    return table
290
+
291
+
292
def _a2_trial_count(gene: str, cancer_type: str) -> int:
    """Return the ClinicalTrials.gov study count for a gene/cancer pair, or -1 on failure."""
    cache_id = f"{gene}_{cancer_type}"
    cached = cache_get("ct_gene", cache_id)
    if cached is not None:
        return cached
    try:
        r = requests.get(
            f"{CT_BASE}/studies",
            params={
                "query.term": f"{gene} {cancer_type}",
                "pageSize": 1,
                "format": "json",
                # The v2 API only includes `totalCount` when asked to count.
                "countTotal": "true",
            },
            timeout=10,
        )
        r.raise_for_status()
        n = r.json().get("totalCount", 0)
    except Exception:
        return -1
    cache_set("ct_gene", cache_id, n)
    return n


def a2_run(cancer_type: str):
    """Rank OpenTargets-associated genes by an "understudied" gap index.

    For the top 20 associated targets of *cancer_type* this collects a PubMed
    paper count, a ClinicalTrials.gov study count and a placeholder
    essentiality score, then computes
    ``Gap_index = essentiality / log(papers + 2)`` (0 when the gene has no
    score or the inverted score is not positive).

    Returns:
        ``(DataFrame, note)`` sorted by ``Gap_index`` descending, or
        ``(None, message)`` when OpenTargets returns no rows.  When the paper
        lookup failed, ``Gap_index`` is NaN (sorted last) rather than being
        computed as if the gene had zero papers.
    """
    today = datetime.date.today().isoformat()
    efo = CANCER_EFO.get(cancer_type, "")

    gql = """
    query AssocTargets($efoId: String!, $size: Int!) {
      disease(efoId: $efoId) {
        associatedTargets(page: {index: 0, size: $size}) {
          rows {
            target {
              approvedSymbol
              approvedName
            }
            score
          }
        }
      }
    }
    """
    ot_data = ot_query(gql, {"efoId": efo, "size": 40})
    try:
        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
    except (KeyError, TypeError):
        rows_ot = []

    if not rows_ot:
        return None, f"⚠️ OpenTargets returned no data for {cancer_type}. Try again later.\n\n*Source: OpenTargets | Date: {today}*"

    top_genes = [r["target"]["approvedSymbol"] for r in rows_ot][:20]

    paper_counts = {
        gene: pubmed_count(f'"{gene}" AND "{cancer_type}"[tiab]')
        for gene in top_genes
    }
    trial_counts = {gene: _a2_trial_count(gene, cancer_type) for gene in top_genes}

    depmap_df = _load_depmap_sample()
    depmap_dict = dict(zip(depmap_df["gene"], depmap_df["gene_effect"]))

    records = []
    for gene in top_genes:
        raw_ess = depmap_dict.get(gene)
        papers = paper_counts.get(gene, 0)
        trials = trial_counts.get(gene, 0)
        if raw_ess is None:
            ess_display = "N/A"
            gap_idx = 0.0
        else:
            # Gene effect is negative for essential genes; flip the sign.
            ess_inverted = -raw_ess
            ess_display = f"{ess_inverted:.3f}"
            if papers < 0:
                # Lookup failed: the index is unknown, not "zero papers".
                gap_idx = float("nan")
            elif ess_inverted > 0:
                gap_idx = ess_inverted / math.log(papers + 2)
            else:
                gap_idx = 0.0
        records.append({
            "Gene": gene,
            "Essentiality (inverted)": ess_display,
            "Papers": papers if papers >= 0 else "N/A",
            "Trials": trials if trials >= 0 else "N/A",
            "Gap_index": round(gap_idx, 3),
        })

    # NaN gap indices sort to the end (pandas default na_position="last").
    result_df = pd.DataFrame(records).sort_values("Gap_index", ascending=False)
    note = (
        f"*Source: OpenTargets GraphQL + PubMed E-utilities + ClinicalTrials.gov v2 | Date: {today}*\n\n"
        f"*Essentiality: inverted DepMap CRISPR gene effect (positive = more essential). "
        f"Gap_index = essentiality / log(papers+2)*\n\n"
        f"> ⚠️ **Essentiality scores are synthetic placeholders (seeded random values for a fixed gene list), not DepMap data.** "
        f"For real analysis, download `CRISPR_gene_effect.csv` from [depmap.org](https://depmap.org/portal/download/all/) "
        f"and replace `_load_depmap_sample()` in `app.py`."
    )
    journal_log("A2-TargetFinder", f"cancer={cancer_type}", f"top_gap={result_df.iloc[0]['Gene'] if len(result_df) else 'none'}")
    return result_df, note
382
+
383
+
384
+ # ─────────────────────────────────────────────
385
+ # TAB A3 — REAL VARIANT LOOKUP
386
+ # ─────────────────────────────────────────────
387
+
388
def a3_run(hgvs: str):
    """Look up a variant in ClinVar and gnomAD and return a Markdown report.

    Args:
        hgvs: Variant notation typed by the user; surrounding whitespace is
            stripped.  An empty value returns a prompt instead of a report.

    Returns:
        Markdown text with one ClinVar section, one gnomAD section and a source
        line.  An API failure is reported as "Data unavailable" and is kept
        distinct from a successful search that found nothing.
    """
    today = datetime.date.today().isoformat()
    hgvs = hgvs.strip()
    if not hgvs:
        return "Please enter an HGVS notation (e.g. NM_007294.4:c.5266dupC)"

    result_parts = []

    # ── ClinVar: search for record IDs (cached), then fetch summaries ──
    clinvar_ids = cache_get("clinvar", hgvs)
    clinvar_failed = False
    if clinvar_ids is None:
        try:
            time.sleep(0.34)
            r = requests.get(
                f"{PUBMED_BASE}/esearch.fcgi",
                params={"db": "clinvar", "term": hgvs, "retmode": "json", "retmax": 5},
                timeout=10,
            )
            r.raise_for_status()
            clinvar_ids = r.json()["esearchresult"]["idlist"]
            cache_set("clinvar", hgvs, clinvar_ids)
        except Exception:
            clinvar_failed = True

    if clinvar_failed:
        # A failed search must not be presented as "not in ClinVar".
        result_parts.append("### ClinVar\nData unavailable — API error.")
    elif clinvar_ids:
        try:
            time.sleep(0.34)
            r2 = requests.get(
                f"{PUBMED_BASE}/esummary.fcgi",
                params={"db": "clinvar", "id": ",".join(clinvar_ids[:3]), "retmode": "json"},
                timeout=10,
            )
            r2.raise_for_status()
            cv_result = r2.json().get("result", {})
            cv_rows = []
            for vid in clinvar_ids[:3]:
                if vid not in cv_result:
                    continue
                v = cv_result[vid]
                # NOTE(review): newer ClinVar summaries appear to report the
                # classification under `germline_classification`; fall back to
                # the legacy `clinical_significance` key — confirm against NCBI docs.
                sig = v.get("germline_classification") or v.get("clinical_significance") or {}
                if isinstance(sig, dict):
                    sig_str = sig.get("description") or "Unknown"
                else:
                    sig_str = str(sig)
                cv_rows.append(
                    f"- **ClinVar ID {vid}**: {v.get('title','N/A')} | "
                    f"Classification: **{sig_str}**"
                )
            if cv_rows:
                result_parts.append("### ClinVar Results\n" + "\n".join(cv_rows))
            else:
                result_parts.append("### ClinVar\nVariant found in index but summary unavailable.")
        except Exception:
            result_parts.append("### ClinVar\nData unavailable — API error.")
    else:
        result_parts.append(
            "### ClinVar\n"
            "**Not found in ClinVar database.**\n"
            "> ⚠️ Not in database. Do not interpret."
        )

    # ── gnomAD: GraphQL variant search (cached) ──
    gnomad = cache_get("gnomad", hgvs)
    if gnomad is None:
        try:
            gql = """
            query VariantSearch($query: String!, $dataset: DatasetId!) {
              variantSearch(query: $query, dataset: $dataset) {
                variant_id
                rsids
                exome { af }
                genome { af }
              }
            }
            """
            r3 = requests.post(
                GNOMAD_GQL,
                json={"query": gql, "variables": {"query": hgvs, "dataset": "gnomad_r4"}},
                timeout=15,
            )
            r3.raise_for_status()
            gnomad = r3.json()
            # Do not cache GraphQL error payloads for the whole TTL.
            if isinstance(gnomad, dict) and not gnomad.get("errors"):
                cache_set("gnomad", hgvs, gnomad)
        except Exception:
            gnomad = None

    # `data` can be null when the server reports GraphQL errors.
    gnomad_data = gnomad.get("data") if isinstance(gnomad, dict) else None
    if isinstance(gnomad_data, dict):
        variants = gnomad_data.get("variantSearch") or []
        if variants:
            gn_rows = []
            for v in variants[:3]:
                vid = v.get("variant_id", "N/A")
                rsids = ", ".join(v.get("rsids") or []) or "N/A"
                exome_af = v.get("exome", {}) or {}
                genome_af = v.get("genome", {}) or {}
                af_e = exome_af.get("af", "N/A")
                af_g = genome_af.get("af", "N/A")
                gn_rows.append(
                    f"- **{vid}** (rsID: {rsids}) | "
                    f"Exome AF: {af_e} | Genome AF: {af_g}"
                )
            result_parts.append("### gnomAD v4 Results\n" + "\n".join(gn_rows))
        else:
            result_parts.append(
                "### gnomAD v4\n"
                "**Not found in gnomAD.**\n"
                "> ⚠️ Not in database. Do not interpret."
            )
    else:
        result_parts.append(
            "### gnomAD v4\n"
            "Data unavailable — API error or variant not found.\n"
            "> ⚠️ Not in database. Do not interpret."
        )

    result_parts.append(f"\n*Source: ClinVar E-utilities + gnomAD GraphQL | Date: {today}*")
    journal_log("A3-VariantLookup", f"hgvs={hgvs}", result_parts[0][:100])
    return "\n\n".join(result_parts)
504
+
505
+
506
+ # ─────────────────────────────────────────────
507
+ # TAB A4 — LITERATURE GAP FINDER
508
+ # ─────────────────────────────────────────────
509
+
510
def a4_run(cancer_type: str, keyword: str):
    """Plot yearly PubMed counts for a keyword in a cancer type and flag gaps.

    Args:
        cancer_type: Cancer label inserted into each PubMed query.
        keyword: Search keyword; surrounding whitespace is stripped.

    Returns:
        ``(image, markdown)`` with a bar chart covering the last 10 calendar
        years and a summary of zero-publication and low-activity years, or
        ``(None, message)`` when *keyword* is empty.  Years whose lookup failed
        are drawn in grey and listed separately; they are not counted as
        zero-publication years.
    """
    today = datetime.date.today().isoformat()
    keyword = keyword.strip()
    if not keyword:
        return None, "Please enter a keyword."

    current_year = datetime.date.today().year
    years = list(range(current_year - 9, current_year + 1))

    # pubmed_count returns -1 on failure; keep the raw values so a failed
    # lookup is not mistaken for a year without publications.
    raw_counts = [
        pubmed_count(f'"{keyword}" AND "{cancer_type}"[tiab] AND {yr}[pdat]')
        for yr in years
    ]
    counts = [max(n, 0) for n in raw_counts]

    positives = [c for c in counts if c > 0]
    avg = np.mean(positives) if positives else 0
    low_cutoff = avg * 0.3
    failed_years = [yr for yr, n in zip(years, raw_counts) if n < 0]
    gaps = [yr for yr, n in zip(years, raw_counts) if n == 0]
    low_years = [yr for yr, n in zip(years, raw_counts) if 0 < n < low_cutoff]

    def _bar_color(n):
        if n < 0:
            return "#bdbdbd"  # lookup failed
        if n == 0:
            return "#d73027"
        if n < low_cutoff:
            return "#fc8d59"
        return "#4393c3"

    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
    try:
        ax.bar(
            years,
            counts,
            color=[_bar_color(n) for n in raw_counts],
            edgecolor="white",
            linewidth=0.5,
        )
        ax.axhline(avg, color="#555", linestyle="--", linewidth=1, label=f"Avg: {avg:.1f}")
        ax.set_xlabel("Year", fontsize=11)
        ax.set_ylabel("PubMed Papers", fontsize=11)
        ax.set_title(f'Literature Trend: "{keyword}" in {cancer_type}', fontsize=12)
        ax.set_xticks(years)
        ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right")
        ax.legend(fontsize=9)
        ax.set_facecolor("white")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    gap_text = []
    if gaps:
        gap_text.append(f"**Zero-publication years:** {', '.join(map(str, gaps))}")
    if low_years:
        gap_text.append(f"**Low-activity years (<30% avg):** {', '.join(map(str, low_years))}")
    if failed_years:
        gap_text.append(
            f"**Lookup failed (excluded from gap analysis):** {', '.join(map(str, failed_years))}"
        )
    if not gaps and not low_years and not failed_years:
        gap_text.append("No significant gaps detected in the last 10 years.")

    summary = "\n\n".join(gap_text)
    summary += f"\n\n*Source: PubMed E-utilities | Date: {today}*"
    journal_log("A4-LitGap", f"cancer={cancer_type}, kw={keyword}", summary[:100])
    return img, summary
568
+
569
+
570
+ # ─────────────────────────────────────────────
571
+ # TAB A5 — DRUGGABLE ORPHANS
572
+ # ─────────────────────────────────────────────
573
+
574
def _a5_trial_count(gene: str, cancer_type: str) -> int:
    """Return the ClinicalTrials.gov study count for a gene/cancer pair, or -1 on failure."""
    cache_id = f"{gene}_{cancer_type}"
    cached = cache_get("ct_orphan", cache_id)
    if cached is not None:
        return cached
    try:
        r = requests.get(
            f"{CT_BASE}/studies",
            params={
                "query.term": f"{gene} {cancer_type}",
                "pageSize": 1,
                "format": "json",
                # The v2 API only includes `totalCount` when asked to count.
                "countTotal": "true",
            },
            timeout=10,
        )
        r.raise_for_status()
        n = r.json().get("totalCount", 0)
    except Exception:
        return -1
    cache_set("ct_orphan", cache_id, n)
    return n


def a5_run(cancer_type: str):
    """List disease-associated targets that have no known drugs in OpenTargets.

    Takes the top 50 associated targets for *cancer_type*, keeps those whose
    ``knownDrugs.count`` is 0 or missing, and annotates the first 15 with a
    ClinicalTrials.gov study count for "<gene> <cancer_type>".

    Returns:
        ``(DataFrame, note)``, or ``(None, message)`` when OpenTargets returns
        no rows.  ``Status`` is "🔴 Orphan" for zero studies, "⚠️ Trials only"
        for at least one, and "❓ Unknown" when the trial lookup failed.
    """
    today = datetime.date.today().isoformat()
    efo = CANCER_EFO.get(cancer_type, "")

    gql = """
    query DruggableTargets($efoId: String!, $size: Int!) {
      disease(efoId: $efoId) {
        associatedTargets(page: {index: 0, size: $size}) {
          rows {
            target {
              approvedSymbol
              approvedName
              tractability {
                label
                modality
                value
              }
              knownDrugs {
                count
              }
            }
            score
          }
        }
      }
    }
    """
    ot_data = ot_query(gql, {"efoId": efo, "size": 50})
    try:
        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
    except (KeyError, TypeError):
        rows_ot = []

    if not rows_ot:
        return None, f"⚠️ OpenTargets returned no data for {cancer_type}.\n\n*Source: OpenTargets | Date: {today}*"

    orphan_candidates = []
    for row in rows_ot:
        t = row["target"]
        try:
            drug_count = t["knownDrugs"]["count"] or 0
        except (KeyError, TypeError):
            # A missing/null knownDrugs object is treated as "no known drugs".
            drug_count = 0
        if drug_count == 0:
            orphan_candidates.append({
                "gene": t["approvedSymbol"],
                # approvedName may be present but null.
                "name": t.get("approvedName") or "",
                "ot_score": row["score"],
            })

    records = []
    for cand in orphan_candidates[:15]:
        gene = cand["gene"]
        trial_count = _a5_trial_count(gene, cancer_type)
        if trial_count == 0:
            status = "🔴 Orphan"
        elif trial_count > 0:
            status = "⚠️ Trials only"
        else:
            status = "❓ Unknown"
        records.append({
            "Gene": gene,
            "Name": cand["name"][:50],
            "OT_Score": round(cand["ot_score"], 3),
            "Known_Drugs": 0,
            "Active_Trials": trial_count if trial_count >= 0 else "N/A",
            "Status": status,
        })

    df = pd.DataFrame(records)
    note = (
        f"*Source: OpenTargets GraphQL + ClinicalTrials.gov v2 | Date: {today}*\n\n"
        f"*Orphan = no approved drug (OpenTargets knownDrugs.count = 0)*"
    )
    journal_log("A5-DruggableOrphans", f"cancer={cancer_type}", f"orphans={len(df)}")
    return df, note
658
+
659
+
660
+ # ─────────────────────────────────────────────
661
+ # GROUP B — LEARNING SANDBOX
662
+ # ─────────────────────────────────────────────
663
+
664
# Disclaimer text for the simulated (Group B) tabs.
SIMULATED_BANNER = "".join(
    [
        "⚠️ **SIMULATED DATA** — This tab uses rule-based models and synthetic data ",
        "for educational purposes only. Results do NOT reflect real experimental outcomes.",
    ]
)
668
+
669
+ # ── TAB B1 — miRNA Explorer ──────────────────
670
+
671
# Simulated miRNA table, keyed by gene symbol.  The four list fields of an
# entry are parallel: index i in each describes the same miRNA.
MIRNA_DB = {
    "BRCA2": dict(
        miRNAs=["miR-146a-5p", "miR-21-5p", "miR-155-5p", "miR-182-5p", "miR-205-5p"],
        binding_energy=[-18.4, -15.2, -12.7, -14.1, -16.8],
        seed_match=["7mer-m8", "6mer", "7mer-A1", "8mer", "7mer-m8"],
        expression_change=[-2.1, 1.8, 2.3, -1.5, -3.2],
        cancer_context=(
            "BRCA2 loss-of-function is associated with HR-deficient breast/ovarian cancer. "
            "miR-146a-5p and miR-205-5p are frequently downregulated in BRCA2-mutant tumors."
        ),
    ),
    "BRCA1": dict(
        miRNAs=["miR-17-5p", "miR-20a-5p", "miR-93-5p", "miR-182-5p", "miR-9-5p"],
        binding_energy=[-16.1, -13.5, -14.9, -15.3, -11.8],
        seed_match=["8mer", "7mer-m8", "7mer-A1", "8mer", "6mer"],
        expression_change=[1.9, 2.1, 1.6, -1.8, 2.4],
        cancer_context=(
            "BRCA1 regulates DNA damage response. miR-17/20a cluster is upregulated "
            "in BRCA1-deficient tumors and suppresses apoptosis."
        ),
    ),
    "TP53": dict(
        miRNAs=["miR-34a-5p", "miR-125b-5p", "miR-504-5p", "miR-25-3p", "miR-30d-5p"],
        binding_energy=[-19.2, -14.6, -13.1, -12.4, -15.7],
        seed_match=["8mer", "7mer-m8", "7mer-A1", "6mer", "8mer"],
        expression_change=[-3.5, 1.2, 1.7, 2.0, -1.3],
        cancer_context=(
            "TP53 is the most mutated gene in cancer. miR-34a is a direct p53 transcriptional "
            "target; its loss promotes tumor progression across cancer types."
        ),
    ),
}
697
+
698
def b1_run(gene: str):
    """Render the simulated miRNA panel for *gene*.

    Returns ``(image, markdown)``: a two-panel bar chart (binding-energy
    magnitude and expression log2FC) plus a Markdown table followed by the
    gene's cancer-context paragraph.  Unknown genes yield ``(None, message)``.
    """
    entry = MIRNA_DB.get(gene, {})
    if not entry:
        return None, "Gene not found in simulation database."

    names = entry["miRNAs"]
    binding = entry["binding_energy"]
    log2fc = entry["expression_change"]
    seed_types = entry["seed_match"]

    def energy_color(value):
        # Three tiers: below -16, below -13, everything else.
        if value < -16:
            return "#d73027"
        if value < -13:
            return "#fc8d59"
        return "#4393c3"

    fig, (energy_ax, expr_ax) = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")

    # Left panel: energies plotted as positive magnitudes.
    energy_ax.barh(
        names,
        [-value for value in binding],
        color=[energy_color(value) for value in binding],
        edgecolor="white",
    )
    energy_ax.set_xlabel("Binding Energy (|kcal/mol|)", fontsize=10)
    energy_ax.set_title(f"Predicted Binding Energy\n{gene} miRNA targets", fontsize=10)
    energy_ax.set_facecolor("white")

    # Right panel: signed log2 fold change around a zero line.
    expr_ax.barh(
        names,
        log2fc,
        color=["#d73027" if delta < 0 else "#4393c3" for delta in log2fc],
        edgecolor="white",
    )
    expr_ax.axvline(0, color="black", linewidth=0.8)
    expr_ax.set_xlabel("Expression Change (log2FC)", fontsize=10)
    expr_ax.set_title(f"miRNA Expression in {gene}-mutant tumors\n(⚠️ SIMULATED)", fontsize=10)
    expr_ax.set_facecolor("white")

    fig.tight_layout()
    png = io.BytesIO()
    fig.savefig(png, format="png", dpi=150, facecolor="white")
    png.seek(0)
    img = Image.open(png)
    plt.close(fig)

    table = pd.DataFrame({
        "miRNA": names,
        "Binding Energy (kcal/mol)": binding,
        "Seed Match": seed_types,
        "Expression log2FC": log2fc,
    })
    journal_log("B1-miRNA", f"gene={gene}", f"top_miRNA={names[0]}")
    return img, table.to_markdown(index=False) + f"\n\n**Cancer Context:** {entry['cancer_context']}"
739
+
740
+
741
+ # ── TAB B2 — siRNA Targets ───────────────────
742
+
743
# Simulated siRNA table, keyed by cancer code.  The four lists of an entry
# are parallel: index i in each describes the same target.
SIRNA_DB = {
    "LUAD": dict(
        targets=["KRAS G12C", "EGFR exon19del", "ALK fusion", "MET exon14", "RET fusion"],
        efficacy=[0.82, 0.91, 0.76, 0.68, 0.71],
        off_target_risk=["Medium", "Low", "Low", "Medium", "Low"],
        delivery_challenge=["High", "Medium", "Medium", "High", "Medium"],
    ),
    "BRCA": dict(
        targets=["BRCA1 exon11", "BRCA2 exon11", "PIK3CA H1047R", "AKT1 E17K", "ESR1 Y537S"],
        efficacy=[0.78, 0.85, 0.88, 0.72, 0.65],
        off_target_risk=["Low", "Low", "Medium", "Low", "High"],
        delivery_challenge=["Medium", "Medium", "Low", "Low", "High"],
    ),
    "COAD": dict(
        targets=["KRAS G12D", "APC truncation", "BRAF V600E", "SMAD4 loss", "PIK3CA E545K"],
        efficacy=[0.79, 0.61, 0.93, 0.55, 0.84],
        off_target_risk=["Medium", "High", "Low", "Medium", "Low"],
        delivery_challenge=["High", "High", "Low", "High", "Low"],
    ),
}
763
+
764
def b2_run(cancer: str):
    """Render the simulated siRNA target panel for *cancer*.

    Returns ``(image, markdown)``: a horizontal bar chart of efficacy per
    target, coloured by off-target risk, plus the same data as a Markdown
    table.  Unknown cancer codes yield ``(None, message)``.
    """
    from matplotlib.patches import Patch

    entry = SIRNA_DB.get(cancer, {})
    if not entry:
        return None, "Cancer type not in simulation database."

    target_names = entry["targets"]
    scores = entry["efficacy"]
    risks = entry["off_target_risk"]
    delivery_levels = entry["delivery_challenge"]

    # Risk level -> bar colour; unknown levels fall back to grey.
    palette = {"Low": "#4393c3", "Medium": "#fc8d59", "High": "#d73027"}
    bar_colors = [palette.get(level, "#aaa") for level in risks]

    fig, ax = plt.subplots(figsize=(8, 4), facecolor="white")
    ax.barh(target_names, scores, color=bar_colors, edgecolor="white")
    ax.set_xlim(0, 1.1)
    ax.set_xlabel("Predicted siRNA Efficacy (⚠️ SIMULATED)", fontsize=10)
    ax.set_title(f"siRNA Target Efficacy — {cancer}", fontsize=11)
    ax.set_facecolor("white")
    ax.legend(
        handles=[Patch(facecolor=color, label=level) for level, color in palette.items()],
        title="Off-target Risk",
        fontsize=8,
        loc="lower right",
    )
    fig.tight_layout()

    png = io.BytesIO()
    fig.savefig(png, format="png", dpi=150, facecolor="white")
    png.seek(0)
    img = Image.open(png)
    plt.close(fig)

    table = pd.DataFrame({
        "Target": target_names,
        "Efficacy": scores,
        "Off-target Risk": risks,
        "Delivery Challenge": delivery_levels,
    })
    journal_log("B2-siRNA", f"cancer={cancer}", f"top={target_names[0]}")
    return img, table.to_markdown(index=False)
801
+
802
+
803
+ # ── TAB B3 — LNP Corona Simulator ───────────────
804
+
805
def b3_run(peg_mol_pct: float, ionizable_pct: float, helper_pct: float,
           chol_pct: float, particle_size_nm: float, serum_pct: float):
    """Simulate the protein-corona composition of a lipid nanoparticle.

    This is a rule-based toy model: each protein's raw weight is a linear
    function of the inputs, clamped at zero, and the weights are normalised to
    fractions summing to 1.

    Args:
        peg_mol_pct, ionizable_pct, helper_pct, chol_pct: Lipid percentages.
            Their sum (floored at 1) is used only to normalise the PEG share.
        particle_size_nm: Particle diameter in nm.
        serum_pct: Serum percentage.

    Returns:
        ``(image, markdown)`` with a pie chart and a bar chart of the fractions
        and a one-line ApoE interpretation, or ``(None, message)`` if every
        protein weight clamps to zero (nothing to plot).
    """
    total_lipid = peg_mol_pct + ionizable_pct + helper_pct + chol_pct
    peg_norm = peg_mol_pct / max(total_lipid, 1)  # floor avoids division by ~0

    corona_proteins = {
        "ApoE": max(0, 0.35 - peg_norm * 0.8 + ionizable_pct * 0.01),
        "ApoA-I": max(0, 0.20 - ionizable_pct * 0.005 + chol_pct * 0.003),
        "Fibrinogen": max(0, 0.15 + (particle_size_nm - 100) * 0.001 - peg_norm * 0.3),
        "Albumin": max(0, 0.10 + serum_pct * 0.002 - peg_norm * 0.2),
        "Clusterin": max(0, 0.08 + peg_norm * 0.15),
        "IgG": max(0, 0.07 + serum_pct * 0.001),
        "Complement C3": max(0, 0.05 + ionizable_pct * 0.003 - peg_norm * 0.1),
    }
    total = sum(corona_proteins.values())
    if total <= 0:
        # A pie chart cannot be drawn from all-zero wedges.
        return None, "No corona proteins predicted for these inputs (all fractions are zero)."
    corona_proteins = {k: v / total for k, v in corona_proteins.items()}

    labels = list(corona_proteins.keys())
    sizes = list(corona_proteins.values())

    fig, axes = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
    try:
        colors_pie = plt.cm.Set2(np.linspace(0, 1, len(labels)))
        axes[0].pie(sizes, labels=labels, colors=colors_pie, autopct="%1.1f%%", startangle=90)
        axes[0].set_title("Predicted Corona Composition\n(⚠️ SIMULATED)", fontsize=10)

        axes[1].bar(labels, sizes, color=colors_pie, edgecolor="white")
        axes[1].set_ylabel("Relative Abundance", fontsize=10)
        axes[1].set_title("Corona Protein Fractions", fontsize=10)
        # Style the existing categorical tick labels rather than calling
        # set_xticklabels() without fixing the tick positions first.
        plt.setp(axes[1].get_xticklabels(), rotation=45, ha="right", fontsize=8)
        axes[1].set_facecolor("white")

        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    apoe_pct = corona_proteins.get("ApoE", 0) * 100
    interpretation = (
        f"**ApoE fraction: {apoe_pct:.1f}%** — "
        + ("High ApoE → enhanced brain/liver targeting via LDLR pathway." if apoe_pct > 25
           else "Low ApoE → reduced receptor-mediated uptake.")
    )
    journal_log("B3-LNPCorona", f"PEG={peg_mol_pct}%,size={particle_size_nm}nm", f"ApoE={apoe_pct:.1f}%")
    return img, interpretation
852
+
853
+
854
+ # ── TAB B4 — Flow Corona (Vroman Kinetics) ──────
855
+
856
def _b4_saturation(kon: float, koff: float, t):
    """Return ``kon/(kon+koff) * (1 - exp(-(kon+koff) * t))``, or zeros if kon+koff <= 0."""
    rate = kon + koff
    if rate <= 0:
        # No binding kinetics defined; avoids a ZeroDivisionError.
        return np.zeros_like(t, dtype=float)
    return (kon / rate) * (1 - np.exp(-rate * t))


def b4_run(time_points: int, kon_albumin: float, kon_apoe: float,
           koff_albumin: float, koff_apoe: float):
    """Simulate competitive protein adsorption (Vroman effect) over time.

    This is a toy model: albumin and ApoE each follow a saturating binding
    curve (ApoE starts after a 5-unit delay), and albumin and fibrinogen
    coverage are then damped exponentially by the ApoE coverage.

    Args:
        time_points: Length of the simulated window (x-axis labelled minutes),
            sampled at 500 evenly spaced points.
        kon_albumin, koff_albumin: Albumin on/off rates.
        kon_apoe, koff_apoe: ApoE on/off rates.

    Returns:
        ``(image, markdown)`` with the coverage curves and a note giving the
        crossover time, i.e. the first sampled time at which ApoE coverage
        exceeds the displaced-albumin coverage (or a "not reached" message).
    """
    t = np.linspace(0, time_points, 500)

    albumin = _b4_saturation(kon_albumin, koff_albumin, t)
    apoe_delay = np.maximum(0, t - 5)
    apoe = _b4_saturation(kon_apoe, koff_apoe, apoe_delay)
    albumin_displaced = albumin * np.exp(-apoe * 2)
    fibrinogen = 0.3 * (1 - np.exp(-0.05 * t)) * np.exp(-apoe * 1.5)

    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
    try:
        ax.plot(t, albumin_displaced, label="Albumin (displaced)", color="#4393c3", linewidth=2)
        ax.plot(t, apoe, label="ApoE (hard corona)", color="#d73027", linewidth=2)
        ax.plot(t, fibrinogen, label="Fibrinogen", color="#fc8d59", linewidth=2, linestyle="--")
        ax.set_xlabel("Time (min)", fontsize=11)
        ax.set_ylabel("Surface Coverage (a.u.)", fontsize=11)
        ax.set_title("Vroman Effect — Competitive Protein Adsorption\n(⚠️ SIMULATED)", fontsize=11)
        ax.legend(fontsize=9)
        ax.set_facecolor("white")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        # Always release the figure, even if rendering fails.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    # Crossover = first sample where ApoE overtakes albumin.  Both curves are
    # 0 at t=0, so the strict inequality skips the trivial starting point.
    apoe_dominant = apoe > albumin_displaced
    if apoe_dominant.any():
        crossover = f"~{float(t[np.argmax(apoe_dominant)]):.1f} min"
    else:
        crossover = "not reached within the simulated window"
    note = (
        f"**Vroman crossover** (albumin → ApoE dominance): {crossover}\n\n"
        "The Vroman effect describes sequential protein displacement: "
        "abundant proteins (albumin) adsorb first, then are displaced by higher-affinity proteins (ApoE, fibrinogen)."
    )
    journal_log("B4-FlowCorona", f"kon_alb={kon_albumin},kon_apoe={kon_apoe}", note[:80])
    return img, note
891
+
892
 
893
+ # ── TAB B5 — Variant Concepts ───────────────────
894
+
895
# Educational lookup table: classification label -> typical criteria,
# associated ACMG/AMP codes and a short explanation.
VARIANT_RULES = {
    "Pathogenic": dict(
        criteria=[
            "Nonsense mutation in tumor suppressor",
            "Frameshift in BRCA1/2",
            "Splice site ±1/2 in essential gene",
            "Known hotspot (e.g. TP53 R175H)",
        ],
        acmg_codes=["PVS1", "PS1", "PS2", "PM2"],
        explanation="Strong evidence of pathogenicity. Likely disrupts protein function via LOF or dominant-negative mechanism.",
    ),
    "Likely Pathogenic": dict(
        criteria=[
            "Missense in functional domain",
            "In silico tools predict damaging",
            "Low population frequency (<0.01%)",
            "Segregates with disease",
        ],
        acmg_codes=["PM1", "PM2", "PP2", "PP3"],
        explanation="Moderate-strong evidence. Functional studies or segregation data would upgrade to Pathogenic.",
    ),
    "VUS": dict(
        criteria=[
            "Missense with conflicting evidence",
            "Moderate population frequency",
            "Uncertain functional impact",
            "Limited segregation data",
        ],
        acmg_codes=["PM2", "BP4", "BP6"],
        explanation="Variant of Uncertain Significance. Insufficient evidence to classify. Functional assays recommended.",
    ),
    "Likely Benign": dict(
        criteria=[
            "Common in population (>1%)",
            "Synonymous with no splicing impact",
            "Observed in healthy controls",
            "Computational tools predict benign",
        ],
        acmg_codes=["BS1", "BP1", "BP4", "BP7"],
        explanation="Evidence suggests benign. Unlikely to cause disease but not fully excluded.",
    ),
    "Benign": dict(
        criteria=[
            "High population frequency (>5%)",
            "No disease association in large studies",
            "Synonymous, no functional impact",
            "Functional studies show no effect",
        ],
        acmg_codes=["BA1", "BS1", "BS2", "BS3"],
        explanation="Strong evidence of benign nature. Not expected to contribute to disease.",
    ),
}
927
+
928
def b5_run(classification: str):
    """Render the educational Markdown card for one variant classification.

    Looks ``classification`` up in ``VARIANT_RULES``. An unknown label yields
    the plain string ``"Classification not found."`` and nothing is logged.
    Otherwise the evidence codes, typical criteria and interpretation are
    assembled into a Markdown report, the first 100 characters of which are
    written to the lab journal before the full report is returned.
    """
    rule = VARIANT_RULES.get(classification, {})
    if not rule:
        return "Classification not found."

    bullet_lines = "\n".join(f"- {item}" for item in rule["criteria"])
    code_cells = " | ".join(f"`{tag}`" for tag in rule["acmg_codes"])

    # Each section is separated by a blank line in the rendered Markdown.
    sections = [
        f"## {classification}",
        f"**ACMG/AMP Codes:** {code_cells}",
        f"**Typical Criteria:**\n{bullet_lines}",
        f"**Interpretation:** {rule['explanation']}",
        "> ⚠️ SIMULATED — This is a rule-based educational model only. "
        "Real variant classification requires expert review and full ACMG/AMP criteria evaluation.",
    ]
    report = "\n\n".join(sections)

    journal_log("B5-VariantConcepts", f"class={classification}", report[:100])
    return report
945
+
946
+
947
+ # ─────────────────────────────────────────────
948
+ # GRADIO UI ASSEMBLY
949
+ # ─────────────────────────────────────────────
950
+
951
# Stylesheet handed to ``gr.Blocks(css=...)`` in build_app().
# Defines the page font, three helper classes (.simulated-banner,
# .source-note, .gap-card) and hides ``footer`` elements.
# NOTE(review): the helper classes are presumably referenced from HTML/Markdown
# emitted elsewhere in the file — confirm before removing any of them.
CUSTOM_CSS = """
body { font-family: 'Inter', sans-serif; }
.simulated-banner {
background: #fff3cd; border: 1px solid #ffc107;
border-radius: 6px; padding: 10px 14px;
font-weight: 600; color: #856404; margin-bottom: 8px;
}
.source-note { color: #6c757d; font-size: 0.85em; margin-top: 6px; }
.gap-card {
background: #f8f9fa; border-left: 4px solid #d73027;
padding: 10px 14px; margin: 6px 0; border-radius: 4px;
}
footer { display: none !important; }
"""
965
+
966
+
967
def build_app():
    """Assemble the complete Gradio UI and return it without launching.

    Layout (inside a single ``gr.Row``):
      * a wide column (``scale=4``) holding two top-level tabs:
          - "Real Data Tools": Gray Zones Explorer, Understudied Target
            Finder, Real Variant Lookup, Literature Gap Finder, Druggable
            Orphans (wired to ``a1_run`` .. ``a5_run``) and an optional
            Research Assistant tab built by ``chatbot.build_chatbot_tab``;
          - "Learning Sandbox": miRNA Explorer, siRNA Targets, LNP Corona,
            Flow Corona, Variant Concepts (wired to ``b1_run`` .. ``b5_run``);
      * a narrow sidebar column (``scale=1``) with the Lab Journal, backed by
        ``journal_log`` / ``journal_read``.

    The callbacks, ``CANCER_TYPES``, ``SIMULATED_BANNER``, ``VARIANT_RULES``
    and ``CUSTOM_CSS`` are module-level names defined elsewhere in this file.

    Returns:
        The ``gr.Blocks`` instance bound to ``demo``.
    """
    # NOTE(review): the nesting below (which widgets sit inside each gr.Row,
    # and the closing footer Markdown being at Blocks level rather than inside
    # the sidebar column) is the most plausible reading of the layout —
    # confirm against the rendered UI.
    with gr.Blocks(css=CUSTOM_CSS, title="K R&D Lab — Cancer Research Suite") as demo:
        gr.Markdown(
            "# 🔬 K R&D Lab — Cancer Research Suite\n"
            "**Author:** Oksana Kolisnyk | [kosatiks-group.pp.ua](https://kosatiks-group.pp.ua) \n"
            "**Repo:** [github.com/TEZv/K-RnD-Lab-PHYLO-03_2026](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026)"
        )

        with gr.Row():
            with gr.Column(scale=4):
                with gr.Tabs():

                    # ════════════════════════════════
                    # GROUP A — REAL DATA TOOLS
                    # ════════════════════════════════
                    with gr.Tab("🔬 Real Data Tools"):
                        with gr.Tabs():

                            with gr.Tab("🔍 Gray Zones Explorer"):
                                gr.Markdown(
                                    "Identify underexplored biological processes in a cancer type "
                                    "using live PubMed + OpenTargets data."
                                )
                                a1_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                a1_btn = gr.Button("🔍 Explore Gray Zones", variant="primary")
                                a1_heatmap = gr.Image(label="Research Coverage Heatmap", type="pil")
                                a1_gaps = gr.Markdown(label="Top 5 Research Gaps")
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**What is a research gray zone?**\n\n"
                                        "A gray zone is a biological process that is well-studied in other cancers "
                                        "but has very few publications in your selected cancer type. "
                                        "Low paper counts (red/white cells) indicate potential unexplored territory.\n\n"
                                        "**How to use:** Select a rare cancer (e.g. DIPG, MCC) to find the most "
                                        "underexplored processes. Cross-reference with Tab A2 to find targetable genes."
                                    )
                                a1_btn.click(a1_run, inputs=[a1_cancer], outputs=[a1_heatmap, a1_gaps])

                            with gr.Tab("🎯 Understudied Target Finder"):
                                gr.Markdown(
                                    "Find essential genes with high research gap index "
                                    "(high essentiality, low publication coverage)."
                                )
                                # Explicit disclaimer: essentiality values are placeholders, not DepMap data.
                                gr.Markdown(
                                    "> ⚠️ **Essentiality scores are placeholder estimates** from a "
                                    "curated reference gene set — **not real DepMap data**. "
                                    "Association scores and paper/trial counts are fetched live. "
                                    "For real essentiality values, download `CRISPR_gene_effect.csv` "
                                    "from [depmap.org](https://depmap.org/portal/download/all/) and "
                                    "replace `_load_depmap_sample()` in `app.py`."
                                )
                                a2_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                a2_btn = gr.Button("🎯 Find Understudied Targets", variant="primary")
                                a2_table = gr.Dataframe(label="Target Gap Table", wrap=True)
                                a2_note = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**Gap Index formula:** `essentiality / log(papers + 1)`\n\n"
                                        "- **Essentiality**: inverted DepMap CRISPR gene effect score\n"
                                        "- **Papers**: PubMed count for gene + cancer type\n"
                                        "- **High Gap Index** = essential gene with few publications = high research opportunity"
                                    )
                                a2_btn.click(a2_run, inputs=[a2_cancer], outputs=[a2_table, a2_note])

                            with gr.Tab("🧬 Real Variant Lookup"):
                                gr.Markdown(
                                    "Look up a variant in **ClinVar** and **gnomAD**. "
                                    "Results are fetched live — never hallucinated."
                                )
                                a3_hgvs = gr.Textbox(
                                    label="HGVS Notation",
                                    placeholder="e.g. NM_007294.4:c.5266dupC or NM_000546.6:c.524G>A",
                                    lines=1
                                )
                                a3_btn = gr.Button("🔎 Look Up Variant", variant="primary")
                                a3_result = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**HGVS notation format:**\n"
                                        "- `NM_XXXXXX.X:c.NNNN[change]` — coding DNA reference\n"
                                        "- `NC_XXXXXX.X:g.NNNN[change]` — genomic reference\n\n"
                                        "**Important:** If a variant is not found, this tool returns "
                                        "'Not in database. Do not interpret.' — never a fabricated result."
                                    )
                                a3_btn.click(a3_run, inputs=[a3_hgvs], outputs=[a3_result])

                            with gr.Tab("📰 Literature Gap Finder"):
                                gr.Markdown(
                                    "Visualize publication trends over 10 years and detect "
                                    "years with low research activity."
                                )
                                with gr.Row():
                                    a4_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                    a4_kw = gr.Textbox(label="Keyword", placeholder="e.g. ferroptosis", lines=1)
                                a4_btn = gr.Button("📊 Analyze Literature Trend", variant="primary")
                                a4_chart = gr.Image(label="Papers per Year", type="pil")
                                a4_gaps = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**How to read the chart:**\n"
                                        "- 🔵 Blue bars = normal activity\n"
                                        "- 🟠 Orange bars = low activity (<30% of average)\n"
                                        "- 🔴 Red bars = zero publications (true gap)"
                                    )
                                a4_btn.click(a4_run, inputs=[a4_cancer, a4_kw], outputs=[a4_chart, a4_gaps])

                            with gr.Tab("💊 Druggable Orphans"):
                                gr.Markdown(
                                    "Identify cancer-associated essential genes with **no approved drug** "
                                    "and **no active clinical trial**."
                                )
                                a5_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
                                a5_btn = gr.Button("💊 Find Druggable Orphans", variant="primary")
                                a5_table = gr.Dataframe(label="Orphan Target Table", wrap=True)
                                a5_note = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**What is a druggable orphan?**\n\n"
                                        "A gene that is strongly associated with a cancer but has no approved drug "
                                        "and no active clinical trial. These represent the highest-opportunity "
                                        "targets for drug discovery."
                                    )
                                a5_btn.click(a5_run, inputs=[a5_cancer], outputs=[a5_table, a5_note])

                            with gr.Tab("🤖 Research Assistant"):
                                gr.Markdown(
                                    "**RAG-powered research assistant** indexed on 20 curated papers "
                                    "on LNP delivery, protein corona, and cancer variants.\n\n"
                                    "*Powered by sentence-transformers + FAISS — no API key required.*"
                                )
                                # The chatbot lives in an optional sibling module; if importing it
                                # fails, show a notice instead of breaking the whole app.
                                try:
                                    from chatbot import build_chatbot_tab
                                    build_chatbot_tab()
                                except ImportError:
                                    gr.Markdown(
                                        "⚠️ `chatbot.py` not found. Please ensure it is in the same directory as `app.py`."
                                    )

                    # ════════════════════════════════
                    # GROUP B — LEARNING SANDBOX
                    # ════════════════════════════════
                    with gr.Tab("📚 Learning Sandbox"):
                        gr.Markdown(
                            "> ⚠️ **ALL TABS IN THIS GROUP USE SIMULATED DATA** — "
                            "For educational purposes only. Results do not reflect real experiments."
                        )
                        with gr.Tabs():

                            with gr.Tab("🧬 miRNA Explorer"):
                                gr.Markdown(SIMULATED_BANNER)
                                b1_gene = gr.Dropdown(["BRCA2", "BRCA1", "TP53"], label="Gene", value="TP53")
                                b1_btn = gr.Button("🔬 Explore miRNA Interactions", variant="primary")
                                b1_plot = gr.Image(label="miRNA Binding & Expression (⚠️ SIMULATED)", type="pil")
                                b1_table = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**miRNA biology basics:**\n\n"
                                        "- miRNAs are ~22 nt non-coding RNAs that bind 3'UTR of mRNAs\n"
                                        "- Seed match types: 8mer > 7mer-m8 > 7mer-A1 > 6mer (binding strength)\n"
                                        "- Negative binding energy = stronger predicted interaction"
                                    )
                                b1_btn.click(b1_run, inputs=[b1_gene], outputs=[b1_plot, b1_table])

                            with gr.Tab("🎯 siRNA Targets"):
                                gr.Markdown(SIMULATED_BANNER)
                                b2_cancer = gr.Dropdown(["LUAD", "BRCA", "COAD"], label="Cancer Type", value="LUAD")
                                b2_btn = gr.Button("🎯 Simulate siRNA Efficacy", variant="primary")
                                b2_plot = gr.Image(label="siRNA Efficacy (⚠️ SIMULATED)", type="pil")
                                b2_table = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**siRNA design principles:**\n\n"
                                        "- siRNAs are 21-23 nt dsRNA that trigger RISC-mediated mRNA cleavage\n"
                                        "- Off-target risk: seed region complementarity to unintended mRNAs\n"
                                        "- Delivery challenge: endosomal escape, serum stability, tumor penetration"
                                    )
                                b2_btn.click(b2_run, inputs=[b2_cancer], outputs=[b2_plot, b2_table])

                            with gr.Tab("🧪 LNP Corona"):
                                gr.Markdown(SIMULATED_BANNER)
                                with gr.Row():
                                    b3_peg = gr.Slider(0.5, 5.0, value=1.5, step=0.1, label="PEG mol% (lipid)")
                                    b3_ion = gr.Slider(10, 60, value=50, step=1, label="Ionizable lipid mol%")
                                with gr.Row():
                                    b3_helper = gr.Slider(5, 30, value=10, step=1, label="Helper lipid mol%")
                                    b3_chol = gr.Slider(10, 50, value=38, step=1, label="Cholesterol mol%")
                                with gr.Row():
                                    b3_size = gr.Slider(50, 300, value=100, step=5, label="Particle size (nm)")
                                    b3_serum = gr.Slider(0, 100, value=10, step=5, label="Serum % in medium")
                                b3_btn = gr.Button("🧪 Simulate Corona", variant="primary")
                                b3_plot = gr.Image(label="Corona Composition (⚠️ SIMULATED)", type="pil")
                                b3_interp = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**Protein corona basics:**\n\n"
                                        "- Hard corona: tightly bound, long-lived proteins (ApoE, fibrinogen)\n"
                                        "- Soft corona: loosely bound, rapidly exchanging proteins (albumin)\n"
                                        "- ApoE enrichment → enhanced brain targeting via LDLR/LRP1 receptors\n"
                                        "- PEG reduces corona formation"
                                    )
                                # Input order must match b3_run's positional parameters.
                                b3_btn.click(
                                    b3_run,
                                    inputs=[b3_peg, b3_ion, b3_helper, b3_chol, b3_size, b3_serum],
                                    outputs=[b3_plot, b3_interp]
                                )

                            with gr.Tab("🌊 Flow Corona"):
                                gr.Markdown(SIMULATED_BANNER)
                                with gr.Row():
                                    b4_time = gr.Slider(10, 120, value=60, step=5, label="Time range (min)")
                                    b4_kon_alb = gr.Slider(0.01, 1.0, value=0.3, step=0.01, label="kon Albumin")
                                with gr.Row():
                                    b4_kon_apoe = gr.Slider(0.001, 0.5, value=0.05, step=0.001, label="kon ApoE")
                                    b4_koff_alb = gr.Slider(0.01, 1.0, value=0.2, step=0.01, label="koff Albumin")
                                    b4_koff_apoe = gr.Slider(0.001, 0.1, value=0.01, step=0.001, label="koff ApoE")
                                b4_btn = gr.Button("🌊 Simulate Vroman Kinetics", variant="primary")
                                b4_plot = gr.Image(label="Vroman Effect (⚠️ SIMULATED)", type="pil")
                                b4_note = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**The Vroman Effect:** Proteins with high abundance but low affinity "
                                        "(albumin) adsorb first, then are displaced by lower-abundance but "
                                        "higher-affinity proteins (fibrinogen, ApoE).\n\n"
                                        "**Clinical implication:** The final hard corona (not initial) determines "
                                        "nanoparticle fate in vivo."
                                    )
                                # Input order must match b4_run's positional parameters.
                                b4_btn.click(
                                    b4_run,
                                    inputs=[b4_time, b4_kon_alb, b4_kon_apoe, b4_koff_alb, b4_koff_apoe],
                                    outputs=[b4_plot, b4_note]
                                )

                            with gr.Tab("🔬 Variant Concepts"):
                                gr.Markdown(SIMULATED_BANNER)
                                # Choices come straight from the rule table so the two cannot drift apart.
                                b5_class = gr.Dropdown(
                                    list(VARIANT_RULES.keys()),
                                    label="ACMG Classification",
                                    value="VUS"
                                )
                                b5_btn = gr.Button("📋 Explain Classification", variant="primary")
                                b5_result = gr.Markdown()
                                with gr.Accordion("📖 Learning Mode", open=False):
                                    gr.Markdown(
                                        "**ACMG/AMP 2015 Classification Framework:**\n\n"
                                        "1. **Pathogenic** — strong evidence of disease causation\n"
                                        "2. **Likely Pathogenic** — >90% probability pathogenic\n"
                                        "3. **VUS** — uncertain significance\n"
                                        "4. **Likely Benign** — >90% probability benign\n"
                                        "5. **Benign** — strong evidence of no disease effect"
                                    )
                                b5_btn.click(b5_run, inputs=[b5_class], outputs=[b5_result])

            # ── SIDEBAR ──
            with gr.Column(scale=1, min_width=260):
                gr.Markdown("## 📓 Lab Journal")
                note_input = gr.Textbox(label="Add note", placeholder="Your observation...", lines=2)
                save_btn = gr.Button("💾 Save Note", size="sm")
                refresh_btn = gr.Button("🔄 Refresh Journal", size="sm")
                journal_display = gr.Markdown(value="*Click Refresh to load entries.*")

                def save_note(note):
                    # Blank / whitespace-only notes are not logged; the journal
                    # view is re-read and returned either way.
                    if note.strip():
                        journal_log("Manual", "note", note.strip(), note.strip())
                    return journal_read()

                save_btn.click(save_note, inputs=[note_input], outputs=[journal_display])
                refresh_btn.click(lambda: journal_read(), outputs=[journal_display])

        gr.Markdown(
            "---\n"
            "*K R&D Lab Cancer Research Suite · "
            "All real-data tabs use live APIs with 24h caching · "
            "Simulated tabs are clearly labeled ⚠️ SIMULATED · "
            "Source attribution shown on every result*"
        )

    return demo
1244
+
1245
 
1246
# ── LAUNCH — must be outside if __name__ for HuggingFace Spaces ──
# These statements run at import time: the UI is built once and the server is
# started immediately, with an explicit host and port.
# NOTE(review): 0.0.0.0 / 7860 are presumably what the Spaces container
# expects — confirm before changing either value.
app = build_app()
app.launch(server_name="0.0.0.0", server_port=7860)