TEZv committed on
Commit
8a4d1eb
·
verified ·
1 Parent(s): ea18311

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +198 -371
app.py CHANGED
@@ -1,7 +1,7 @@
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
- import json, re, csv, os
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
@@ -16,179 +16,125 @@ ACC = "#f97316"
16
  ACC2 = "#38bdf8"
17
  TXT = "#f1f5f9"
18
 
19
- # ── Logging ──────────────────────────────────────────────────────────────────
20
  LOG_PATH = Path("./lab_journal.csv")
21
 
22
  def log_entry(tab, inputs, result, note=""):
23
  write_header = not LOG_PATH.exists()
24
  with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
25
- w = csv.DictWriter(f,
26
- fieldnames=["timestamp","tab","inputs","result","note"])
27
  if write_header:
28
  w.writeheader()
29
  w.writerow({
30
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
31
- "tab": tab,
32
- "inputs": str(inputs),
33
- "result": str(result)[:200],
34
- "note": note
35
  })
36
 
37
  def load_journal():
38
  if not LOG_PATH.exists():
39
- return pd.DataFrame(columns=
40
- ["timestamp","tab","inputs","result","note"])
41
  return pd.read_csv(LOG_PATH)
42
 
43
  def save_note(note, tab, last_result):
44
  log_entry(tab, "", last_result, note)
45
  return "✅ Saved!", load_journal()
46
 
47
- # ── All original DB dicts (unchanged) ────────────────────────────────────────
48
  MIRNA_DB = {
49
  "BRCA2": [
50
- {"miRNA":"hsa-miR-148a-3p","log2FC":-0.70,"padj":0.013,
51
- "targets":"DNMT1, AKT2","pathway":"Epigenetic reprogramming"},
52
- {"miRNA":"hsa-miR-30e-5p","log2FC":-0.49,"padj":0.032,
53
- "targets":"MYC, KRAS","pathway":"Oncogene suppression"},
54
- {"miRNA":"hsa-miR-551b-3p","log2FC":-0.59,"padj":0.048,
55
- "targets":"SMAD4, CDK6","pathway":"TGF-beta / CDK4/6"},
56
- {"miRNA":"hsa-miR-22-3p","log2FC":-0.43,"padj":0.041,
57
- "targets":"HIF1A, PTEN","pathway":"Hypoxia / PI3K"},
58
- {"miRNA":"hsa-miR-200c-3p","log2FC":-0.38,"padj":0.044,
59
- "targets":"ZEB1, ZEB2","pathway":"EMT suppression"},
60
  ],
61
  "BRCA1": [
62
- {"miRNA":"hsa-miR-155-5p","log2FC":-0.81,"padj":0.008,
63
- "targets":"SHIP1, SOCS1","pathway":"Immune evasion"},
64
- {"miRNA":"hsa-miR-146a-5p","log2FC":-0.65,"padj":0.019,
65
- "targets":"TRAF6, IRAK1","pathway":"NF-kB signalling"},
66
- {"miRNA":"hsa-miR-21-5p","log2FC":-0.55,"padj":0.027,
67
- "targets":"PTEN, PDCD4","pathway":"Apoptosis"},
68
- {"miRNA":"hsa-miR-17-5p","log2FC":-0.47,"padj":0.036,
69
- "targets":"RB1, E2F1","pathway":"Cell cycle"},
70
- {"miRNA":"hsa-miR-34a-5p","log2FC":-0.41,"padj":0.049,
71
- "targets":"BCL2, CDK6","pathway":"p53 axis"},
72
  ],
73
  "TP53": [
74
- {"miRNA":"hsa-miR-34a-5p","log2FC":-1.10,"padj":0.001,
75
- "targets":"BCL2, CDK6","pathway":"p53-miR-34 axis"},
76
- {"miRNA":"hsa-miR-192-5p","log2FC":-0.90,"padj":0.005,
77
- "targets":"MDM2, DHFR","pathway":"p53 feedback"},
78
- {"miRNA":"hsa-miR-145-5p","log2FC":-0.75,"padj":0.012,
79
- "targets":"MYC, EGFR","pathway":"Growth suppression"},
80
- {"miRNA":"hsa-miR-107","log2FC":-0.62,"padj":0.023,
81
- "targets":"CDK6, HIF1B","pathway":"Hypoxia / cell cycle"},
82
- {"miRNA":"hsa-miR-215-5p","log2FC":-0.51,"padj":0.038,
83
- "targets":"DTL, DHFR","pathway":"DNA damage response"},
84
  ],
85
  }
86
 
87
  SIRNA_DB = {
88
- "LUAD":[
89
- {"Gene":"SPC24","dCERES":-0.175,"log2FC":1.13,
90
- "Drug_status":"Novel","siRNA":"GCAGCUGAAGAAACUGAAU"},
91
- {"Gene":"BUB1B","dCERES":-0.119,"log2FC":1.12,
92
- "Drug_status":"Novel","siRNA":"CCAAAGAGCUGAAGAACAU"},
93
- {"Gene":"CDC45","dCERES":-0.144,"log2FC":1.26,
94
- "Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
95
- {"Gene":"PLK1","dCERES":-0.239,"log2FC":1.03,
96
- "Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
97
- {"Gene":"CDK1","dCERES":-0.201,"log2FC":1.00,
98
- "Drug_status":"Clinical","siRNA":"GCAGAAGCACUGAAGAUUU"},
99
  ],
100
- "BRCA":[
101
- {"Gene":"AURKA","dCERES":-0.165,"log2FC":1.20,
102
- "Drug_status":"Clinical","siRNA":"GCACUGAAGAUGCAGAAUU"},
103
- {"Gene":"AURKB","dCERES":-0.140,"log2FC":1.15,
104
- "Drug_status":"Clinical","siRNA":"CCUGAAGACGCUCAAGGUU"},
105
- {"Gene":"CENPW","dCERES":-0.125,"log2FC":0.95,
106
- "Drug_status":"Novel","siRNA":"GCAGAAGCACUGAAGAUUU"},
107
- {"Gene":"RFC2","dCERES":-0.136,"log2FC":0.50,
108
- "Drug_status":"Novel","siRNA":"GCAAGAUGCAGAAGCACUU"},
109
- {"Gene":"TYMS","dCERES":-0.131,"log2FC":0.72,
110
- "Drug_status":"Approved","siRNA":"GGACGCUCAAGAUGCAGAU"},
111
  ],
112
- "COAD":[
113
- {"Gene":"KRAS","dCERES":-0.210,"log2FC":0.80,
114
- "Drug_status":"Clinical","siRNA":"GCUGGAGCUGGUGGUAGUU"},
115
- {"Gene":"WEE1","dCERES":-0.180,"log2FC":1.05,
116
- "Drug_status":"Clinical","siRNA":"GCAGCUGAAGAAACUGAAU"},
117
- {"Gene":"CHEK1","dCERES":-0.155,"log2FC":0.90,
118
- "Drug_status":"Clinical","siRNA":"CCAAAGAGCUGAAGAACAU"},
119
- {"Gene":"RFC2","dCERES":-0.130,"log2FC":0.55,
120
- "Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
121
- {"Gene":"PKMYT1","dCERES":-0.122,"log2FC":1.07,
122
- "Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
123
  ],
124
  }
125
 
126
  CERNA = [
127
- {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p",
128
- "target":"AKT1","pathway":"TREM2 core signaling"},
129
- {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p",
130
- "target":"NFKB1","pathway":"Neuroinflammation"},
131
- {"lncRNA":"GAS5","miRNA":"hsa-miR-21-5p",
132
- "target":"PTEN","pathway":"Neuroinflammation"},
133
- {"lncRNA":"GAS5","miRNA":"hsa-miR-222-3p",
134
- "target":"IL1B","pathway":"Neuroinflammation"},
135
- {"lncRNA":"HOTAIRM1","miRNA":"hsa-miR-9-5p",
136
- "target":"TREM2","pathway":"Direct TREM2 regulation"},
137
  ]
138
  ASO = [
139
- {"lncRNA":"GAS5","position":119,"accessibility":0.653,
140
- "GC_pct":50,"Tm":47.2,"priority":"HIGH"},
141
- {"lncRNA":"CYTOR","position":507,"accessibility":0.653,
142
- "GC_pct":50,"Tm":46.8,"priority":"HIGH"},
143
- {"lncRNA":"HOTAIRM1","position":234,"accessibility":0.621,
144
- "GC_pct":44,"Tm":44.1,"priority":"MEDIUM"},
145
- {"lncRNA":"LINC00847","position":89,"accessibility":0.598,
146
- "GC_pct":56,"Tm":48.3,"priority":"MEDIUM"},
147
- {"lncRNA":"ZFAS1","position":312,"accessibility":0.571,
148
- "GC_pct":48,"Tm":45.5,"priority":"MEDIUM"},
149
  ]
150
 
151
  FGFR3 = {
152
- "P1 (hairpin loop)":[
153
- {"Compound":"CHEMBL1575701","RNA_score":0.809,
154
- "Toxicity":0.01,"Final_score":0.793},
155
- {"Compound":"CHEMBL15727","RNA_score":0.805,
156
- "Toxicity":0.00,"Final_score":0.789},
157
- {"Compound":"Thioguanine","RNA_score":0.888,
158
- "Toxicity":32.5,"Final_score":0.742},
159
- {"Compound":"Deazaguanine","RNA_score":0.888,
160
- "Toxicity":35.0,"Final_score":0.735},
161
- {"Compound":"CHEMBL441","RNA_score":0.775,
162
- "Toxicity":5.2,"Final_score":0.721},
163
  ],
164
- "P10 (G-quadruplex)":[
165
- {"Compound":"CHEMBL15727","RNA_score":0.805,
166
- "Toxicity":0.00,"Final_score":0.789},
167
- {"Compound":"CHEMBL5411515","RNA_score":0.945,
168
- "Toxicity":37.1,"Final_score":0.761},
169
- {"Compound":"CHEMBL90","RNA_score":0.760,
170
- "Toxicity":2.1,"Final_score":0.745},
171
- {"Compound":"CHEMBL102","RNA_score":0.748,
172
- "Toxicity":8.4,"Final_score":0.712},
173
- {"Compound":"Berberine","RNA_score":0.735,
174
- "Toxicity":3.2,"Final_score":0.708},
175
  ],
176
  }
177
 
178
  VARIANT_DB = {
179
- "BRCA1:p.R1699Q":{"score":0.03,"cls":"Benign","conf":"High"},
180
- "BRCA1:p.R1699W":{"score":0.97,"cls":"Pathogenic","conf":"High"},
181
- "BRCA2:p.D2723A":{"score":0.999,"cls":"Pathogenic","conf":"High"},
182
- "TP53:p.R248W": {"score":0.998,"cls":"Pathogenic","conf":"High"},
183
- "TP53:p.R248Q": {"score":0.995,"cls":"Pathogenic","conf":"High"},
184
- "EGFR:p.L858R": {"score":0.96,"cls":"Pathogenic","conf":"High"},
185
- "ALK:p.F1174L": {"score":0.94,"cls":"Pathogenic","conf":"High"},
186
  }
187
  PLAIN = {
188
- "Pathogenic":"This variant is likely to cause disease. Clinical follow-up is strongly recommended.",
189
  "Likely Pathogenic":"This variant is probably harmful. Discuss with your doctor.",
190
- "Benign":"This variant is likely harmless. Common in the general population.",
191
- "Likely Benign":"This variant is probably harmless. No strong reason for concern.",
192
  }
193
  BM_W = {
194
  "CTHRC1":0.18,"FHL2":0.15,"LDHA":0.14,"P4HA1":0.13,
@@ -199,17 +145,14 @@ PROTEINS = ["albumin","apolipoprotein","fibrinogen","vitronectin",
199
  "clusterin","igm","iga","igg","complement","transferrin",
200
  "alpha-2-macroglobulin"]
201
 
202
- # ── Core functions (unchanged logic, + logging) ───────────────────────────────
203
  def predict_mirna(gene):
204
  df = pd.DataFrame(MIRNA_DB.get(gene, []))
205
- log_entry("BRCA2 miRNA", gene,
206
- f"Found {len(df)} miRNAs for {gene}")
207
  return df
208
 
209
  def predict_sirna(cancer):
210
  df = pd.DataFrame(SIRNA_DB.get(cancer, []))
211
- log_entry("TP53 siRNA", cancer,
212
- f"Found {len(df)} targets for {cancer}")
213
  return df
214
 
215
  def get_lncrna():
@@ -231,8 +174,7 @@ def predict_drug(pocket):
231
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
232
  plt.close()
233
  buf.seek(0)
234
- top = df.iloc[0]["Compound"] if len(df) else "none"
235
- log_entry("FGFR3 Drug", pocket, f"Top: {top}")
236
  return df, Image.open(buf)
237
 
238
  def predict_variant(hgvs, sift, polyphen, gnomad):
@@ -242,9 +184,9 @@ def predict_variant(hgvs, sift, polyphen, gnomad):
242
  cls, conf, score = r["cls"], r["conf"], r["score"]
243
  else:
244
  score = 0.0
245
- if sift < 0.05: score += 0.4
246
- if polyphen > 0.85: score += 0.35
247
- if gnomad < 0.0001: score += 0.25
248
  score = round(score, 3)
249
  cls = ("Pathogenic" if score > 0.6 else
250
  "Likely Pathogenic" if score > 0.4 else "Benign")
@@ -253,16 +195,14 @@ def predict_variant(hgvs, sift, polyphen, gnomad):
253
  icon = "⚠️ WARNING" if "Pathogenic" in cls else "✅ OK"
254
  bar_w = int(score * 100)
255
  explanation = PLAIN.get(cls, "")
256
- log_entry("OpenVariant", hgvs or f"SIFT={sift}",
257
- f"{cls} score={score}")
258
  return (
259
  f"<div style='background:{CARD};padding:16px;border-radius:8px;"
260
  f"font-family:sans-serif;color:{TXT}'>"
261
  f"<h3 style='color:{colour}'>{icon} {cls}</h3>"
262
  f"<p>Score: <b>{score:.3f}</b> &nbsp;|&nbsp; Confidence: <b>{conf}</b></p>"
263
  f"<div style='background:#334155;border-radius:4px;height:16px'>"
264
- f"<div style='background:{colour};height:16px;border-radius:4px;"
265
- f"width:{bar_w}%'></div></div>"
266
  f"<p style='margin-top:12px'>{explanation}</p>"
267
  f"<p style='font-size:11px;color:#64748b'>Research only. Not clinical.</p>"
268
  f"</div>"
@@ -277,15 +217,12 @@ def predict_corona(size, zeta, peg, lipid):
277
  if size < 100: score += 1
278
  proteins = ["ApoE","Albumin","Fibrinogen","Vitronectin","ApoA-I"]
279
  dominant = proteins[min(score, 4)]
280
- efficacy = ("High" if score >= 4 else
281
- "Medium" if score >= 2 else "Low")
282
- result = (f"**Dominant corona protein:** {dominant}\n\n"
283
- f"**Predicted efficacy class:** {efficacy}\n\n"
284
- f"**Composite score:** {score}/6")
285
- log_entry("LNP Corona",
286
- f"size={size},zeta={zeta},peg={peg},lipid={lipid}",
287
  f"dominant={dominant},efficacy={efficacy}")
288
- return result
 
 
289
 
290
  def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
291
  vals = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10]
@@ -311,15 +248,12 @@ def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
311
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
312
  plt.close()
313
  buf.seek(0)
314
- log_entry("Liquid Biopsy",
315
- f"CTHRC1={c1},FHL2={c2},LDHA={c3}...",
316
- f"{label} prob={prob:.2f}")
317
  return (
318
  f"<div style='background:{CARD};padding:12px;border-radius:8px;"
319
  f"color:{colour};font-size:20px;font-family:sans-serif'>"
320
  f"<b>{label}</b><br>"
321
- f"<span style='color:{TXT};font-size:14px'>"
322
- f"Probability: {prob:.2f}</span></div>"
323
  ), Image.open(buf)
324
 
325
  def predict_flow(size, zeta, peg, charge, flow_rate):
@@ -333,14 +267,10 @@ def predict_flow(size, zeta, peg, charge, flow_rate):
333
  ks = 0.038 * (1 + flow_rate/40)
334
  fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
335
  ax.set_facecolor(CARD)
336
- ax.plot(t, 60*np.exp(-0.03*t)+20,
337
- color="#60a5fa", ls="--", label="Albumin (static)")
338
- ax.plot(t, 60*np.exp(-kf*t)+10,
339
- color="#60a5fa", label="Albumin (flow)")
340
- ax.plot(t, 14*(1-np.exp(-0.038*t))+5,
341
- color=ACC, ls="--", label="ApoE (static)")
342
- ax.plot(t, 20*(1-np.exp(-ks*t))+5,
343
- color=ACC, label="ApoE (flow)")
344
  ax.set_xlabel("Time (min)", color=TXT)
345
  ax.set_ylabel("% Corona", color=TXT)
346
  ax.tick_params(colors=TXT)
@@ -353,28 +283,21 @@ def predict_flow(size, zeta, peg, charge, flow_rate):
353
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
354
  plt.close()
355
  buf.seek(0)
356
- log_entry("Flow Corona",
357
- f"flow={flow_rate},charge={charge}",
358
- f"CSI={csi},{stability}")
359
- return (f"**Corona Shift Index: {csi}** — {stability}",
360
- Image.open(buf))
361
 
362
  def predict_bbb(smiles, pka, zeta):
363
  logp = smiles.count("C")*0.3 - smiles.count("O")*0.5 + 1.5
364
- apoe_pct = max(0, min(40,
365
- (7.0-pka)*8 + abs(zeta)*0.5 + logp*0.8))
366
  bbb_prob = min(0.95, apoe_pct/30)
367
- tier = ("HIGH (>20%)" if apoe_pct > 20 else
368
  "MEDIUM (10-20%)" if apoe_pct > 10 else "LOW (<10%)")
369
  cats = ["ApoE%","BBB","logP","pKa fit","Zeta"]
370
- vals = [apoe_pct/40, bbb_prob,
371
- min(logp/5,1), (7-abs(pka-6.5))/7,
372
- (10-abs(zeta))/10]
373
  angles = np.linspace(0, 2*np.pi, len(cats), endpoint=False).tolist()
374
  v2, a2 = vals+[vals[0]], angles+[angles[0]]
375
- fig, ax = plt.subplots(figsize=(5, 4),
376
- subplot_kw={"polar":True},
377
- facecolor=CARD)
378
  ax.set_facecolor(CARD)
379
  ax.plot(a2, v2, color=ACC, linewidth=2)
380
  ax.fill(a2, v2, color=ACC, alpha=0.2)
@@ -386,18 +309,15 @@ def predict_bbb(smiles, pka, zeta):
386
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
387
  plt.close()
388
  buf.seek(0)
389
- log_entry("LNP Brain",
390
- f"pka={pka},zeta={zeta}",
391
- f"ApoE={apoe_pct:.1f}%,BBB={bbb_prob:.2f}")
392
- result_text = (f"**Predicted ApoE:** {apoe_pct:.1f}% — {tier}\n\n"
393
- f"**BBB Probability:** {bbb_prob:.2f}")
394
- return result_text, Image.open(buf)
395
 
396
  def extract_corona(text):
397
  out = {
398
- "nanoparticle_composition":"",
399
- "size_nm":None,"zeta_mv":None,"PDI":None,
400
- "protein_source":"","corona_proteins":[],"confidence":{}
401
  }
402
  m = re.search(r"(\d+\.?\d*)\s*(?:nm|nanometer)", text, re.I)
403
  if m:
@@ -411,41 +331,31 @@ def extract_corona(text):
411
  if m:
412
  out["PDI"] = float(m.group(1))
413
  out["confidence"]["PDI"] = "HIGH"
414
- for src in ["human plasma","human serum",
415
- "fetal bovine serum","FBS","PBS"]:
416
  if src.lower() in text.lower():
417
  out["protein_source"] = src
418
  out["confidence"]["protein_source"] = "HIGH"
419
  break
420
  out["corona_proteins"] = [
421
- {"name":p,"confidence":"MEDIUM"}
422
- for p in PROTEINS if p in text.lower()
423
  ]
424
- for lip in ["DSPC","DOPE","MC3","DLin",
425
- "cholesterol","PEG","DOTAP"]:
426
  if lip in text:
427
  out["nanoparticle_composition"] += lip + " "
428
- out["nanoparticle_composition"] = \
429
- out["nanoparticle_composition"].strip()
430
  flags = []
431
  if not out["size_nm"]: flags.append("size_nm not found")
432
  if not out["zeta_mv"]: flags.append("zeta_mv not found")
433
  if not out["corona_proteins"]: flags.append("no proteins detected")
434
- summary = ("All key fields extracted"
435
- if not flags else " | ".join(flags))
436
- log_entry("AutoCorona NLP",
437
- text[:80]+"...",
438
  f"proteins={len(out['corona_proteins'])},{summary}")
439
  return json.dumps(out, indent=2), summary
440
 
441
- # ── CSS ───────────────────────────────────────────────────────────────────────
442
  css = (
443
- f"body,.gradio-container{{background:{BG}!important;"
444
- f"color:{TXT}!important}}"
445
- f".tab-nav button{{color:{TXT}!important;"
446
- f"background:{CARD}!important}}"
447
- f".tab-nav button.selected{{border-bottom:2px solid {ACC}!important;"
448
- f"color:{ACC}!important}}"
449
  f"h1,h2,h3{{color:{ACC}!important}}"
450
  f".gr-button-primary{{background:{ACC}!important;border:none!important}}"
451
  f"footer{{display:none!important}}"
@@ -454,105 +364,76 @@ css = (
454
  LEARNING_CASES = """
455
  ## 🧪 Top 5 Guided Investigations
456
 
457
- ---
458
  ### Case 1 — Beginner 🟢
459
  **Question:** Why is the same gene position benign vs pathogenic?
460
-
461
- **Steps:**
462
- 1. OpenVariant tab enter `BRCA1:p.R1699Q` note: Benign
463
- 2. Enter `BRCA1:p.R1699W` → note: Pathogenic
464
- 3. Same position (R1699), different amino acid change
465
- 4. Write in your notes: *what changed and why?*
466
 
467
  **Key concept:** Amino acid polarity determines protein folding impact.
468
 
469
  ---
470
  ### Case 2 — Beginner 🟢
471
  **Question:** How does PEG% change what protein sticks to LNPs?
 
 
 
472
 
473
- **Steps:**
474
- 1. LNP Corona tab → Lipid=Ionizable, Zeta=-5, Size=100
475
- 2. Set PEG=0.5% → note dominant protein
476
- 3. Set PEG=2.5% → compare
477
- 4. LNP Brain tab → same pKa=6.5, compare ApoE%
478
-
479
- **Key concept:** PEG shields the surface → less Fibrinogen, more ApoE.
480
 
481
  ---
482
  ### Case 3 — Intermediate 🟡
483
  **Question:** Does blood flow change corona composition?
 
 
 
484
 
485
- **Steps:**
486
- 1. Flow Corona tab → Flow=0, Ionizable → screenshot mentally
487
- 2. Flow=40 (arterial) → same lipid → compare ApoE curve
488
- 3. Note: at what minute does ApoE plateau in each case?
489
- 4. Question: why does brain delivery need ApoE in the corona?
490
-
491
- **Key concept:** Vroman effect — fast proteins (albumin) displaced by
492
- slow but higher-affinity proteins (ApoE) under flow.
493
 
494
  ---
495
  ### Case 4 — Intermediate 🟡
496
- **Question:** Which cancer type has the most novel siRNA targets?
497
-
498
- **Steps:**
499
- 1. TP53 siRNA tab LUAD count "Novel" in Drug_status
500
- 2. Repeat for BRCA and COAD
501
- 3. Which has most untapped targets?
502
- 4. Pick one "Novel" gene → Google: "[gene] cancer therapeutic target"
503
-
504
- **Key concept:** Novel = no approved drug yet = research opportunity.
505
 
506
  ---
507
  ### Case 5 — Advanced 🔴
508
- **Question:** Can you identify a cancer sample from protein levels?
509
-
510
- **Steps:**
511
- 1. Liquid Biopsy tab → all sliders at 0 → should say HEALTHY
512
  2. Set CTHRC1=2.5, FHL2=2.0, LDHA=1.8 → observe
513
- 3. Try to find the minimum CTHRC1 value that tips to CANCER
514
- 4. AutoCorona NLP → paste a PubMed abstract about cancer proteomics
515
- 5. Check: does the abstract mention any of the 10 biomarkers?
516
 
517
- **Key concept:** CTHRC1 is the single strongest cancer indicator
518
- in the panel — its weight (0.18) dominates the score.
519
  """
520
 
521
- # ── BUILD ─────────────────────────────────────────────────────────────────────
522
  with gr.Blocks(css=css, title="K R&D Lab") as demo:
523
- last_result_state = gr.State("")
524
 
525
  gr.Markdown(
526
  "# 🧬 K R&D Lab — Computational Biology Suite\n"
527
  "**Oksana Kolisnyk** · ML Engineer · "
528
  "[KOSATIKS GROUP](https://kosatiks-group.pp.ua)\n"
529
- "> 10 open-source tools + lab journal. "
530
- "Hypothesis testing across disciplines."
531
  )
532
 
533
  with gr.Tabs():
534
 
535
- # ── Tab 1 ──
536
  with gr.TabItem("🧬 BRCA2 miRNA"):
537
  gr.Markdown("### Tumor Suppressor miRNAs")
538
- g1 = gr.Dropdown(["BRCA2","BRCA1","TP53"],
539
- value="BRCA2", label="Gene")
540
  b1 = gr.Button("Find miRNAs", variant="primary")
541
  o1 = gr.Dataframe(label="Top 5 downregulated miRNAs")
542
  gr.Examples([["BRCA2"],["TP53"]], inputs=[g1])
543
  b1.click(predict_mirna, g1, o1)
544
 
545
- # ── Tab 2 ──
546
  with gr.TabItem("💉 TP53 siRNA"):
547
  gr.Markdown("### Synthetic Lethal siRNA Targets")
548
- g2 = gr.Dropdown(["LUAD","BRCA","COAD"],
549
- value="LUAD", label="Cancer type")
550
  b2 = gr.Button("Find Targets", variant="primary")
551
  o2 = gr.Dataframe(label="Top 5 siRNA targets")
552
  gr.Examples([["LUAD"],["BRCA"]], inputs=[g2])
553
  b2.click(predict_sirna, g2, o2)
554
 
555
- # ── Tab 3 ──
556
  with gr.TabItem("🧠 lncRNA-TREM2"):
557
  gr.Markdown("### lncRNA Networks in Alzheimer's")
558
  b3 = gr.Button("Load Results", variant="primary")
@@ -560,35 +441,25 @@ with gr.Blocks(css=css, title="K R&D Lab") as demo:
560
  o3b = gr.Dataframe(label="ASO Candidates")
561
  b3.click(get_lncrna, [], [o3a, o3b])
562
 
563
- # ── Tab 4 ──
564
  with gr.TabItem("💊 FGFR3 Drug"):
565
  gr.Markdown("### RNA-Directed Drug Discovery: FGFR3")
566
- g4 = gr.Radio(
567
- ["P1 (hairpin loop)","P10 (G-quadruplex)"],
568
- value="P1 (hairpin loop)", label="Target pocket")
569
  b4 = gr.Button("Screen Compounds", variant="primary")
570
  o4t = gr.Dataframe(label="Top 5 candidates")
571
  o4p = gr.Image(label="Binding scores")
572
- gr.Examples(
573
- [["P1 (hairpin loop)"],["P10 (G-quadruplex)"]],
574
- inputs=[g4])
575
  b4.click(predict_drug, g4, [o4t, o4p])
576
 
577
- # ── Tab 5 ──
578
  with gr.TabItem("🔬 OpenVariant"):
579
- gr.Markdown(
580
- "### OpenVariant Pathogenicity Classifier\n"
581
- "AUC=0.939 on ClinVar 2026.")
582
- hgvs = gr.Textbox(label="HGVS notation",
583
- placeholder="BRCA1:p.R1699Q")
584
  gr.Markdown("**Or enter scores manually:**")
585
  with gr.Row():
586
- sift = gr.Slider(0,1,0.5,label="SIFT (0=damaging)")
587
- pp = gr.Slider(0,1,0.5,label="PolyPhen-2")
588
- gn = gr.Slider(0,0.01,0.001,
589
- label="gnomAD AF",step=0.0001)
590
- b5 = gr.Button("Predict Pathogenicity",
591
- variant="primary")
592
  o5 = gr.HTML(label="Result")
593
  gr.Examples(
594
  [["BRCA1:p.R1699Q",0.82,0.05,0.0012],
@@ -597,30 +468,22 @@ with gr.Blocks(css=css, title="K R&D Lab") as demo:
597
  inputs=[hgvs,sift,pp,gn])
598
  b5.click(predict_variant, [hgvs,sift,pp,gn], o5)
599
 
600
- # ── Tab 6 ──
601
  with gr.TabItem("🧪 LNP Corona"):
602
  gr.Markdown("### LNP Protein Corona Prediction")
603
  with gr.Row():
604
- sz = gr.Slider(50,300,100,label="Size (nm)")
605
- zt = gr.Slider(-40,10,-5, label="Zeta (mV)")
606
  with gr.Row():
607
- pg = gr.Slider(0,5,1.5,label="PEG mol%")
608
- lp = gr.Dropdown(
609
- ["Ionizable","Cationic","Anionic","Neutral"],
610
- value="Ionizable", label="Lipid type")
611
  b6 = gr.Button("Predict", variant="primary")
612
  o6 = gr.Markdown()
613
- gr.Examples(
614
- [[100,-5,1.5,"Ionizable"],
615
- [80,5,0.5,"Cationic"]],
616
- inputs=[sz,zt,pg,lp])
617
  b6.click(predict_corona, [sz,zt,pg,lp], o6)
618
 
619
- # ── Tab 7 ──
620
  with gr.TabItem("🩸 Liquid Biopsy"):
621
- gr.Markdown(
622
- "### Protein Corona Cancer Diagnostics\n"
623
- "Classify cancer vs healthy from protein z-scores.")
624
  with gr.Row():
625
  p1=gr.Slider(-3,3,0,label="CTHRC1")
626
  p2=gr.Slider(-3,3,0,label="FHL2")
@@ -640,135 +503,99 @@ with gr.Blocks(css=css, title="K R&D Lab") as demo:
640
  [[2,2,1.5,1.8,1.6,-1,-1.2,-0.8,1.4,-1.1],
641
  [0,0,0,0,0,0,0,0,0,0]],
642
  inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10])
643
- b7.click(predict_cancer,
644
- [p1,p2,p3,p4,p5,p6,p7,p8,p9,p10],
645
- [o7t, o7p])
646
 
647
- # ── Tab 8 ──
648
  with gr.TabItem("🌊 Flow Corona"):
649
  gr.Markdown("### Corona Remodeling Under Blood Flow")
650
  with gr.Row():
651
- s8 = gr.Slider(50,300,100,label="Size (nm)")
652
- z8 = gr.Slider(-40,10,-5, label="Zeta (mV)")
653
- pg8 = gr.Slider(0,5,1.5, label="PEG mol%")
654
  with gr.Row():
655
- ch8 = gr.Dropdown(
656
- ["Ionizable","Cationic","Anionic","Neutral"],
657
- value="Ionizable", label="Charge type")
658
- fl8 = gr.Slider(0,40,20,
659
- label="Flow rate cm/s (aorta=40)")
660
  b8 = gr.Button("Model Vroman Effect", variant="primary")
661
  o8t = gr.Markdown()
662
  o8p = gr.Image(label="Kinetics plot")
663
- gr.Examples(
664
- [[100,-5,1.5,"Ionizable",40],
665
- [150,5,0.5,"Cationic",10]],
666
- inputs=[s8,z8,pg8,ch8,fl8])
667
  b8.click(predict_flow, [s8,z8,pg8,ch8,fl8], [o8t,o8p])
668
 
669
- # ── Tab 9 ──
670
  with gr.TabItem("🧠 LNP Brain"):
671
  gr.Markdown("### LNP Brain Delivery Predictor")
672
- smi = gr.Textbox(
673
- label="Ionizable lipid SMILES",
674
- value="CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C")
675
  with gr.Row():
676
- pk = gr.Slider(4,8,6.5,step=0.1,label="pKa")
677
- zt9 = gr.Slider(-20,10,-3, label="Zeta (mV)")
678
  b9 = gr.Button("Predict BBB Crossing", variant="primary")
679
  o9t = gr.Markdown()
680
  o9p = gr.Image(label="Radar profile")
681
- gr.Examples(
682
- [["CC(C)CC(=O)OCC(COC(=O)CC(C)C)"
683
- "OC(=O)CC(C)C", 6.5, -3]],
684
- inputs=[smi,pk,zt9])
685
  b9.click(predict_bbb, [smi,pk,zt9], [o9t,o9p])
686
 
687
- # ── Tab 10 ──
688
  with gr.TabItem("📄 AutoCorona NLP"):
689
- gr.Markdown(
690
- "### AutoCorona NLP Extraction\n"
691
- "Paste any paper abstract to extract corona data.")
692
- txt = gr.Textbox(lines=6, label="Paper abstract",
693
- placeholder="Paste text here...")
694
  b10 = gr.Button("Extract Data", variant="primary")
695
  o10j = gr.Code(label="Extracted JSON", language="json")
696
  o10f = gr.Textbox(label="Validation flags")
697
  gr.Examples([[
698
- "LNPs composed of MC3, DSPC, Cholesterol "
699
- "(50:10:40 mol%) with 1.5% PEG-DMG. "
700
- "Hydrodynamic diameter was 98 nm, "
701
- "zeta potential -3.2 mV, PDI 0.12. "
702
- "Incubated in human plasma. "
703
- "Corona: albumin, apolipoprotein E, fibrinogen."
704
  ]], inputs=[txt])
705
  b10.click(extract_corona, txt, [o10j, o10f])
706
 
707
- # ── Tab 11 — Lab Journal ──────────────────────────────────
708
  with gr.TabItem("📓 Lab Journal"):
709
- gr.Markdown(
710
- "### Your Research Log\n"
711
- "Every query is auto-saved. "
712
- "Add notes to any result below.")
713
  with gr.Row():
714
  note_text = gr.Textbox(
715
  label="📝 Add observation / conclusion",
716
- placeholder=
717
- "What did you discover? What's your next question?",
718
  lines=3)
719
- note_tab = gr.Textbox(
720
- label="Which tool? (auto-fill or type)",
721
- value="General")
722
- note_last = gr.Textbox(
723
- label="Result to annotate", visible=False)
724
- save_btn = gr.Button("💾 Save Observation",
725
- variant="primary")
726
  save_msg = gr.Markdown()
727
  journal_df = gr.Dataframe(
728
  label="📋 Full History",
729
  value=load_journal,
730
- every=30,
731
  interactive=False)
732
  refresh_btn = gr.Button("🔄 Refresh")
733
  refresh_btn.click(load_journal, [], journal_df)
734
- save_btn.click(
735
- save_note,
736
- [note_text, note_tab, note_last],
737
- [save_msg, journal_df])
738
- gr.Markdown(
739
- "📥 **Download your log:** "
740
- "file saved as `lab_journal.csv` in the app folder.")
741
-
742
- # ── Tab 12 — Learning Mode ────────────────────────────────
743
  with gr.TabItem("📚 Learning Mode"):
744
  gr.Markdown(LEARNING_CASES)
745
- gr.Markdown("---")
746
- gr.Markdown("### 📖 Quick Reference — What each tool does")
747
  gr.Markdown("""
748
- | Tool | What it predicts | Key input | Why it matters |
749
- |------|-----------------|-----------|----------------|
750
- | OpenVariant | Pathogenic / Benign | Gene mutation | Clinical genetics |
751
- | LNP Corona | Dominant corona protein | Formulation params | Drug delivery |
752
- | Flow Corona | Vroman exchange kinetics | Flow rate | In vivo realism |
753
- | LNP Brain | ApoE% + BBB probability | pKa + zeta | GBM therapy |
754
- | Liquid Biopsy | Cancer vs Healthy | Protein z-scores | Diagnostics |
755
- | BRCA2 miRNA | Downregulated miRNAs | Gene name | RNA therapy |
756
- | TP53 siRNA | Synthetic lethal targets | Cancer type | Target discovery |
757
- | lncRNA-TREM2 | ceRNA network + ASOs | — | Alzheimer's |
758
- | FGFR3 Drug | Small molecule candidates | Pocket type | RNA drug design |
759
- | AutoCorona NLP | Structured data from text | Abstract | Literature mining |
760
  """)
761
  gr.Markdown("""
762
- ### 🔗 Essential Resources for Beginners
763
- - **PubMed:** https://pubmed.ncbi.nlm.nih.gov
764
- - **ClinVar (real variants):** https://www.ncbi.nlm.nih.gov/clinvar/
765
- - **UniProt (proteins):** https://www.uniprot.org
766
- - **ChEMBL (compounds):** https://www.ebi.ac.uk/chembl/
767
- - **KEGG (pathways):** https://www.genome.jp/kegg/
768
  """)
769
 
770
  gr.Markdown(
771
- "---\n**K R&D Lab** | Research tool only — not clinical | "
772
  "[GitHub](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026) | "
773
  "[KOSATIKS GROUP 🦈](https://kosatiks-group.pp.ua)"
774
  )
 
1
  import gradio as gr
2
  import pandas as pd
3
  import numpy as np
4
+ import json, re, csv
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
 
16
  ACC2 = "#38bdf8"
17
  TXT = "#f1f5f9"
18
 
 
19
  LOG_PATH = Path("./lab_journal.csv")
20
 
21
  def log_entry(tab, inputs, result, note=""):
22
  write_header = not LOG_PATH.exists()
23
  with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
24
+ w = csv.DictWriter(f, fieldnames=["timestamp","tab","inputs","result","note"])
 
25
  if write_header:
26
  w.writeheader()
27
  w.writerow({
28
  "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
29
+ "tab": tab,
30
+ "inputs": str(inputs),
31
+ "result": str(result)[:200],
32
+ "note": note
33
  })
34
 
35
  def load_journal():
36
  if not LOG_PATH.exists():
37
+ return pd.DataFrame(columns=["timestamp","tab","inputs","result","note"])
 
38
  return pd.read_csv(LOG_PATH)
39
 
40
  def save_note(note, tab, last_result):
41
  log_entry(tab, "", last_result, note)
42
  return "✅ Saved!", load_journal()
43
 
 
44
  MIRNA_DB = {
45
  "BRCA2": [
46
+ {"miRNA":"hsa-miR-148a-3p","log2FC":-0.70,"padj":0.013,"targets":"DNMT1, AKT2","pathway":"Epigenetic reprogramming"},
47
+ {"miRNA":"hsa-miR-30e-5p","log2FC":-0.49,"padj":0.032,"targets":"MYC, KRAS","pathway":"Oncogene suppression"},
48
+ {"miRNA":"hsa-miR-551b-3p","log2FC":-0.59,"padj":0.048,"targets":"SMAD4, CDK6","pathway":"TGF-beta / CDK4/6"},
49
+ {"miRNA":"hsa-miR-22-3p","log2FC":-0.43,"padj":0.041,"targets":"HIF1A, PTEN","pathway":"Hypoxia / PI3K"},
50
+ {"miRNA":"hsa-miR-200c-3p","log2FC":-0.38,"padj":0.044,"targets":"ZEB1, ZEB2","pathway":"EMT suppression"},
 
 
 
 
 
51
  ],
52
  "BRCA1": [
53
+ {"miRNA":"hsa-miR-155-5p","log2FC":-0.81,"padj":0.008,"targets":"SHIP1, SOCS1","pathway":"Immune evasion"},
54
+ {"miRNA":"hsa-miR-146a-5p","log2FC":-0.65,"padj":0.019,"targets":"TRAF6, IRAK1","pathway":"NF-kB signalling"},
55
+ {"miRNA":"hsa-miR-21-5p","log2FC":-0.55,"padj":0.027,"targets":"PTEN, PDCD4","pathway":"Apoptosis"},
56
+ {"miRNA":"hsa-miR-17-5p","log2FC":-0.47,"padj":0.036,"targets":"RB1, E2F1","pathway":"Cell cycle"},
57
+ {"miRNA":"hsa-miR-34a-5p","log2FC":-0.41,"padj":0.049,"targets":"BCL2, CDK6","pathway":"p53 axis"},
 
 
 
 
 
58
  ],
59
  "TP53": [
60
+ {"miRNA":"hsa-miR-34a-5p","log2FC":-1.10,"padj":0.001,"targets":"BCL2, CDK6","pathway":"p53-miR-34 axis"},
61
+ {"miRNA":"hsa-miR-192-5p","log2FC":-0.90,"padj":0.005,"targets":"MDM2, DHFR","pathway":"p53 feedback"},
62
+ {"miRNA":"hsa-miR-145-5p","log2FC":-0.75,"padj":0.012,"targets":"MYC, EGFR","pathway":"Growth suppression"},
63
+ {"miRNA":"hsa-miR-107","log2FC":-0.62,"padj":0.023,"targets":"CDK6, HIF1B","pathway":"Hypoxia / cell cycle"},
64
+ {"miRNA":"hsa-miR-215-5p","log2FC":-0.51,"padj":0.038,"targets":"DTL, DHFR","pathway":"DNA damage response"},
 
 
 
 
 
65
  ],
66
  }
67
 
68
  SIRNA_DB = {
69
+ "LUAD": [
70
+ {"Gene":"SPC24","dCERES":-0.175,"log2FC":1.13,"Drug_status":"Novel","siRNA":"GCAGCUGAAGAAACUGAAU"},
71
+ {"Gene":"BUB1B","dCERES":-0.119,"log2FC":1.12,"Drug_status":"Novel","siRNA":"CCAAAGAGCUGAAGAACAU"},
72
+ {"Gene":"CDC45","dCERES":-0.144,"log2FC":1.26,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
73
+ {"Gene":"PLK1","dCERES":-0.239,"log2FC":1.03,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
74
+ {"Gene":"CDK1","dCERES":-0.201,"log2FC":1.00,"Drug_status":"Clinical","siRNA":"GCAGAAGCACUGAAGAUUU"},
 
 
 
 
 
75
  ],
76
+ "BRCA": [
77
+ {"Gene":"AURKA","dCERES":-0.165,"log2FC":1.20,"Drug_status":"Clinical","siRNA":"GCACUGAAGAUGCAGAAUU"},
78
+ {"Gene":"AURKB","dCERES":-0.140,"log2FC":1.15,"Drug_status":"Clinical","siRNA":"CCUGAAGACGCUCAAGGUU"},
79
+ {"Gene":"CENPW","dCERES":-0.125,"log2FC":0.95,"Drug_status":"Novel","siRNA":"GCAGAAGCACUGAAGAUUU"},
80
+ {"Gene":"RFC2","dCERES":-0.136,"log2FC":0.50,"Drug_status":"Novel","siRNA":"GCAAGAUGCAGAAGCACUU"},
81
+ {"Gene":"TYMS","dCERES":-0.131,"log2FC":0.72,"Drug_status":"Approved","siRNA":"GGACGCUCAAGAUGCAGAU"},
 
 
 
 
 
82
  ],
83
+ "COAD": [
84
+ {"Gene":"KRAS","dCERES":-0.210,"log2FC":0.80,"Drug_status":"Clinical","siRNA":"GCUGGAGCUGGUGGUAGUU"},
85
+ {"Gene":"WEE1","dCERES":-0.180,"log2FC":1.05,"Drug_status":"Clinical","siRNA":"GCAGCUGAAGAAACUGAAU"},
86
+ {"Gene":"CHEK1","dCERES":-0.155,"log2FC":0.90,"Drug_status":"Clinical","siRNA":"CCAAAGAGCUGAAGAACAU"},
87
+ {"Gene":"RFC2","dCERES":-0.130,"log2FC":0.55,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
88
+ {"Gene":"PKMYT1","dCERES":-0.122,"log2FC":1.07,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
 
 
 
 
 
89
  ],
90
  }
91
 
92
  CERNA = [
93
+ {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"AKT1","pathway":"TREM2 core signaling"},
94
+ {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"NFKB1","pathway":"Neuroinflammation"},
95
+ {"lncRNA":"GAS5","miRNA":"hsa-miR-21-5p","target":"PTEN","pathway":"Neuroinflammation"},
96
+ {"lncRNA":"GAS5","miRNA":"hsa-miR-222-3p","target":"IL1B","pathway":"Neuroinflammation"},
97
+ {"lncRNA":"HOTAIRM1","miRNA":"hsa-miR-9-5p","target":"TREM2","pathway":"Direct TREM2 regulation"},
 
 
 
 
 
98
  ]
99
  ASO = [
100
+ {"lncRNA":"GAS5","position":119,"accessibility":0.653,"GC_pct":50,"Tm":47.2,"priority":"HIGH"},
101
+ {"lncRNA":"CYTOR","position":507,"accessibility":0.653,"GC_pct":50,"Tm":46.8,"priority":"HIGH"},
102
+ {"lncRNA":"HOTAIRM1","position":234,"accessibility":0.621,"GC_pct":44,"Tm":44.1,"priority":"MEDIUM"},
103
+ {"lncRNA":"LINC00847","position":89,"accessibility":0.598,"GC_pct":56,"Tm":48.3,"priority":"MEDIUM"},
104
+ {"lncRNA":"ZFAS1","position":312,"accessibility":0.571,"GC_pct":48,"Tm":45.5,"priority":"MEDIUM"},
 
 
 
 
 
105
  ]
106
 
107
  FGFR3 = {
108
+ "P1 (hairpin loop)": [
109
+ {"Compound":"CHEMBL1575701","RNA_score":0.809,"Toxicity":0.01,"Final_score":0.793},
110
+ {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
111
+ {"Compound":"Thioguanine","RNA_score":0.888,"Toxicity":32.5,"Final_score":0.742},
112
+ {"Compound":"Deazaguanine","RNA_score":0.888,"Toxicity":35.0,"Final_score":0.735},
113
+ {"Compound":"CHEMBL441","RNA_score":0.775,"Toxicity":5.2,"Final_score":0.721},
 
 
 
 
 
114
  ],
115
+ "P10 (G-quadruplex)": [
116
+ {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
117
+ {"Compound":"CHEMBL5411515","RNA_score":0.945,"Toxicity":37.1,"Final_score":0.761},
118
+ {"Compound":"CHEMBL90","RNA_score":0.760,"Toxicity":2.1,"Final_score":0.745},
119
+ {"Compound":"CHEMBL102","RNA_score":0.748,"Toxicity":8.4,"Final_score":0.712},
120
+ {"Compound":"Berberine","RNA_score":0.735,"Toxicity":3.2,"Final_score":0.708},
 
 
 
 
 
121
  ],
122
  }
123
 
124
  VARIANT_DB = {
125
+ "BRCA1:p.R1699Q": {"score":0.03,"cls":"Benign","conf":"High"},
126
+ "BRCA1:p.R1699W": {"score":0.97,"cls":"Pathogenic","conf":"High"},
127
+ "BRCA2:p.D2723A": {"score":0.999,"cls":"Pathogenic","conf":"High"},
128
+ "TP53:p.R248W": {"score":0.998,"cls":"Pathogenic","conf":"High"},
129
+ "TP53:p.R248Q": {"score":0.995,"cls":"Pathogenic","conf":"High"},
130
+ "EGFR:p.L858R": {"score":0.96,"cls":"Pathogenic","conf":"High"},
131
+ "ALK:p.F1174L": {"score":0.94,"cls":"Pathogenic","conf":"High"},
132
  }
133
  PLAIN = {
134
+ "Pathogenic": "This variant is likely to cause disease. Clinical follow-up is strongly recommended.",
135
  "Likely Pathogenic":"This variant is probably harmful. Discuss with your doctor.",
136
+ "Benign": "This variant is likely harmless. Common in the general population.",
137
+ "Likely Benign": "This variant is probably harmless. No strong reason for concern.",
138
  }
139
  BM_W = {
140
  "CTHRC1":0.18,"FHL2":0.15,"LDHA":0.14,"P4HA1":0.13,
 
145
  "clusterin","igm","iga","igg","complement","transferrin",
146
  "alpha-2-macroglobulin"]
147
 
 
148
  def predict_mirna(gene):
149
  df = pd.DataFrame(MIRNA_DB.get(gene, []))
150
+ log_entry("BRCA2 miRNA", gene, f"Found {len(df)} miRNAs for {gene}")
 
151
  return df
152
 
153
  def predict_sirna(cancer):
154
  df = pd.DataFrame(SIRNA_DB.get(cancer, []))
155
+ log_entry("TP53 siRNA", cancer, f"Found {len(df)} targets for {cancer}")
 
156
  return df
157
 
158
  def get_lncrna():
 
174
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
175
  plt.close()
176
  buf.seek(0)
177
+ log_entry("FGFR3 Drug", pocket, f"Top: {df.iloc[0]['Compound'] if len(df) else 'none'}")
 
178
  return df, Image.open(buf)
179
 
180
  def predict_variant(hgvs, sift, polyphen, gnomad):
 
184
  cls, conf, score = r["cls"], r["conf"], r["score"]
185
  else:
186
  score = 0.0
187
+ if sift < 0.05: score += 0.4
188
+ if polyphen > 0.85: score += 0.35
189
+ if gnomad < 0.0001: score += 0.25
190
  score = round(score, 3)
191
  cls = ("Pathogenic" if score > 0.6 else
192
  "Likely Pathogenic" if score > 0.4 else "Benign")
 
195
  icon = "⚠️ WARNING" if "Pathogenic" in cls else "✅ OK"
196
  bar_w = int(score * 100)
197
  explanation = PLAIN.get(cls, "")
198
+ log_entry("OpenVariant", hgvs or f"SIFT={sift}", f"{cls} score={score}")
 
199
  return (
200
  f"<div style='background:{CARD};padding:16px;border-radius:8px;"
201
  f"font-family:sans-serif;color:{TXT}'>"
202
  f"<h3 style='color:{colour}'>{icon} {cls}</h3>"
203
  f"<p>Score: <b>{score:.3f}</b> &nbsp;|&nbsp; Confidence: <b>{conf}</b></p>"
204
  f"<div style='background:#334155;border-radius:4px;height:16px'>"
205
+ f"<div style='background:{colour};height:16px;border-radius:4px;width:{bar_w}%'></div></div>"
 
206
  f"<p style='margin-top:12px'>{explanation}</p>"
207
  f"<p style='font-size:11px;color:#64748b'>Research only. Not clinical.</p>"
208
  f"</div>"
 
217
  if size < 100: score += 1
218
  proteins = ["ApoE","Albumin","Fibrinogen","Vitronectin","ApoA-I"]
219
  dominant = proteins[min(score, 4)]
220
+ efficacy = ("High" if score >= 4 else "Medium" if score >= 2 else "Low")
221
+ log_entry("LNP Corona", f"size={size},zeta={zeta},peg={peg},lipid={lipid}",
 
 
 
 
 
222
  f"dominant={dominant},efficacy={efficacy}")
223
+ return (f"**Dominant corona protein:** {dominant}\n\n"
224
+ f"**Predicted efficacy class:** {efficacy}\n\n"
225
+ f"**Composite score:** {score}/6")
226
 
227
  def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
228
  vals = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10]
 
248
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
249
  plt.close()
250
  buf.seek(0)
251
+ log_entry("Liquid Biopsy", f"CTHRC1={c1},FHL2={c2}...", f"{label} prob={prob:.2f}")
 
 
252
  return (
253
  f"<div style='background:{CARD};padding:12px;border-radius:8px;"
254
  f"color:{colour};font-size:20px;font-family:sans-serif'>"
255
  f"<b>{label}</b><br>"
256
+ f"<span style='color:{TXT};font-size:14px'>Probability: {prob:.2f}</span></div>"
 
257
  ), Image.open(buf)
258
 
259
  def predict_flow(size, zeta, peg, charge, flow_rate):
 
267
  ks = 0.038 * (1 + flow_rate/40)
268
  fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
269
  ax.set_facecolor(CARD)
270
+ ax.plot(t, 60*np.exp(-0.03*t)+20, color="#60a5fa", ls="--", label="Albumin (static)")
271
+ ax.plot(t, 60*np.exp(-kf*t)+10, color="#60a5fa", label="Albumin (flow)")
272
+ ax.plot(t, 14*(1-np.exp(-0.038*t))+5, color=ACC, ls="--", label="ApoE (static)")
273
+ ax.plot(t, 20*(1-np.exp(-ks*t))+5, color=ACC, label="ApoE (flow)")
 
 
 
 
274
  ax.set_xlabel("Time (min)", color=TXT)
275
  ax.set_ylabel("% Corona", color=TXT)
276
  ax.tick_params(colors=TXT)
 
283
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
284
  plt.close()
285
  buf.seek(0)
286
+ log_entry("Flow Corona", f"flow={flow_rate},charge={charge}", f"CSI={csi},{stability}")
287
+ return f"**Corona Shift Index: {csi}** — {stability}", Image.open(buf)
 
 
 
288
 
289
  def predict_bbb(smiles, pka, zeta):
290
  logp = smiles.count("C")*0.3 - smiles.count("O")*0.5 + 1.5
291
+ apoe_pct = max(0, min(40, (7.0-pka)*8 + abs(zeta)*0.5 + logp*0.8))
 
292
  bbb_prob = min(0.95, apoe_pct/30)
293
+ tier = ("HIGH (>20%)" if apoe_pct > 20 else
294
  "MEDIUM (10-20%)" if apoe_pct > 10 else "LOW (<10%)")
295
  cats = ["ApoE%","BBB","logP","pKa fit","Zeta"]
296
+ vals = [apoe_pct/40, bbb_prob, min(logp/5,1),
297
+ (7-abs(pka-6.5))/7, (10-abs(zeta))/10]
 
298
  angles = np.linspace(0, 2*np.pi, len(cats), endpoint=False).tolist()
299
  v2, a2 = vals+[vals[0]], angles+[angles[0]]
300
+ fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={"polar":True}, facecolor=CARD)
 
 
301
  ax.set_facecolor(CARD)
302
  ax.plot(a2, v2, color=ACC, linewidth=2)
303
  ax.fill(a2, v2, color=ACC, alpha=0.2)
 
309
  plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
310
  plt.close()
311
  buf.seek(0)
312
+ log_entry("LNP Brain", f"pka={pka},zeta={zeta}", f"ApoE={apoe_pct:.1f}%,BBB={bbb_prob:.2f}")
313
+ return (f"**Predicted ApoE:** {apoe_pct:.1f}% — {tier}\n\n"
314
+ f"**BBB Probability:** {bbb_prob:.2f}"), Image.open(buf)
 
 
 
315
 
316
  def extract_corona(text):
317
  out = {
318
+ "nanoparticle_composition": "",
319
+ "size_nm": None, "zeta_mv": None, "PDI": None,
320
+ "protein_source": "", "corona_proteins": [], "confidence": {}
321
  }
322
  m = re.search(r"(\d+\.?\d*)\s*(?:nm|nanometer)", text, re.I)
323
  if m:
 
331
  if m:
332
  out["PDI"] = float(m.group(1))
333
  out["confidence"]["PDI"] = "HIGH"
334
+ for src in ["human plasma","human serum","fetal bovine serum","FBS","PBS"]:
 
335
  if src.lower() in text.lower():
336
  out["protein_source"] = src
337
  out["confidence"]["protein_source"] = "HIGH"
338
  break
339
  out["corona_proteins"] = [
340
+ {"name": p, "confidence": "MEDIUM"} for p in PROTEINS if p in text.lower()
 
341
  ]
342
+ for lip in ["DSPC","DOPE","MC3","DLin","cholesterol","PEG","DOTAP"]:
 
343
  if lip in text:
344
  out["nanoparticle_composition"] += lip + " "
345
+ out["nanoparticle_composition"] = out["nanoparticle_composition"].strip()
 
346
  flags = []
347
  if not out["size_nm"]: flags.append("size_nm not found")
348
  if not out["zeta_mv"]: flags.append("zeta_mv not found")
349
  if not out["corona_proteins"]: flags.append("no proteins detected")
350
+ summary = "All key fields extracted" if not flags else " | ".join(flags)
351
+ log_entry("AutoCorona NLP", text[:80]+"...",
 
 
352
  f"proteins={len(out['corona_proteins'])},{summary}")
353
  return json.dumps(out, indent=2), summary
354
 
 
355
  css = (
356
+ f"body,.gradio-container{{background:{BG}!important;color:{TXT}!important}}"
357
+ f".tab-nav button{{color:{TXT}!important;background:{CARD}!important}}"
358
+ f".tab-nav button.selected{{border-bottom:2px solid {ACC}!important;color:{ACC}!important}}"
 
 
 
359
  f"h1,h2,h3{{color:{ACC}!important}}"
360
  f".gr-button-primary{{background:{ACC}!important;border:none!important}}"
361
  f"footer{{display:none!important}}"
 
364
  LEARNING_CASES = """
365
  ## 🧪 Top 5 Guided Investigations
366
 
 
367
  ### Case 1 — Beginner 🟢
368
  **Question:** Why is the same gene position benign vs pathogenic?
369
+ 1. OpenVariant → enter `BRCA1:p.R1699Q` → Benign
370
+ 2. Enter `BRCA1:p.R1699W` → Pathogenic
371
+ 3. Same position, different amino acid — what changed?
 
 
 
372
 
373
  **Key concept:** Amino acid polarity determines protein folding impact.
374
 
375
  ---
376
  ### Case 2 — Beginner 🟢
377
  **Question:** How does PEG% change what protein sticks to LNPs?
378
+ 1. LNP Corona → Ionizable, Zeta=-5, Size=100, PEG=0.5% → note protein
379
+ 2. PEG=2.5% → compare
380
+ 3. LNP Brain → pKa=6.5 → compare ApoE%
381
 
382
+ **Key concept:** More PEG → less Fibrinogen, more ApoE.
 
 
 
 
 
 
383
 
384
  ---
385
  ### Case 3 — Intermediate 🟡
386
  **Question:** Does blood flow change corona composition?
387
+ 1. Flow Corona → Flow=0, Ionizable
388
+ 2. Flow=40 (arterial) → compare ApoE curve
389
+ 3. At what minute does ApoE plateau?
390
 
391
+ **Key concept:** Vroman effect — albumin displaced by ApoE under flow.
 
 
 
 
 
 
 
392
 
393
  ---
394
  ### Case 4 — Intermediate 🟡
395
+ **Question:** Which cancer has the most novel siRNA targets?
396
+ 1. TP53 siRNA → LUAD → count "Novel"
397
+ 2. Repeat BRCA, COAD
398
+ 3. Pick one Novel gene → Google: "[gene] cancer therapeutic target"
 
 
 
 
 
399
 
400
  ---
401
  ### Case 5 — Advanced 🔴
402
+ **Question:** Can you identify cancer from protein levels?
403
+ 1. Liquid Biopsy → all sliders=0 → HEALTHY
 
 
404
  2. Set CTHRC1=2.5, FHL2=2.0, LDHA=1.8 → observe
405
+ 3. Find minimum CTHRC1 that tips to CANCER
 
 
406
 
407
+ **Key concept:** CTHRC1 weight (0.18) dominates the score.
 
408
  """
409
 
 
410
  with gr.Blocks(css=css, title="K R&D Lab") as demo:
 
411
 
412
  gr.Markdown(
413
  "# 🧬 K R&D Lab — Computational Biology Suite\n"
414
  "**Oksana Kolisnyk** · ML Engineer · "
415
  "[KOSATIKS GROUP](https://kosatiks-group.pp.ua)\n"
416
+ "> 10 open-source tools + lab journal."
 
417
  )
418
 
419
  with gr.Tabs():
420
 
 
421
  with gr.TabItem("🧬 BRCA2 miRNA"):
422
  gr.Markdown("### Tumor Suppressor miRNAs")
423
+ g1 = gr.Dropdown(["BRCA2","BRCA1","TP53"], value="BRCA2", label="Gene")
 
424
  b1 = gr.Button("Find miRNAs", variant="primary")
425
  o1 = gr.Dataframe(label="Top 5 downregulated miRNAs")
426
  gr.Examples([["BRCA2"],["TP53"]], inputs=[g1])
427
  b1.click(predict_mirna, g1, o1)
428
 
 
429
  with gr.TabItem("💉 TP53 siRNA"):
430
  gr.Markdown("### Synthetic Lethal siRNA Targets")
431
+ g2 = gr.Dropdown(["LUAD","BRCA","COAD"], value="LUAD", label="Cancer type")
 
432
  b2 = gr.Button("Find Targets", variant="primary")
433
  o2 = gr.Dataframe(label="Top 5 siRNA targets")
434
  gr.Examples([["LUAD"],["BRCA"]], inputs=[g2])
435
  b2.click(predict_sirna, g2, o2)
436
 
 
437
  with gr.TabItem("🧠 lncRNA-TREM2"):
438
  gr.Markdown("### lncRNA Networks in Alzheimer's")
439
  b3 = gr.Button("Load Results", variant="primary")
 
441
  o3b = gr.Dataframe(label="ASO Candidates")
442
  b3.click(get_lncrna, [], [o3a, o3b])
443
 
 
444
  with gr.TabItem("💊 FGFR3 Drug"):
445
  gr.Markdown("### RNA-Directed Drug Discovery: FGFR3")
446
+ g4 = gr.Radio(["P1 (hairpin loop)","P10 (G-quadruplex)"],
447
+ value="P1 (hairpin loop)", label="Target pocket")
 
448
  b4 = gr.Button("Screen Compounds", variant="primary")
449
  o4t = gr.Dataframe(label="Top 5 candidates")
450
  o4p = gr.Image(label="Binding scores")
451
+ gr.Examples([["P1 (hairpin loop)"],["P10 (G-quadruplex)"]], inputs=[g4])
 
 
452
  b4.click(predict_drug, g4, [o4t, o4p])
453
 
 
454
  with gr.TabItem("🔬 OpenVariant"):
455
+ gr.Markdown("### OpenVariant — Pathogenicity Classifier\nAUC=0.939 on ClinVar 2026.")
456
+ hgvs = gr.Textbox(label="HGVS notation", placeholder="BRCA1:p.R1699Q")
 
 
 
457
  gr.Markdown("**Or enter scores manually:**")
458
  with gr.Row():
459
+ sift = gr.Slider(0,1,0.5, label="SIFT (0=damaging)")
460
+ pp = gr.Slider(0,1,0.5, label="PolyPhen-2")
461
+ gn = gr.Slider(0,0.01,0.001, label="gnomAD AF", step=0.0001)
462
+ b5 = gr.Button("Predict Pathogenicity", variant="primary")
 
 
463
  o5 = gr.HTML(label="Result")
464
  gr.Examples(
465
  [["BRCA1:p.R1699Q",0.82,0.05,0.0012],
 
468
  inputs=[hgvs,sift,pp,gn])
469
  b5.click(predict_variant, [hgvs,sift,pp,gn], o5)
470
 
 
471
  with gr.TabItem("🧪 LNP Corona"):
472
  gr.Markdown("### LNP Protein Corona Prediction")
473
  with gr.Row():
474
+ sz = gr.Slider(50,300,100, label="Size (nm)")
475
+ zt = gr.Slider(-40,10,-5, label="Zeta (mV)")
476
  with gr.Row():
477
+ pg = gr.Slider(0,5,1.5, label="PEG mol%")
478
+ lp = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
479
+ value="Ionizable", label="Lipid type")
 
480
  b6 = gr.Button("Predict", variant="primary")
481
  o6 = gr.Markdown()
482
+ gr.Examples([[100,-5,1.5,"Ionizable"],[80,5,0.5,"Cationic"]], inputs=[sz,zt,pg,lp])
 
 
 
483
  b6.click(predict_corona, [sz,zt,pg,lp], o6)
484
 
 
485
  with gr.TabItem("🩸 Liquid Biopsy"):
486
+ gr.Markdown("### Protein Corona Cancer Diagnostics\nClassify cancer vs healthy.")
 
 
487
  with gr.Row():
488
  p1=gr.Slider(-3,3,0,label="CTHRC1")
489
  p2=gr.Slider(-3,3,0,label="FHL2")
 
503
  [[2,2,1.5,1.8,1.6,-1,-1.2,-0.8,1.4,-1.1],
504
  [0,0,0,0,0,0,0,0,0,0]],
505
  inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10])
506
+ b7.click(predict_cancer, [p1,p2,p3,p4,p5,p6,p7,p8,p9,p10], [o7t,o7p])
 
 
507
 
 
508
  with gr.TabItem("🌊 Flow Corona"):
509
  gr.Markdown("### Corona Remodeling Under Blood Flow")
510
  with gr.Row():
511
+ s8 = gr.Slider(50,300,100, label="Size (nm)")
512
+ z8 = gr.Slider(-40,10,-5, label="Zeta (mV)")
513
+ pg8 = gr.Slider(0,5,1.5, label="PEG mol%")
514
  with gr.Row():
515
+ ch8 = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
516
+ value="Ionizable", label="Charge type")
517
+ fl8 = gr.Slider(0,40,20, label="Flow rate cm/s (aorta=40)")
 
 
518
  b8 = gr.Button("Model Vroman Effect", variant="primary")
519
  o8t = gr.Markdown()
520
  o8p = gr.Image(label="Kinetics plot")
521
+ gr.Examples([[100,-5,1.5,"Ionizable",40],[150,5,0.5,"Cationic",10]],
522
+ inputs=[s8,z8,pg8,ch8,fl8])
 
 
523
  b8.click(predict_flow, [s8,z8,pg8,ch8,fl8], [o8t,o8p])
524
 
 
525
  with gr.TabItem("🧠 LNP Brain"):
526
  gr.Markdown("### LNP Brain Delivery Predictor")
527
+ smi = gr.Textbox(label="Ionizable lipid SMILES",
528
+ value="CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C")
 
529
  with gr.Row():
530
+ pk = gr.Slider(4,8,6.5, step=0.1, label="pKa")
531
+ zt9 = gr.Slider(-20,10,-3, label="Zeta (mV)")
532
  b9 = gr.Button("Predict BBB Crossing", variant="primary")
533
  o9t = gr.Markdown()
534
  o9p = gr.Image(label="Radar profile")
535
+ gr.Examples([["CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C", 6.5, -3]],
536
+ inputs=[smi,pk,zt9])
 
 
537
  b9.click(predict_bbb, [smi,pk,zt9], [o9t,o9p])
538
 
 
539
  with gr.TabItem("📄 AutoCorona NLP"):
540
+ gr.Markdown("### AutoCorona NLP Extraction\nPaste any paper abstract.")
541
+ txt = gr.Textbox(lines=6, label="Paper abstract", placeholder="Paste text here...")
 
 
 
542
  b10 = gr.Button("Extract Data", variant="primary")
543
  o10j = gr.Code(label="Extracted JSON", language="json")
544
  o10f = gr.Textbox(label="Validation flags")
545
  gr.Examples([[
546
+ "LNPs composed of MC3, DSPC, Cholesterol (50:10:40 mol%) with 1.5% PEG-DMG. "
547
+ "Hydrodynamic diameter was 98 nm, zeta potential -3.2 mV, PDI 0.12. "
548
+ "Incubated in human plasma. Corona: albumin, apolipoprotein E, fibrinogen."
 
 
 
549
  ]], inputs=[txt])
550
  b10.click(extract_corona, txt, [o10j, o10f])
551
 
 
552
  with gr.TabItem("📓 Lab Journal"):
553
+ gr.Markdown("### Your Research Log\nEvery query is auto-saved.")
 
 
 
554
  with gr.Row():
555
  note_text = gr.Textbox(
556
  label="📝 Add observation / conclusion",
557
+ placeholder="What did you discover? What's your next question?",
 
558
  lines=3)
559
+ note_tab = gr.Textbox(label="Which tool?", value="General")
560
+ note_last = gr.Textbox(label="Result to annotate", visible=False)
561
+ save_btn = gr.Button("💾 Save Observation", variant="primary")
 
 
 
 
562
  save_msg = gr.Markdown()
563
  journal_df = gr.Dataframe(
564
  label="📋 Full History",
565
  value=load_journal,
 
566
  interactive=False)
567
  refresh_btn = gr.Button("🔄 Refresh")
568
  refresh_btn.click(load_journal, [], journal_df)
569
+ save_btn.click(save_note, [note_text, note_tab, note_last], [save_msg, journal_df])
570
+ gr.Markdown("📥 Log saved as `lab_journal.csv` in the app folder.")
571
+
 
 
 
 
 
 
572
  with gr.TabItem("📚 Learning Mode"):
573
  gr.Markdown(LEARNING_CASES)
574
+ gr.Markdown("---\n### 📖 Quick Reference")
 
575
  gr.Markdown("""
576
+ | Tool | Predicts | Key input |
577
+ |------|----------|-----------|
578
+ | OpenVariant | Pathogenic/Benign | Gene mutation |
579
+ | LNP Corona | Dominant protein | Formulation |
580
+ | Flow Corona | Vroman kinetics | Flow rate |
581
+ | LNP Brain | ApoE% + BBB prob | pKa + zeta |
582
+ | Liquid Biopsy | Cancer/Healthy | Protein z-scores |
583
+ | BRCA2 miRNA | Downregulated miRNAs | Gene name |
584
+ | TP53 siRNA | Synthetic lethal targets | Cancer type |
585
+ | lncRNA-TREM2 | ceRNA + ASOs | — |
586
+ | FGFR3 Drug | Small molecules | Pocket type |
587
+ | AutoCorona NLP | Structured data | Abstract text |
588
  """)
589
  gr.Markdown("""
590
+ ### 🔗 Resources
591
+ - [PubMed](https://pubmed.ncbi.nlm.nih.gov)
592
+ - [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/)
593
+ - [UniProt](https://www.uniprot.org)
594
+ - [ChEMBL](https://www.ebi.ac.uk/chembl/)
 
595
  """)
596
 
597
  gr.Markdown(
598
+ "---\n**K R&D Lab** | Research only — not clinical | "
599
  "[GitHub](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026) | "
600
  "[KOSATIKS GROUP 🦈](https://kosatiks-group.pp.ua)"
601
  )