TEZv committed on
Commit
953e26a
·
verified ·
1 Parent(s): 1b9269f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +558 -1206
app.py CHANGED
@@ -1,1248 +1,600 @@
1
- """
2
- K R&D Lab — Cancer Research Suite
3
- Author: Oksana Kolisnyk | kosatiks-group.pp.ua
4
- Repo: github.com/TEZv/K-RnD-Lab-PHYLO-03_2026
5
- """
6
-
7
  import gradio as gr
8
- import requests
9
- import json
10
- import os
11
- import time
12
- import csv
13
- import math
14
- import hashlib
15
- import datetime
16
- import numpy as np
17
  import pandas as pd
 
 
18
  import matplotlib
19
  matplotlib.use("Agg")
20
  import matplotlib.pyplot as plt
21
- import matplotlib.colors as mcolors
22
- from matplotlib import cm
23
- import io
24
  from PIL import Image
 
 
25
 
26
- # ─────────────────────────────────────────────
27
- # CACHE SYSTEM (TTL = 24 h)
28
- # ─────────────────────────────────────────────
29
CACHE_DIR = "/tmp/cache"
os.makedirs(CACHE_DIR, exist_ok=True)
CACHE_TTL = 86400  # 24 hours in seconds


def _cache_key(endpoint: str, query: str) -> str:
    """Return the MD5 hex digest of ``"{endpoint}_{query}"``.

    The digest only serves as a fixed-length file-name component.
    """
    raw = f"{endpoint}_{query}"
    return hashlib.md5(raw.encode()).hexdigest()


def _cache_path(endpoint: str, query: str) -> str:
    """Return the JSON file path that stores the entry for (endpoint, query)."""
    return os.path.join(CACHE_DIR, f"{endpoint}_{_cache_key(endpoint, query)}.json")


def cache_get(endpoint: str, query: str):
    """Return the cached value for (endpoint, query), or ``None``.

    ``None`` is returned when there is no entry, when the entry is older
    than ``CACHE_TTL`` seconds (judged by file mtime), or when the file
    cannot be read or parsed.
    """
    path = _cache_path(endpoint, query)
    try:
        # getmtime raises OSError for a missing file, so no separate
        # exists() check (and no race between the two calls) is needed.
        if time.time() - os.path.getmtime(path) >= CACHE_TTL:
            return None
        with open(path, encoding="utf-8") as f:
            return json.load(f)
    except (OSError, ValueError):
        return None


def cache_set(endpoint: str, query: str, data) -> None:
    """Store *data* (JSON-serialisable) for (endpoint, query). Best effort.

    The value is written to a temporary file in ``CACHE_DIR`` and then moved
    into place with ``os.replace`` so a concurrent ``cache_get`` never sees a
    half-written file. Failures (I/O errors, non-serialisable data) are
    swallowed on purpose: caching must never break the caller.
    """
    import tempfile  # local import: the file-level import list is not touched

    tmp_path = None
    try:
        os.makedirs(CACHE_DIR, exist_ok=True)
        fd, tmp_path = tempfile.mkstemp(dir=CACHE_DIR, suffix=".tmp")
        with os.fdopen(fd, "w", encoding="utf-8") as f:
            json.dump(data, f)
        os.replace(tmp_path, _cache_path(endpoint, query))
        tmp_path = None  # moved into place, nothing left to clean up
    except (OSError, TypeError, ValueError):
        pass
    finally:
        if tmp_path is not None:
            try:
                os.remove(tmp_path)
            except OSError:
                pass
58
 
59
- # ─────────────────────────────────────────────
60
- # LAB JOURNAL
61
- # ─────────────────────────────────────────────
62
JOURNAL_FILE = "/tmp/lab_journal.csv"


def journal_log(tab: str, action: str, result: str, note: str = "") -> str:
    """Append one row to the lab journal CSV and return its timestamp.

    Args:
        tab: Identifier of the tab that produced the entry.
        action: Short description of the inputs / action.
        result: Result summary; only the first 200 characters are stored.
        note: Optional free-text note.

    Returns:
        The ISO-8601 UTC timestamp written to the row, or ``""`` when the
        journal could not be written (logging is best effort).
    """
    # Naive-UTC timestamp in the same text format utcnow().isoformat()
    # produced, without the deprecated datetime.utcnow().
    ts = (
        datetime.datetime.now(datetime.timezone.utc)
        .replace(tzinfo=None)
        .isoformat()
    )
    try:
        # Also emit the header when the file exists but is still empty.
        write_header = (
            not os.path.exists(JOURNAL_FILE)
            or os.path.getsize(JOURNAL_FILE) == 0
        )
        # Explicit UTF-8: result summaries may contain emoji / non-ASCII text.
        with open(JOURNAL_FILE, "a", newline="", encoding="utf-8") as f:
            w = csv.writer(f)
            if write_header:
                w.writerow(["timestamp", "tab", "action", "result_summary", "note"])
            w.writerow([ts, tab, action, str(result)[:200], note])
        return ts
    except (OSError, csv.Error, UnicodeError):
        return ""
77
 
78
def journal_read() -> str:
    """Return the 20 most recent journal rows as a Markdown table.

    Returns ``"No entries yet."`` when the journal file is missing,
    unreadable, malformed or empty.
    """
    try:
        df = pd.read_csv(JOURNAL_FILE)
    except (OSError, ValueError):
        # OSError: missing/unreadable file. ValueError covers pandas'
        # EmptyDataError and ParserError.
        return "No entries yet."
    if df.empty:
        return "No entries yet."

    recent = df.tail(20)
    try:
        return recent.to_markdown(index=False)
    except ImportError:
        # DataFrame.to_markdown needs the optional 'tabulate' package; fall
        # back to a plain-text table instead of pretending the journal is empty.
        return "```\n" + recent.to_string(index=False) + "\n```"
88
 
89
- # ─────────────────────────────────────────────
90
- # CONSTANTS
91
- # ─────────────────────────────────────────────
92
# Cancer type labels; each one has an entry in CANCER_EFO below.
CANCER_TYPES = [
    "GBM", "PDAC", "SCLC", "UVM", "DIPG",
    "ACC", "MCC", "PCNSL", "Pediatric AML",
]

# Cancer label -> identifier passed as `efoId` to the OpenTargets queries.
CANCER_EFO = {
    "GBM": "EFO_0000519",
    "PDAC": "EFO_0002618",
    "SCLC": "EFO_0000702",
    "UVM": "EFO_0004339",
    "DIPG": "EFO_0009708",
    "ACC": "EFO_0003060",
    "MCC": "EFO_0005558",
    "PCNSL": "EFO_0005543",
    "Pediatric AML": "EFO_0000222",
}

# Biological processes used as PubMed search terms by the A1 tab.
PROCESSES = [
    "autophagy", "ferroptosis", "protein corona",
    "RNA splicing", "phase separation", "m6A",
    "circRNA", "synthetic lethality", "immune exclusion",
    "enhancer hijacking", "lncRNA regulation",
    "metabolic reprogramming", "exosome biogenesis",
    "senescence", "mitophagy",
    "liquid-liquid phase separation", "cryptic splicing",
    "proteostasis", "redox biology", "translation regulation",
]

# Base URLs of the external services queried by the Group A tabs.
PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"
OT_GRAPHQL = "https://api.platform.opentargets.org/api/v4/graphql"
GNOMAD_GQL = "https://gnomad.broadinstitute.org/api"
CT_BASE = "https://clinicaltrials.gov/api/v2"
124
-
125
- # ─────────────────────────────────────────────
126
- # SHARED API HELPERS
127
- # ─────────────────────────────────────────────
128
-
129
def pubmed_count(query: str) -> int:
    """Return the PubMed record count for *query* (cached).

    Returns ``-1`` when the request fails or the response cannot be parsed;
    callers use that sentinel to mark the value as unavailable. Failures are
    not cached.
    """
    cached = cache_get("pubmed_count", query)
    if cached is not None:
        return cached
    try:
        time.sleep(0.34)  # throttle: at most ~3 E-utilities requests per second
        r = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi",
            params={"db": "pubmed", "term": query, "rettype": "count", "retmode": "json"},
            timeout=10,
        )
        r.raise_for_status()
        count = int(r.json()["esearchresult"]["count"])
    except (requests.RequestException, KeyError, TypeError, ValueError):
        return -1
    cache_set("pubmed_count", query, count)
    return count
147
-
148
-
149
def pubmed_search(query: str, retmax: int = 10) -> list:
    """Return up to *retmax* PMIDs matching *query* (cached).

    Returns an empty list when the request fails or the response cannot be
    parsed. Failures are not cached.
    """
    cache_id = f"{query}_{retmax}"
    cached = cache_get("pubmed_search", cache_id)
    if cached is not None:
        return cached
    try:
        time.sleep(0.34)  # throttle: at most ~3 E-utilities requests per second
        r = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi",
            params={"db": "pubmed", "term": query, "retmax": retmax, "retmode": "json"},
            timeout=10,
        )
        r.raise_for_status()
        ids = r.json()["esearchresult"]["idlist"]
    except (requests.RequestException, KeyError, TypeError, ValueError):
        return []
    cache_set("pubmed_search", cache_id, ids)
    return ids
167
-
168
-
169
def pubmed_summary(pmids: list) -> list:
    """Fetch E-utilities summaries for a list of PMID strings (cached).

    Returns the summary objects in the order of *pmids*, skipping IDs that
    are absent from the response. Returns an empty list for empty input or
    when the request fails.
    """
    if not pmids:
        return []
    id_list = ",".join(pmids)
    cached = cache_get("pubmed_summary", id_list)
    if cached is not None:
        return cached
    try:
        time.sleep(0.34)  # throttle: at most ~3 E-utilities requests per second
        r = requests.get(
            f"{PUBMED_BASE}/esummary.fcgi",
            params={"db": "pubmed", "id": id_list, "retmode": "json"},
            timeout=15,
        )
        r.raise_for_status()
        result = r.json().get("result", {})
        summaries = [result[pid] for pid in pmids if pid in result]
    except (requests.RequestException, AttributeError, TypeError, ValueError):
        return []
    cache_set("pubmed_summary", id_list, summaries)
    return summaries
190
-
191
-
192
def ot_query(gql: str, variables: dict = None) -> dict:
    """Run an OpenTargets GraphQL query and return the decoded JSON (cached).

    Args:
        gql: GraphQL query text.
        variables: Optional GraphQL variables.

    Returns:
        The response payload, or ``{"error": "<message>"}`` when the request
        fails or the body is not valid JSON.
    """
    key = json.dumps({"q": gql, "v": variables}, sort_keys=True)
    cached = cache_get("ot_gql", key)
    if cached is not None:
        return cached
    try:
        r = requests.post(
            OT_GRAPHQL,
            json={"query": gql, "variables": variables or {}},
            timeout=20,
        )
        r.raise_for_status()
        data = r.json()
    except (requests.RequestException, ValueError) as e:
        return {"error": str(e)}
    # GraphQL reports query errors in an "errors" member, possibly with HTTP
    # 200. Do not pin such a response in the cache for 24 hours.
    if not (isinstance(data, dict) and data.get("errors")):
        cache_set("ot_gql", key, data)
    return data
210
-
211
-
212
- # ─────────────────────────────────────────────
213
- # TAB A1 — GRAY ZONES EXPLORER
214
- # ─────────────────────────────────────────────
215
-
216
def a1_run(cancer_type: str):
    """Build a one-column heatmap of PubMed paper counts per process.

    For every entry of ``PROCESSES`` the query
    ``"<process>" AND "<cancer_type>"[tiab]`` is counted via ``pubmed_count``.
    Processes whose lookup failed (count ``-1``) are drawn as blank (white)
    cells and are left out of the gap ranking.

    Returns:
        ``(image, markdown)``: the heatmap as a PIL image and a Markdown
        summary of the five processes with the fewest papers.
    """
    import copy  # local import: only used to copy the colormap below

    today = datetime.date.today().isoformat()
    counts = {
        proc: pubmed_count(f'"{proc}" AND "{cancer_type}"[tiab]')
        for proc in PROCESSES
    }

    column = np.array([counts[p] for p in PROCESSES], dtype=float).reshape(-1, 1)
    failed = column < 0
    masked = np.ma.masked_where(failed, np.where(failed, 0.0, column))

    # plt.cm.get_cmap() was removed in Matplotlib 3.9; plt.get_cmap() remains.
    # Work on a copy so set_bad() does not alter the shared colormap.
    cmap = copy.copy(plt.get_cmap("YlOrRd"))
    cmap.set_bad("white")

    fig, ax = plt.subplots(figsize=(6, 8), facecolor="white")
    try:
        im = ax.imshow(masked, aspect="auto", cmap=cmap, vmin=0)
        ax.set_xticks([0])
        ax.set_xticklabels([cancer_type], fontsize=11, fontweight="bold")
        ax.set_yticks(range(len(PROCESSES)))
        ax.set_yticklabels(PROCESSES, fontsize=9)
        ax.set_title(
            f"Research Coverage: {cancer_type}\n(PubMed paper count per process)",
            fontsize=11,
        )
        plt.colorbar(im, ax=ax, label="Paper count")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        plt.close(fig)  # release the figure even if rendering failed
    buf.seek(0)
    img = Image.open(buf)

    # Least-studied processes first; failed lookups are excluded.
    sorted_procs = sorted(
        [(p, counts[p]) for p in PROCESSES if counts[p] >= 0],
        key=lambda item: item[1],
    )
    top_gaps = sorted_procs[:5]
    gap_cards = [
        f"**Gap #{i}: {proc}** \n"
        f"Papers found: {cnt} \n"
        f"Query: `\"{proc}\" AND \"{cancer_type}\"`"
        for i, (proc, cnt) in enumerate(top_gaps, 1)
    ]

    gaps_md = "\n\n---\n\n".join(gap_cards) if gap_cards else "No data available."
    journal_log("A1-GrayZones", f"cancer={cancer_type}", f"gaps={[p for p, _ in top_gaps]}")
    source_note = f"*Source: PubMed E-utilities | Date: {today}*"
    return img, gaps_md + "\n\n" + source_note
265
-
266
-
267
- # ─────────────────────────────────────────────
268
- # TAB A2 — UNDERSTUDIED TARGET FINDER
269
- # ─────────────────────────────────────────────
270
 
271
_depmap_cache = {}


def _load_depmap_sample() -> pd.DataFrame:
    """Return the placeholder gene-effect table (built once, then memoised).

    The scores are NOT real DepMap data: they are drawn from a uniform
    distribution on [-1.5, 0.3) with a fixed seed (42), so the table is
    identical on every run. The same DataFrame object is returned on each
    call; callers should treat it as read-only.
    """
    if "df" in _depmap_cache:
        return _depmap_cache["df"]
    genes = [
        "MYC", "KRAS", "TP53", "EGFR", "PTEN", "RB1", "CDKN2A",
        "PIK3CA", "AKT1", "BRAF", "NRAS", "IDH1", "IDH2", "ARID1A",
        "SMAD4", "CTNNB1", "VHL", "BRCA1", "BRCA2", "ATM",
        "CDK4", "CDK6", "MDM2", "BCL2", "MCL1", "CCND1",
        "FGFR1", "FGFR2", "MET", "ALK", "RET", "ERBB2",
        "MTOR", "PIK3R1", "STK11", "NF1", "NF2", "TSC1", "TSC2",
    ]
    rng = np.random.default_rng(42)
    scores = rng.uniform(-1.5, 0.3, len(genes))
    df = pd.DataFrame({"gene": genes, "gene_effect": scores})
    _depmap_cache["df"] = df
    return df
290
 
 
 
 
 
291
 
292
def _a2_trial_count(gene: str, cancer_type: str) -> int:
    """Return the ClinicalTrials.gov study count for "<gene> <cancer_type>".

    Cached under the ``ct_gene`` endpoint. Returns ``-1`` when the request
    fails; failures are not cached.
    """
    cache_id = f"{gene}_{cancer_type}"
    cached = cache_get("ct_gene", cache_id)
    if cached is not None:
        return cached
    try:
        r = requests.get(
            f"{CT_BASE}/studies",
            params={
                "query.term": f"{gene} {cancer_type}",
                "pageSize": 1,
                "format": "json",
                # The v2 API only includes `totalCount` in the response when
                # countTotal is requested; without it every count reads 0.
                "countTotal": "true",
            },
            timeout=10,
        )
        r.raise_for_status()
        n = r.json().get("totalCount", 0)
    except (requests.RequestException, AttributeError, ValueError):
        return -1
    cache_set("ct_gene", cache_id, n)
    return n


def a2_run(cancer_type: str):
    """Rank OpenTargets-associated genes by a literature "gap index".

    The first 20 associated targets for the cancer's EFO id are annotated
    with a PubMed paper count, a ClinicalTrials.gov study count and the
    placeholder essentiality from ``_load_depmap_sample``.
    ``Gap_index = essentiality / log(papers + 2)`` for genes with positive
    inverted essentiality; it is 0 when essentiality is missing or
    non-positive, or when the paper count could not be retrieved.

    Returns:
        ``(DataFrame, markdown_note)``, or ``(None, warning)`` when
        OpenTargets returned no rows.
    """
    today = datetime.date.today().isoformat()
    efo = CANCER_EFO.get(cancer_type, "")

    gql = """
    query AssocTargets($efoId: String!, $size: Int!) {
      disease(efoId: $efoId) {
        associatedTargets(page: {index: 0, size: $size}) {
          rows {
            target {
              approvedSymbol
              approvedName
            }
            score
          }
        }
      }
    }
    """
    ot_data = ot_query(gql, {"efoId": efo, "size": 40})
    try:
        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
    except (KeyError, TypeError):
        rows_ot = []

    if not rows_ot:
        return None, f"⚠️ OpenTargets returned no data for {cancer_type}. Try again later.\n\n*Source: OpenTargets | Date: {today}*"

    genes = [r["target"]["approvedSymbol"] for r in rows_ot][:20]
    paper_counts = {
        gene: pubmed_count(f'"{gene}" AND "{cancer_type}"[tiab]') for gene in genes
    }
    trial_counts = {gene: _a2_trial_count(gene, cancer_type) for gene in genes}

    depmap_df = _load_depmap_sample()
    depmap_dict = dict(zip(depmap_df["gene"], depmap_df["gene_effect"]))

    records = []
    for gene in genes:
        raw_ess = depmap_dict.get(gene)
        papers = paper_counts[gene]
        trials = trial_counts[gene]
        if raw_ess is None:
            ess_display = "N/A"
            gap_idx = 0.0
        else:
            ess_inverted = -raw_ess  # positive = more essential
            ess_display = f"{ess_inverted:.3f}"
            # A failed PubMed lookup (-1) must not be scored as "zero papers":
            # that would rank API failures as the biggest research gaps.
            if ess_inverted > 0 and papers >= 0:
                gap_idx = ess_inverted / math.log(papers + 2)
            else:
                gap_idx = 0.0
        records.append({
            "Gene": gene,
            "Essentiality (inverted)": ess_display,
            "Papers": papers if papers >= 0 else "N/A",
            "Trials": trials if trials >= 0 else "N/A",
            "Gap_index": round(gap_idx, 3),
        })

    result_df = pd.DataFrame(records).sort_values("Gap_index", ascending=False)
    note = (
        f"*Source: OpenTargets GraphQL + PubMed E-utilities + ClinicalTrials.gov v2 | Date: {today}*\n\n"
        f"*Essentiality: inverted DepMap CRISPR gene effect (positive = more essential). "
        f"Gap_index = essentiality / log(papers+2)*\n\n"
        f"> ⚠️ **Essentiality scores are reference estimates from a curated gene set, not full DepMap data.** "
        f"For real analysis, download `CRISPR_gene_effect.csv` from [depmap.org](https://depmap.org/portal/download/all/) "
        f"and replace `_load_depmap_sample()` in `app.py`."
    )
    top_gene = result_df.iloc[0]["Gene"] if len(result_df) else "none"
    journal_log("A2-TargetFinder", f"cancer={cancer_type}", f"top_gap={top_gene}")
    return result_df, note
382
-
383
-
384
- # ─────────────────────────────────────────────
385
- # TAB A3 — REAL VARIANT LOOKUP
386
- # ─────────────────────────────────────────────
387
 
388
_A3_NOT_IN_DB = "> ⚠️ Not in database. Do not interpret."


def _a3_clinvar_section(hgvs: str) -> str:
    """Return the Markdown ClinVar section for *hgvs*.

    Distinguishes "API error" from "variant not found": only an empty ID
    list from a successful (or cached) search is reported as not found.
    """
    api_error = "### ClinVar\nData unavailable — API error."

    ids = cache_get("clinvar", hgvs)
    if ids is None:
        try:
            time.sleep(0.34)  # throttle E-utilities requests
            r = requests.get(
                f"{PUBMED_BASE}/esearch.fcgi",
                params={"db": "clinvar", "term": hgvs, "retmode": "json", "retmax": 5},
                timeout=10,
            )
            r.raise_for_status()
            ids = r.json()["esearchresult"]["idlist"]
        except (requests.RequestException, KeyError, TypeError, ValueError):
            return api_error
        cache_set("clinvar", hgvs, ids)

    if not ids:
        return (
            "### ClinVar\n"
            "**Not found in ClinVar database.**\n"
            + _A3_NOT_IN_DB
        )

    top_ids = ids[:3]
    try:
        time.sleep(0.34)
        r2 = requests.get(
            f"{PUBMED_BASE}/esummary.fcgi",
            params={"db": "clinvar", "id": ",".join(top_ids), "retmode": "json"},
            timeout=10,
        )
        r2.raise_for_status()
        cv_result = r2.json().get("result", {})
        cv_rows = []
        for vid in top_ids:
            if vid not in cv_result:
                continue
            v = cv_result[vid]
            # NOTE(review): newer ClinVar summaries appear to carry the
            # classification under "germline_classification"; fall back to
            # the older "clinical_significance" key. Confirm against the API.
            sig = v.get("germline_classification") or v.get("clinical_significance", {})
            if isinstance(sig, dict):
                sig_str = sig.get("description", "Unknown")
            else:
                sig_str = str(sig)
            cv_rows.append(
                f"- **ClinVar ID {vid}**: {v.get('title','N/A')} | "
                f"Classification: **{sig_str}**"
            )
    except (requests.RequestException, AttributeError, KeyError, TypeError, ValueError):
        return api_error

    if cv_rows:
        return "### ClinVar Results\n" + "\n".join(cv_rows)
    return "### ClinVar\nVariant found in index but summary unavailable."


def _a3_gnomad_section(hgvs: str) -> str:
    """Return the Markdown gnomAD section for *hgvs*."""
    unavailable = (
        "### gnomAD v4\n"
        "Data unavailable — API error or variant not found.\n"
        + _A3_NOT_IN_DB
    )

    payload = cache_get("gnomad", hgvs)
    if payload is None:
        gql = """
        query VariantSearch($query: String!, $dataset: DatasetId!) {
          variantSearch(query: $query, dataset: $dataset) {
            variant_id
            rsids
            exome { af }
            genome { af }
          }
        }
        """
        try:
            r3 = requests.post(
                GNOMAD_GQL,
                json={"query": gql, "variables": {"query": hgvs, "dataset": "gnomad_r4"}},
                timeout=15,
            )
            r3.raise_for_status()
            payload = r3.json()
        except (requests.RequestException, ValueError):
            return unavailable
        # Do not cache GraphQL error payloads for 24 hours.
        if isinstance(payload, dict) and not payload.get("errors"):
            cache_set("gnomad", hgvs, payload)

    # "data" can be missing or null when the query failed server-side.
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        return unavailable

    variants = data.get("variantSearch") or []
    if not variants:
        return (
            "### gnomAD v4\n"
            "**Not found in gnomAD.**\n"
            + _A3_NOT_IN_DB
        )

    gn_rows = []
    for v in variants[:3]:
        vid = v.get("variant_id", "N/A")
        rsids = ", ".join(v.get("rsids") or []) or "N/A"  # rsids may be null
        af_e = (v.get("exome") or {}).get("af")
        af_g = (v.get("genome") or {}).get("af")
        gn_rows.append(
            f"- **{vid}** (rsID: {rsids}) | "
            f"Exome AF: {'N/A' if af_e is None else af_e} | "
            f"Genome AF: {'N/A' if af_g is None else af_g}"
        )
    return "### gnomAD v4 Results\n" + "\n".join(gn_rows)


def a3_run(hgvs: str):
    """Look up a variant (HGVS notation) in ClinVar and gnomAD.

    Returns a Markdown report with one section per database followed by a
    source/date line, or a prompt string when *hgvs* is empty.
    """
    today = datetime.date.today().isoformat()
    hgvs = (hgvs or "").strip()
    if not hgvs:
        return "Please enter an HGVS notation (e.g. NM_007294.4:c.5266dupC)"

    result_parts = [
        _a3_clinvar_section(hgvs),
        _a3_gnomad_section(hgvs),
        f"\n*Source: ClinVar E-utilities + gnomAD GraphQL | Date: {today}*",
    ]
    journal_log("A3-VariantLookup", f"hgvs={hgvs}", result_parts[0][:100])
    return "\n\n".join(result_parts)
504
-
505
-
506
- # ─────────────────────────────────────────────
507
- # TAB A4 — LITERATURE GAP FINDER
508
- # ─────────────────────────────────────────────
509
-
510
def a4_run(cancer_type: str, keyword: str):
    """Plot the 10-year PubMed trend for *keyword* in *cancer_type*.

    One count is fetched per publication year (current year and the nine
    before it). Years with zero papers are reported as gaps; years below 30%
    of the average of the non-zero years are reported as low-activity. Years
    whose lookup failed are drawn in grey and listed separately instead of
    being reported as zero-publication years.

    Returns:
        ``(image, markdown_summary)``, or ``(None, prompt)`` when *keyword*
        is empty.
    """
    today = datetime.date.today().isoformat()
    keyword = (keyword or "").strip()
    if not keyword:
        return None, "Please enter a keyword."

    current_year = datetime.date.today().year
    years = list(range(current_year - 9, current_year + 1))
    raw_counts = [
        pubmed_count(f'"{keyword}" AND "{cancer_type}"[tiab] AND {yr}[pdat]')
        for yr in years
    ]
    failed_years = {yr for yr, n in zip(years, raw_counts) if n < 0}
    counts = [max(n, 0) for n in raw_counts]  # failed lookups plot as height 0

    nonzero = [c for c in counts if c > 0]
    avg = np.mean(nonzero) if nonzero else 0
    low_cutoff = avg * 0.3
    gaps = [yr for yr, c in zip(years, counts) if c == 0 and yr not in failed_years]
    low_years = [yr for yr, c in zip(years, counts) if 0 < c < low_cutoff]

    def _bar_color(year, count):
        if year in failed_years:
            return "#bdbdbd"  # unknown: lookup failed
        if count == 0:
            return "#d73027"
        if count < low_cutoff:
            return "#fc8d59"
        return "#4393c3"

    bar_colors = [_bar_color(yr, c) for yr, c in zip(years, counts)]

    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
    try:
        ax.bar(years, counts, color=bar_colors, edgecolor="white", linewidth=0.5)
        ax.axhline(avg, color="#555", linestyle="--", linewidth=1, label=f"Avg: {avg:.1f}")
        ax.set_xlabel("Year", fontsize=11)
        ax.set_ylabel("PubMed Papers", fontsize=11)
        ax.set_title(f'Literature Trend: "{keyword}" in {cancer_type}', fontsize=12)
        ax.set_xticks(years)
        ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right")
        ax.legend(fontsize=9)
        ax.set_facecolor("white")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    gap_text = []
    if gaps:
        gap_text.append(f"**Zero-publication years:** {', '.join(map(str, gaps))}")
    if low_years:
        gap_text.append(f"**Low-activity years (<30% avg):** {', '.join(map(str, low_years))}")
    if failed_years:
        gap_text.append(
            f"**Lookup failed (no data):** {', '.join(map(str, sorted(failed_years)))}"
        )
    if not gap_text:
        gap_text.append("No significant gaps detected in the last 10 years.")

    summary = "\n\n".join(gap_text)
    summary += f"\n\n*Source: PubMed E-utilities | Date: {today}*"
    journal_log("A4-LitGap", f"cancer={cancer_type}, kw={keyword}", summary[:100])
    return img, summary
568
-
569
-
570
- # ─────────────────────────────────────────────
571
- # TAB A5 — DRUGGABLE ORPHANS
572
- # ─────────────────────────────────────────────
573
-
574
def _a5_trial_count(gene: str, cancer_type: str) -> int:
    """Return the ClinicalTrials.gov study count for "<gene> <cancer_type>".

    Cached under the ``ct_orphan`` endpoint. Returns ``-1`` when the request
    fails; failures are not cached.
    """
    cache_id = f"{gene}_{cancer_type}"
    cached = cache_get("ct_orphan", cache_id)
    if cached is not None:
        return cached
    try:
        r = requests.get(
            f"{CT_BASE}/studies",
            params={
                "query.term": f"{gene} {cancer_type}",
                "pageSize": 1,
                "format": "json",
                # `totalCount` is only returned when countTotal is requested;
                # without it every candidate looked like it had 0 trials.
                "countTotal": "true",
            },
            timeout=10,
        )
        r.raise_for_status()
        n = r.json().get("totalCount", 0)
    except (requests.RequestException, AttributeError, ValueError):
        return -1
    cache_set("ct_orphan", cache_id, n)
    return n


def a5_run(cancer_type: str):
    """List associated targets that have no known drug ("druggable orphans").

    Targets associated with the cancer's EFO id are fetched from OpenTargets;
    those whose ``knownDrugs.count`` is 0 or missing are kept, and the first
    15 are annotated with a ClinicalTrials.gov study count.

    Returns:
        ``(DataFrame, markdown_note)``, or ``(None, warning)`` when
        OpenTargets returned no rows.
    """
    today = datetime.date.today().isoformat()
    efo = CANCER_EFO.get(cancer_type, "")

    gql = """
    query DruggableTargets($efoId: String!, $size: Int!) {
      disease(efoId: $efoId) {
        associatedTargets(page: {index: 0, size: $size}) {
          rows {
            target {
              approvedSymbol
              approvedName
              tractability {
                label
                modality
                value
              }
              knownDrugs {
                count
              }
            }
            score
          }
        }
      }
    }
    """
    ot_data = ot_query(gql, {"efoId": efo, "size": 50})
    try:
        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
    except (KeyError, TypeError):
        rows_ot = []

    if not rows_ot:
        return None, f"⚠️ OpenTargets returned no data for {cancer_type}.\n\n*Source: OpenTargets | Date: {today}*"

    orphan_candidates = []
    for row in rows_ot:
        t = row["target"]
        try:
            drug_count = t["knownDrugs"]["count"] or 0
        except (KeyError, TypeError):
            drug_count = 0  # knownDrugs missing or null
        if drug_count == 0:
            orphan_candidates.append({
                "gene": t["approvedSymbol"],
                "name": t.get("approvedName") or "",  # may be null in the API
                "ot_score": row["score"],
            })

    records = []
    for cand in orphan_candidates[:15]:
        gene = cand["gene"]
        trial_count = _a5_trial_count(gene, cancer_type)
        if trial_count == 0:
            status = "🔴 Orphan"
        elif trial_count > 0:
            status = "⚠️ Trials only"
        else:
            status = "❓ Unknown"
        records.append({
            "Gene": gene,
            "Name": cand["name"][:50],
            "OT_Score": round(cand["ot_score"], 3),
            "Known_Drugs": 0,
            "Active_Trials": trial_count if trial_count >= 0 else "N/A",
            "Status": status,
        })

    df = pd.DataFrame(records)
    note = (
        f"*Source: OpenTargets GraphQL + ClinicalTrials.gov v2 | Date: {today}*\n\n"
        f"*Orphan = no approved drug (OpenTargets knownDrugs.count = 0)*"
    )
    journal_log("A5-DruggableOrphans", f"cancer={cancer_type}", f"orphans={len(df)}")
    return df, note
658
-
659
-
660
- # ─────────────────────────────────────────────
661
- # GROUP B — LEARNING SANDBOX
662
- # ─────────────────────────────────────────────
663
-
664
# Markdown warning text for the Group B (simulated-data) tabs.
SIMULATED_BANNER = (
    "⚠️ **SIMULATED DATA** — This tab uses rule-based models and synthetic data "
    "for educational purposes only. Results do NOT reflect real experimental outcomes."
)
668
-
669
- # ── TAB B1 — miRNA Explorer ──────────────────
670
-
671
# Hard-coded, SIMULATED miRNA data per gene. The four lists of each entry
# are parallel: index i of each list describes the same miRNA.
MIRNA_DB = {
    "BRCA2": {
        "miRNAs": ["miR-146a-5p", "miR-21-5p", "miR-155-5p", "miR-182-5p", "miR-205-5p"],
        "binding_energy": [-18.4, -15.2, -12.7, -14.1, -16.8],
        "seed_match": ["7mer-m8", "6mer", "7mer-A1", "8mer", "7mer-m8"],
        "expression_change": [-2.1, +1.8, +2.3, -1.5, -3.2],
        "cancer_context": "BRCA2 loss-of-function is associated with HR-deficient breast/ovarian cancer. "
                          "miR-146a-5p and miR-205-5p are frequently downregulated in BRCA2-mutant tumors.",
    },
    "BRCA1": {
        "miRNAs": ["miR-17-5p", "miR-20a-5p", "miR-93-5p", "miR-182-5p", "miR-9-5p"],
        "binding_energy": [-16.1, -13.5, -14.9, -15.3, -11.8],
        "seed_match": ["8mer", "7mer-m8", "7mer-A1", "8mer", "6mer"],
        "expression_change": [+1.9, +2.1, +1.6, -1.8, +2.4],
        "cancer_context": "BRCA1 regulates DNA damage response. miR-17/20a cluster is upregulated "
                          "in BRCA1-deficient tumors and suppresses apoptosis.",
    },
    "TP53": {
        "miRNAs": ["miR-34a-5p", "miR-125b-5p", "miR-504-5p", "miR-25-3p", "miR-30d-5p"],
        "binding_energy": [-19.2, -14.6, -13.1, -12.4, -15.7],
        "seed_match": ["8mer", "7mer-m8", "7mer-A1", "6mer", "8mer"],
        "expression_change": [-3.5, +1.2, +1.7, +2.0, -1.3],
        "cancer_context": "TP53 is the most mutated gene in cancer. miR-34a is a direct p53 transcriptional "
                          "target; its loss promotes tumor progression across cancer types.",
    },
}


def b1_run(gene: str):
    """Plot the simulated miRNA profile stored in ``MIRNA_DB`` for *gene*.

    Returns:
        ``(image, markdown)``: a two-panel bar chart (binding energy
        magnitude, expression log2FC) and a Markdown table followed by the
        gene's cancer-context text, or ``(None, message)`` when *gene* has
        no entry.
    """
    db = MIRNA_DB.get(gene, {})
    if not db:
        return None, "Gene not found in simulation database."

    mirnas = db["miRNAs"]
    energies = db["binding_energy"]
    changes = db["expression_change"]
    seeds = db["seed_match"]

    def _energy_color(e):
        # More negative energy = stronger predicted binding = warmer colour.
        if e < -16:
            return "#d73027"
        if e < -13:
            return "#fc8d59"
        return "#4393c3"

    fig, (ax_energy, ax_expr) = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
    try:
        ax_energy.barh(
            mirnas,
            [-e for e in energies],  # plot magnitudes so bars extend rightwards
            color=[_energy_color(e) for e in energies],
            edgecolor="white",
        )
        ax_energy.set_xlabel("Binding Energy (|kcal/mol|)", fontsize=10)
        ax_energy.set_title(f"Predicted Binding Energy\n{gene} miRNA targets", fontsize=10)
        ax_energy.set_facecolor("white")

        ax_expr.barh(
            mirnas,
            changes,
            color=["#d73027" if c < 0 else "#4393c3" for c in changes],
            edgecolor="white",
        )
        ax_expr.axvline(0, color="black", linewidth=0.8)
        ax_expr.set_xlabel("Expression Change (log2FC)", fontsize=10)
        ax_expr.set_title(f"miRNA Expression in {gene}-mutant tumors\n(⚠️ SIMULATED)", fontsize=10)
        ax_expr.set_facecolor("white")

        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    df = pd.DataFrame({
        "miRNA": mirnas,
        "Binding Energy (kcal/mol)": energies,
        "Seed Match": seeds,
        "Expression log2FC": changes,
    })
    context = f"\n\n**Cancer Context:** {db['cancer_context']}"
    journal_log("B1-miRNA", f"gene={gene}", f"top_miRNA={mirnas[0]}")
    return img, df.to_markdown(index=False) + context
739
-
740
-
741
- # ── TAB B2 — siRNA Targets ───────────────────
742
-
743
# Hard-coded, SIMULATED siRNA target data per cancer code. The four lists of
# each entry are parallel: index i of each list describes the same target.
SIRNA_DB = {
    "LUAD": {
        "targets": ["KRAS G12C", "EGFR exon19del", "ALK fusion", "MET exon14", "RET fusion"],
        "efficacy": [0.82, 0.91, 0.76, 0.68, 0.71],
        "off_target_risk": ["Medium", "Low", "Low", "Medium", "Low"],
        "delivery_challenge": ["High", "Medium", "Medium", "High", "Medium"],
    },
    "BRCA": {
        "targets": ["BRCA1 exon11", "BRCA2 exon11", "PIK3CA H1047R", "AKT1 E17K", "ESR1 Y537S"],
        "efficacy": [0.78, 0.85, 0.88, 0.72, 0.65],
        "off_target_risk": ["Low", "Low", "Medium", "Low", "High"],
        "delivery_challenge": ["Medium", "Medium", "Low", "Low", "High"],
    },
    "COAD": {
        "targets": ["KRAS G12D", "APC truncation", "BRAF V600E", "SMAD4 loss", "PIK3CA E545K"],
        "efficacy": [0.79, 0.61, 0.93, 0.55, 0.84],
        "off_target_risk": ["Medium", "High", "Low", "Medium", "Low"],
        "delivery_challenge": ["High", "High", "Low", "High", "Low"],
    },
}


def b2_run(cancer: str):
    """Plot the simulated siRNA target efficacies stored for *cancer*.

    Bars are coloured by off-target risk (Low / Medium / High).

    Returns:
        ``(image, markdown_table)``, or ``(None, message)`` when *cancer*
        has no entry in ``SIRNA_DB``.
    """
    from matplotlib.patches import Patch

    db = SIRNA_DB.get(cancer, {})
    if not db:
        return None, "Cancer type not in simulation database."

    targets = db["targets"]
    efficacy = db["efficacy"]
    off_risk = db["off_target_risk"]
    delivery = db["delivery_challenge"]

    risk_color = {"Low": "#4393c3", "Medium": "#fc8d59", "High": "#d73027"}

    fig, ax = plt.subplots(figsize=(8, 4), facecolor="white")
    try:
        ax.barh(
            targets,
            efficacy,
            color=[risk_color.get(r, "#aaa") for r in off_risk],
            edgecolor="white",
        )
        ax.set_xlim(0, 1.1)
        ax.set_xlabel("Predicted siRNA Efficacy (⚠️ SIMULATED)", fontsize=10)
        ax.set_title(f"siRNA Target Efficacy — {cancer}", fontsize=11)
        ax.set_facecolor("white")
        legend_elements = [Patch(facecolor=c, label=risk) for risk, c in risk_color.items()]
        ax.legend(handles=legend_elements, title="Off-target Risk", fontsize=8, loc="lower right")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    df = pd.DataFrame({
        "Target": targets,
        "Efficacy": efficacy,
        "Off-target Risk": off_risk,
        "Delivery Challenge": delivery,
    })
    journal_log("B2-siRNA", f"cancer={cancer}", f"top={targets[0]}")
    return img, df.to_markdown(index=False)
801
-
802
-
803
- # ── TAB B3 — LNP Corona Simulator ───────────────
804
-
805
def b3_run(peg_mol_pct: float, ionizable_pct: float, helper_pct: float,
           chol_pct: float, particle_size_nm: float, serum_pct: float):
    """Simulate a protein-corona composition for a lipid nanoparticle.

    A rule-based toy model: each protein gets a linear score from the
    formulation inputs (clamped at 0), and the scores are normalised to
    fractions that sum to 1. Results are SIMULATED, not experimental.

    Returns:
        ``(image, markdown)``: a pie + bar chart of the fractions and a
        one-line interpretation of the ApoE fraction.
    """
    total_lipid = peg_mol_pct + ionizable_pct + helper_pct + chol_pct
    # max(..., 1) avoids dividing by zero for an all-zero formulation.
    peg_norm = peg_mol_pct / max(total_lipid, 1)

    corona_proteins = {
        "ApoE": max(0, 0.35 - peg_norm * 0.8 + ionizable_pct * 0.01),
        "ApoA-I": max(0, 0.20 - ionizable_pct * 0.005 + chol_pct * 0.003),
        "Fibrinogen": max(0, 0.15 + (particle_size_nm - 100) * 0.001 - peg_norm * 0.3),
        "Albumin": max(0, 0.10 + serum_pct * 0.002 - peg_norm * 0.2),
        "Clusterin": max(0, 0.08 + peg_norm * 0.15),
        "IgG": max(0, 0.07 + serum_pct * 0.001),
        "Complement C3": max(0, 0.05 + ionizable_pct * 0.003 - peg_norm * 0.1),
    }
    total = sum(corona_proteins.values())
    if total > 0:
        corona_proteins = {k: v / total for k, v in corona_proteins.items()}

    labels = list(corona_proteins.keys())
    sizes = list(corona_proteins.values())
    colors_pie = plt.cm.Set2(np.linspace(0, 1, len(labels)))

    fig, (ax_pie, ax_bar) = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
    try:
        ax_pie.pie(sizes, labels=labels, colors=colors_pie, autopct="%1.1f%%", startangle=90)
        ax_pie.set_title("Predicted Corona Composition\n(⚠️ SIMULATED)", fontsize=10)

        # Plot at explicit positions and fix the ticks before labelling them:
        # set_xticklabels() without set_xticks() triggers a Matplotlib warning
        # and can mislabel bars if the tick locations change.
        positions = np.arange(len(labels))
        ax_bar.bar(positions, sizes, color=colors_pie, edgecolor="white")
        ax_bar.set_ylabel("Relative Abundance", fontsize=10)
        ax_bar.set_title("Corona Protein Fractions", fontsize=10)
        ax_bar.set_xticks(positions)
        ax_bar.set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
        ax_bar.set_facecolor("white")

        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    apoe_pct = corona_proteins.get("ApoE", 0) * 100
    if apoe_pct > 25:
        verdict = "High ApoE → enhanced brain/liver targeting via LDLR pathway."
    else:
        verdict = "Low ApoE → reduced receptor-mediated uptake."
    interpretation = f"**ApoE fraction: {apoe_pct:.1f}%** — " + verdict
    journal_log("B3-LNPCorona", f"PEG={peg_mol_pct}%,size={particle_size_nm}nm", f"ApoE={apoe_pct:.1f}%")
    return img, interpretation
852
-
853
-
854
- # ── TAB B4 — Flow Corona (Vroman Kinetics) ──────
855
-
856
def _b4_saturation(kon: float, koff: float, t):
    """Return ``kon/(kon+koff) * (1 - exp(-(kon+koff) * t))`` for array *t*.

    Returns all zeros when ``kon + koff`` is not positive, instead of
    dividing by zero.
    """
    rate = kon + koff
    if rate <= 0:
        return np.zeros_like(t, dtype=float)
    return (kon / rate) * (1 - np.exp(-rate * t))


def b4_run(time_points: int, kon_albumin: float, kon_apoe: float,
           koff_albumin: float, koff_apoe: float):
    """Simulate competitive protein adsorption (Vroman effect) over time.

    Albumin adsorbs from t = 0; ApoE starts after a fixed 5-unit delay and
    exponentially suppresses the albumin and fibrinogen curves. The model is
    a SIMULATED illustration, not a fit to experimental data.

    Args:
        time_points: End of the simulated time axis (500 samples from 0).
        kon_albumin, koff_albumin: Albumin on/off rate constants.
        kon_apoe, koff_apoe: ApoE on/off rate constants.

    Returns:
        ``(image, markdown_note)``: the coverage plot and a note giving the
        first time at which ApoE coverage exceeds the displaced-albumin
        coverage ("N/A" when that never happens in the simulated window).
    """
    t = np.linspace(0, time_points, 500)

    albumin = _b4_saturation(kon_albumin, koff_albumin, t)
    apoe = _b4_saturation(kon_apoe, koff_apoe, np.maximum(0, t - 5))
    albumin_displaced = albumin * np.exp(-apoe * 2)
    fibrinogen = 0.3 * (1 - np.exp(-0.05 * t)) * np.exp(-apoe * 1.5)

    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
    try:
        ax.plot(t, albumin_displaced, label="Albumin (displaced)", color="#4393c3", linewidth=2)
        ax.plot(t, apoe, label="ApoE (hard corona)", color="#d73027", linewidth=2)
        ax.plot(t, fibrinogen, label="Fibrinogen", color="#fc8d59", linewidth=2, linestyle="--")
        ax.set_xlabel("Time (min)", fontsize=11)
        ax.set_ylabel("Surface Coverage (a.u.)", fontsize=11)
        ax.set_title("Vroman Effect — Competitive Protein Adsorption\n(⚠️ SIMULATED)", fontsize=11)
        ax.legend(fontsize=9)
        ax.set_facecolor("white")
        fig.tight_layout()

        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    # Crossover = first sample where ApoE coverage overtakes displaced albumin.
    # (Testing "albumin > 0.9 * ApoE" is true from the first time step because
    # ApoE is still zero during its delay, so it always reported ~0 min.)
    apoe_dominant = apoe > albumin_displaced
    if apoe_dominant.any():
        # Format only when a number exists: applying ':.1f' to "N/A" raises.
        crossover = f"~{t[np.argmax(apoe_dominant)]:.1f} min"
    else:
        crossover = "N/A"
    note = (
        f"**Vroman crossover** (albumin → ApoE dominance): {crossover}\n\n"
        "The Vroman effect describes sequential protein displacement: "
        "abundant proteins (albumin) adsorb first, then are displaced by higher-affinity proteins (ApoE, fibrinogen)."
    )
    journal_log("B4-FlowCorona", f"kon_alb={kon_albumin},kon_apoe={kon_apoe}", note[:80])
    return img, note
891
-
892
 
893
- # ── TAB B5 — Variant Concepts ───────────────────
894
-
895
- VARIANT_RULES = {
896
- "Pathogenic": {
897
- "criteria": ["Nonsense mutation in tumor suppressor", "Frameshift in BRCA1/2",
898
- "Splice site ±1/2 in essential gene", "Known hotspot (e.g. TP53 R175H)"],
899
- "acmg_codes": ["PVS1", "PS1", "PS2", "PM2"],
900
- "explanation": "Strong evidence of pathogenicity. Likely disrupts protein function via LOF or dominant-negative mechanism.",
901
- },
902
- "Likely Pathogenic": {
903
- "criteria": ["Missense in functional domain", "In silico tools predict damaging",
904
- "Low population frequency (<0.01%)", "Segregates with disease"],
905
- "acmg_codes": ["PM1", "PM2", "PP2", "PP3"],
906
- "explanation": "Moderate-strong evidence. Functional studies or segregation data would upgrade to Pathogenic.",
907
- },
908
- "VUS": {
909
- "criteria": ["Missense with conflicting evidence", "Moderate population frequency",
910
- "Uncertain functional impact", "Limited segregation data"],
911
- "acmg_codes": ["PM2", "BP4", "BP6"],
912
- "explanation": "Variant of Uncertain Significance. Insufficient evidence to classify. Functional assays recommended.",
913
- },
914
- "Likely Benign": {
915
- "criteria": ["Common in population (>1%)", "Synonymous with no splicing impact",
916
- "Observed in healthy controls", "Computational tools predict benign"],
917
- "acmg_codes": ["BS1", "BP1", "BP4", "BP7"],
918
- "explanation": "Evidence suggests benign. Unlikely to cause disease but not fully excluded.",
919
- },
920
- "Benign": {
921
- "criteria": ["High population frequency (>5%)", "No disease association in large studies",
922
- "Synonymous, no functional impact", "Functional studies show no effect"],
923
- "acmg_codes": ["BA1", "BS1", "BS2", "BS3"],
924
- "explanation": "Strong evidence of benign nature. Not expected to contribute to disease.",
925
- },
926
- }
927
-
928
- def b5_run(classification: str):
929
- data = VARIANT_RULES.get(classification, {})
930
- if not data:
931
- return "Classification not found."
932
-
933
- criteria_md = "\n".join([f"- {c}" for c in data["criteria"]])
934
- acmg_md = " | ".join([f"`{code}`" for code in data["acmg_codes"]])
935
- output = (
936
- f"## {classification}\n\n"
937
- f"**ACMG/AMP Codes:** {acmg_md}\n\n"
938
- f"**Typical Criteria:**\n{criteria_md}\n\n"
939
- f"**Interpretation:** {data['explanation']}\n\n"
940
- f"> ⚠️ SIMULATED — This is a rule-based educational model only. "
941
- f"Real variant classification requires expert review and full ACMG/AMP criteria evaluation."
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
942
  )
943
- journal_log("B5-VariantConcepts", f"class={classification}", output[:100])
944
- return output
945
-
946
-
947
- # ─────────────────────────────────────────────
948
- # GRADIO UI ASSEMBLY
949
- # ─────────────────────────────────────────────
950
-
951
- CUSTOM_CSS = """
952
- body { font-family: 'Inter', sans-serif; }
953
- .simulated-banner {
954
- background: #fff3cd; border: 1px solid #ffc107;
955
- border-radius: 6px; padding: 10px 14px;
956
- font-weight: 600; color: #856404; margin-bottom: 8px;
957
- }
958
- .source-note { color: #6c757d; font-size: 0.85em; margin-top: 6px; }
959
- .gap-card {
960
- background: #f8f9fa; border-left: 4px solid #d73027;
961
- padding: 10px 14px; margin: 6px 0; border-radius: 4px;
962
- }
963
- footer { display: none !important; }
964
- """
965
-
966
-
967
- def build_app():
968
- with gr.Blocks(css=CUSTOM_CSS, title="K R&D Lab — Cancer Research Suite") as demo:
969
- gr.Markdown(
970
- "# 🔬 K R&D Lab — Cancer Research Suite\n"
971
- "**Author:** Oksana Kolisnyk | [kosatiks-group.pp.ua](https://kosatiks-group.pp.ua) \n"
972
- "**Repo:** [github.com/TEZv/K-RnD-Lab-PHYLO-03_2026](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026)"
973
- )
974
-
975
- with gr.Row():
976
- with gr.Column(scale=4):
977
- with gr.Tabs():
978
-
979
- # ════════════════════════════════
980
- # GROUP A — REAL DATA TOOLS
981
- # ════════════════════════════════
982
- with gr.Tab("🔬 Real Data Tools"):
983
- with gr.Tabs():
984
-
985
- with gr.Tab("🔍 Gray Zones Explorer"):
986
- gr.Markdown(
987
- "Identify underexplored biological processes in a cancer type "
988
- "using live PubMed + OpenTargets data."
989
- )
990
- a1_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
991
- a1_btn = gr.Button("🔍 Explore Gray Zones", variant="primary")
992
- a1_heatmap = gr.Image(label="Research Coverage Heatmap", type="pil")
993
- a1_gaps = gr.Markdown(label="Top 5 Research Gaps")
994
- with gr.Accordion("📖 Learning Mode", open=False):
995
- gr.Markdown(
996
- "**What is a research gray zone?**\n\n"
997
- "A gray zone is a biological process that is well-studied in other cancers "
998
- "but has very few publications in your selected cancer type. "
999
- "Low paper counts (red/white cells) indicate potential unexplored territory.\n\n"
1000
- "**How to use:** Select a rare cancer (e.g. DIPG, MCC) to find the most "
1001
- "underexplored processes. Cross-reference with Tab A2 to find targetable genes."
1002
- )
1003
- a1_btn.click(a1_run, inputs=[a1_cancer], outputs=[a1_heatmap, a1_gaps])
1004
-
1005
- with gr.Tab("🎯 Understudied Target Finder"):
1006
- gr.Markdown(
1007
- "Find essential genes with high research gap index "
1008
- "(high essentiality, low publication coverage)."
1009
- )
1010
- gr.Markdown(
1011
- "> ⚠️ **Essentiality scores are placeholder estimates** from a "
1012
- "curated reference gene set — **not real DepMap data**. "
1013
- "Association scores and paper/trial counts are fetched live. "
1014
- "For real essentiality values, download `CRISPR_gene_effect.csv` "
1015
- "from [depmap.org](https://depmap.org/portal/download/all/) and "
1016
- "replace `_load_depmap_sample()` in `app.py`."
1017
- )
1018
- a2_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
1019
- a2_btn = gr.Button("🎯 Find Understudied Targets", variant="primary")
1020
- a2_table = gr.Dataframe(label="Target Gap Table", wrap=True)
1021
- a2_note = gr.Markdown()
1022
- with gr.Accordion("📖 Learning Mode", open=False):
1023
- gr.Markdown(
1024
- "**Gap Index formula:** `essentiality / log(papers + 1)`\n\n"
1025
- "- **Essentiality**: inverted DepMap CRISPR gene effect score\n"
1026
- "- **Papers**: PubMed count for gene + cancer type\n"
1027
- "- **High Gap Index** = essential gene with few publications = high research opportunity"
1028
- )
1029
- a2_btn.click(a2_run, inputs=[a2_cancer], outputs=[a2_table, a2_note])
1030
-
1031
- with gr.Tab("🧬 Real Variant Lookup"):
1032
- gr.Markdown(
1033
- "Look up a variant in **ClinVar** and **gnomAD**. "
1034
- "Results are fetched live — never hallucinated."
1035
- )
1036
- a3_hgvs = gr.Textbox(
1037
- label="HGVS Notation",
1038
- placeholder="e.g. NM_007294.4:c.5266dupC or NM_000546.6:c.524G>A",
1039
- lines=1
1040
- )
1041
- a3_btn = gr.Button("🔎 Look Up Variant", variant="primary")
1042
- a3_result = gr.Markdown()
1043
- with gr.Accordion("📖 Learning Mode", open=False):
1044
- gr.Markdown(
1045
- "**HGVS notation format:**\n"
1046
- "- `NM_XXXXXX.X:c.NNNN[change]` — coding DNA reference\n"
1047
- "- `NC_XXXXXX.X:g.NNNN[change]` — genomic reference\n\n"
1048
- "**Important:** If a variant is not found, this tool returns "
1049
- "'Not in database. Do not interpret.' — never a fabricated result."
1050
- )
1051
- a3_btn.click(a3_run, inputs=[a3_hgvs], outputs=[a3_result])
1052
-
1053
- with gr.Tab("📰 Literature Gap Finder"):
1054
- gr.Markdown(
1055
- "Visualize publication trends over 10 years and detect "
1056
- "years with low research activity."
1057
- )
1058
- with gr.Row():
1059
- a4_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
1060
- a4_kw = gr.Textbox(label="Keyword", placeholder="e.g. ferroptosis", lines=1)
1061
- a4_btn = gr.Button("📊 Analyze Literature Trend", variant="primary")
1062
- a4_chart = gr.Image(label="Papers per Year", type="pil")
1063
- a4_gaps = gr.Markdown()
1064
- with gr.Accordion("📖 Learning Mode", open=False):
1065
- gr.Markdown(
1066
- "**How to read the chart:**\n"
1067
- "- 🔵 Blue bars = normal activity\n"
1068
- "- 🟠 Orange bars = low activity (<30% of average)\n"
1069
- "- 🔴 Red bars = zero publications (true gap)"
1070
- )
1071
- a4_btn.click(a4_run, inputs=[a4_cancer, a4_kw], outputs=[a4_chart, a4_gaps])
1072
-
1073
- with gr.Tab("💊 Druggable Orphans"):
1074
- gr.Markdown(
1075
- "Identify cancer-associated essential genes with **no approved drug** "
1076
- "and **no active clinical trial**."
1077
- )
1078
- a5_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
1079
- a5_btn = gr.Button("💊 Find Druggable Orphans", variant="primary")
1080
- a5_table = gr.Dataframe(label="Orphan Target Table", wrap=True)
1081
- a5_note = gr.Markdown()
1082
- with gr.Accordion("📖 Learning Mode", open=False):
1083
- gr.Markdown(
1084
- "**What is a druggable orphan?**\n\n"
1085
- "A gene that is strongly associated with a cancer but has no approved drug "
1086
- "and no active clinical trial. These represent the highest-opportunity "
1087
- "targets for drug discovery."
1088
- )
1089
- a5_btn.click(a5_run, inputs=[a5_cancer], outputs=[a5_table, a5_note])
1090
-
1091
- with gr.Tab("🤖 Research Assistant"):
1092
- gr.Markdown(
1093
- "**RAG-powered research assistant** indexed on 20 curated papers "
1094
- "on LNP delivery, protein corona, and cancer variants.\n\n"
1095
- "*Powered by sentence-transformers + FAISS — no API key required.*"
1096
- )
1097
- try:
1098
- from chatbot import build_chatbot_tab
1099
- build_chatbot_tab()
1100
- except ImportError:
1101
- gr.Markdown(
1102
- "⚠️ `chatbot.py` not found. Please ensure it is in the same directory as `app.py`."
1103
- )
1104
-
1105
- # ════════════════════════════════
1106
- # GROUP B — LEARNING SANDBOX
1107
- # ════════════════════════════════
1108
- with gr.Tab("📚 Learning Sandbox"):
1109
- gr.Markdown(
1110
- "> ⚠️ **ALL TABS IN THIS GROUP USE SIMULATED DATA** — "
1111
- "For educational purposes only. Results do not reflect real experiments."
1112
- )
1113
- with gr.Tabs():
1114
-
1115
- with gr.Tab("🧬 miRNA Explorer"):
1116
- gr.Markdown(SIMULATED_BANNER)
1117
- b1_gene = gr.Dropdown(["BRCA2", "BRCA1", "TP53"], label="Gene", value="TP53")
1118
- b1_btn = gr.Button("🔬 Explore miRNA Interactions", variant="primary")
1119
- b1_plot = gr.Image(label="miRNA Binding & Expression (⚠️ SIMULATED)", type="pil")
1120
- b1_table = gr.Markdown()
1121
- with gr.Accordion("📖 Learning Mode", open=False):
1122
- gr.Markdown(
1123
- "**miRNA biology basics:**\n\n"
1124
- "- miRNAs are ~22 nt non-coding RNAs that bind 3'UTR of mRNAs\n"
1125
- "- Seed match types: 8mer > 7mer-m8 > 7mer-A1 > 6mer (binding strength)\n"
1126
- "- Negative binding energy = stronger predicted interaction"
1127
- )
1128
- b1_btn.click(b1_run, inputs=[b1_gene], outputs=[b1_plot, b1_table])
1129
-
1130
- with gr.Tab("🎯 siRNA Targets"):
1131
- gr.Markdown(SIMULATED_BANNER)
1132
- b2_cancer = gr.Dropdown(["LUAD", "BRCA", "COAD"], label="Cancer Type", value="LUAD")
1133
- b2_btn = gr.Button("🎯 Simulate siRNA Efficacy", variant="primary")
1134
- b2_plot = gr.Image(label="siRNA Efficacy (⚠️ SIMULATED)", type="pil")
1135
- b2_table = gr.Markdown()
1136
- with gr.Accordion("📖 Learning Mode", open=False):
1137
- gr.Markdown(
1138
- "**siRNA design principles:**\n\n"
1139
- "- siRNAs are 21-23 nt dsRNA that trigger RISC-mediated mRNA cleavage\n"
1140
- "- Off-target risk: seed region complementarity to unintended mRNAs\n"
1141
- "- Delivery challenge: endosomal escape, serum stability, tumor penetration"
1142
- )
1143
- b2_btn.click(b2_run, inputs=[b2_cancer], outputs=[b2_plot, b2_table])
1144
-
1145
- with gr.Tab("🧪 LNP Corona"):
1146
- gr.Markdown(SIMULATED_BANNER)
1147
- with gr.Row():
1148
- b3_peg = gr.Slider(0.5, 5.0, value=1.5, step=0.1, label="PEG mol% (lipid)")
1149
- b3_ion = gr.Slider(10, 60, value=50, step=1, label="Ionizable lipid mol%")
1150
- with gr.Row():
1151
- b3_helper = gr.Slider(5, 30, value=10, step=1, label="Helper lipid mol%")
1152
- b3_chol = gr.Slider(10, 50, value=38, step=1, label="Cholesterol mol%")
1153
- with gr.Row():
1154
- b3_size = gr.Slider(50, 300, value=100, step=5, label="Particle size (nm)")
1155
- b3_serum = gr.Slider(0, 100, value=10, step=5, label="Serum % in medium")
1156
- b3_btn = gr.Button("🧪 Simulate Corona", variant="primary")
1157
- b3_plot = gr.Image(label="Corona Composition (⚠️ SIMULATED)", type="pil")
1158
- b3_interp = gr.Markdown()
1159
- with gr.Accordion("📖 Learning Mode", open=False):
1160
- gr.Markdown(
1161
- "**Protein corona basics:**\n\n"
1162
- "- Hard corona: tightly bound, long-lived proteins (ApoE, fibrinogen)\n"
1163
- "- Soft corona: loosely bound, rapidly exchanging proteins (albumin)\n"
1164
- "- ApoE enrichment → enhanced brain targeting via LDLR/LRP1 receptors\n"
1165
- "- PEG reduces corona formation"
1166
- )
1167
- b3_btn.click(
1168
- b3_run,
1169
- inputs=[b3_peg, b3_ion, b3_helper, b3_chol, b3_size, b3_serum],
1170
- outputs=[b3_plot, b3_interp]
1171
- )
1172
-
1173
- with gr.Tab("🌊 Flow Corona"):
1174
- gr.Markdown(SIMULATED_BANNER)
1175
- with gr.Row():
1176
- b4_time = gr.Slider(10, 120, value=60, step=5, label="Time range (min)")
1177
- b4_kon_alb = gr.Slider(0.01, 1.0, value=0.3, step=0.01, label="kon Albumin")
1178
- with gr.Row():
1179
- b4_kon_apoe = gr.Slider(0.001, 0.5, value=0.05, step=0.001, label="kon ApoE")
1180
- b4_koff_alb = gr.Slider(0.01, 1.0, value=0.2, step=0.01, label="koff Albumin")
1181
- b4_koff_apoe = gr.Slider(0.001, 0.1, value=0.01, step=0.001, label="koff ApoE")
1182
- b4_btn = gr.Button("🌊 Simulate Vroman Kinetics", variant="primary")
1183
- b4_plot = gr.Image(label="Vroman Effect (⚠️ SIMULATED)", type="pil")
1184
- b4_note = gr.Markdown()
1185
- with gr.Accordion("📖 Learning Mode", open=False):
1186
- gr.Markdown(
1187
- "**The Vroman Effect:** Proteins with high abundance but low affinity "
1188
- "(albumin) adsorb first, then are displaced by lower-abundance but "
1189
- "higher-affinity proteins (fibrinogen, ApoE).\n\n"
1190
- "**Clinical implication:** The final hard corona (not initial) determines "
1191
- "nanoparticle fate in vivo."
1192
- )
1193
- b4_btn.click(
1194
- b4_run,
1195
- inputs=[b4_time, b4_kon_alb, b4_kon_apoe, b4_koff_alb, b4_koff_apoe],
1196
- outputs=[b4_plot, b4_note]
1197
- )
1198
-
1199
- with gr.Tab("🔬 Variant Concepts"):
1200
- gr.Markdown(SIMULATED_BANNER)
1201
- b5_class = gr.Dropdown(
1202
- list(VARIANT_RULES.keys()),
1203
- label="ACMG Classification",
1204
- value="VUS"
1205
- )
1206
- b5_btn = gr.Button("📋 Explain Classification", variant="primary")
1207
- b5_result = gr.Markdown()
1208
- with gr.Accordion("📖 Learning Mode", open=False):
1209
- gr.Markdown(
1210
- "**ACMG/AMP 2015 Classification Framework:**\n\n"
1211
- "1. **Pathogenic** — strong evidence of disease causation\n"
1212
- "2. **Likely Pathogenic** — >90% probability pathogenic\n"
1213
- "3. **VUS** — uncertain significance\n"
1214
- "4. **Likely Benign** — >90% probability benign\n"
1215
- "5. **Benign** — strong evidence of no disease effect"
1216
- )
1217
- b5_btn.click(b5_run, inputs=[b5_class], outputs=[b5_result])
1218
-
1219
- # ── SIDEBAR ──
1220
- with gr.Column(scale=1, min_width=260):
1221
- gr.Markdown("## 📓 Lab Journal")
1222
- note_input = gr.Textbox(label="Add note", placeholder="Your observation...", lines=2)
1223
- save_btn = gr.Button("💾 Save Note", size="sm")
1224
- refresh_btn = gr.Button("🔄 Refresh Journal", size="sm")
1225
- journal_display = gr.Markdown(value="*Click Refresh to load entries.*")
1226
-
1227
- def save_note(note):
1228
- if note.strip():
1229
- journal_log("Manual", "note", note.strip(), note.strip())
1230
- return journal_read()
1231
-
1232
- save_btn.click(save_note, inputs=[note_input], outputs=[journal_display])
1233
- refresh_btn.click(lambda: journal_read(), outputs=[journal_display])
1234
-
1235
- gr.Markdown(
1236
- "---\n"
1237
- "*K R&D Lab Cancer Research Suite · "
1238
- "All real-data tabs use live APIs with 24h caching · "
1239
- "Simulated tabs are clearly labeled ⚠️ SIMULATED · "
1240
- "Source attribution shown on every result*"
1241
- )
1242
-
1243
- return demo
1244
-
1245
 
1246
- # ── LAUNCH — must be outside if __name__ for HuggingFace Spaces ──
1247
- app = build_app()
1248
- app.launch(server_name="0.0.0.0", server_port=7860)
 
 
 
 
 
 
 
1
  import gradio as gr
 
 
 
 
 
 
 
 
 
2
  import pandas as pd
3
+ import numpy as np
4
+ import json, re, csv
5
  import matplotlib
6
  matplotlib.use("Agg")
7
  import matplotlib.pyplot as plt
8
+ from io import BytesIO
 
 
9
  from PIL import Image
10
+ from datetime import datetime
11
+ from pathlib import Path
12
 
13
+ BG = "#0f172a"
14
+ CARD = "#1e293b"
15
+ ACC = "#f97316"
16
+ ACC2 = "#38bdf8"
17
+ TXT = "#f1f5f9"
 
 
 
 
 
18
 
19
+ LOG_PATH = Path("/tmp/lab_journal.csv")
 
 
 
 
 
 
 
 
 
 
 
20
 
21
def log_entry(tab, inputs, result, note=""):
    """Append one row to the lab-journal CSV at ``LOG_PATH``.

    Journaling is best-effort: a failed write is reported through
    ``logging`` and swallowed, so a journal problem never breaks the
    handler that called this function.

    Args:
        tab: Name of the tool/tab that produced the entry.
        inputs: Inputs used for the run; stored as ``str(inputs)``.
        result: Outcome of the run; stored as ``str(result)`` truncated
            to 200 characters.
        note: Optional free-text note.
    """
    import logging  # function-scope import: only this block needs it

    fieldnames = ["timestamp", "tab", "inputs", "result", "note"]
    try:
        # A missing *or empty* file needs the header row; without it a CSV
        # reader would take the first data row for the column names.
        needs_header = not LOG_PATH.exists() or LOG_PATH.stat().st_size == 0
        with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
            writer = csv.DictWriter(f, fieldnames=fieldnames)
            if needs_header:
                writer.writeheader()
            writer.writerow({
                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
                "tab": tab,
                "inputs": str(inputs),
                "result": str(result)[:200],
                "note": note,
            })
    except Exception:
        # Deliberately broad: the journal must never take the UI down,
        # but the failure should at least be visible in the logs.
        logging.getLogger(__name__).warning(
            "Could not write lab journal entry to %s", LOG_PATH, exc_info=True
        )
37
 
38
def load_journal():
    """Return the lab journal stored at ``LOG_PATH`` as a DataFrame.

    When the file does not exist, or reading it fails for any reason, an
    empty DataFrame carrying the five journal columns is returned instead.
    """
    columns = ["timestamp", "tab", "inputs", "result", "note"]
    try:
        journal = pd.read_csv(LOG_PATH) if LOG_PATH.exists() else None
    except Exception:
        journal = None
    if journal is None:
        return pd.DataFrame(columns=columns)
    return journal
45
 
46
def save_note(note, tab, last_result):
    """Record a manual note in the journal and return the refreshed view.

    Returns:
        A ``(status_message, journal_dataframe)`` pair.
    """
    log_entry(tab, "", last_result, note)
    refreshed = load_journal()
    return "✅ Saved!", refreshed
 
 
 
 
 
 
 
49
 
50
+ MIRNA_DB = {
51
+ "BRCA2": [
52
+ {"miRNA":"hsa-miR-148a-3p","log2FC":-0.70,"padj":0.013,"targets":"DNMT1, AKT2","pathway":"Epigenetic reprogramming"},
53
+ {"miRNA":"hsa-miR-30e-5p","log2FC":-0.49,"padj":0.032,"targets":"MYC, KRAS","pathway":"Oncogene suppression"},
54
+ {"miRNA":"hsa-miR-551b-3p","log2FC":-0.59,"padj":0.048,"targets":"SMAD4, CDK6","pathway":"TGF-beta / CDK4/6"},
55
+ {"miRNA":"hsa-miR-22-3p","log2FC":-0.43,"padj":0.041,"targets":"HIF1A, PTEN","pathway":"Hypoxia / PI3K"},
56
+ {"miRNA":"hsa-miR-200c-3p","log2FC":-0.38,"padj":0.044,"targets":"ZEB1, ZEB2","pathway":"EMT suppression"},
57
+ ],
58
+ "BRCA1": [
59
+ {"miRNA":"hsa-miR-155-5p","log2FC":-0.81,"padj":0.008,"targets":"SHIP1, SOCS1","pathway":"Immune evasion"},
60
+ {"miRNA":"hsa-miR-146a-5p","log2FC":-0.65,"padj":0.019,"targets":"TRAF6, IRAK1","pathway":"NF-kB signalling"},
61
+ {"miRNA":"hsa-miR-21-5p","log2FC":-0.55,"padj":0.027,"targets":"PTEN, PDCD4","pathway":"Apoptosis"},
62
+ {"miRNA":"hsa-miR-17-5p","log2FC":-0.47,"padj":0.036,"targets":"RB1, E2F1","pathway":"Cell cycle"},
63
+ {"miRNA":"hsa-miR-34a-5p","log2FC":-0.41,"padj":0.049,"targets":"BCL2, CDK6","pathway":"p53 axis"},
64
+ ],
65
+ "TP53": [
66
+ {"miRNA":"hsa-miR-34a-5p","log2FC":-1.10,"padj":0.001,"targets":"BCL2, CDK6","pathway":"p53-miR-34 axis"},
67
+ {"miRNA":"hsa-miR-192-5p","log2FC":-0.90,"padj":0.005,"targets":"MDM2, DHFR","pathway":"p53 feedback"},
68
+ {"miRNA":"hsa-miR-145-5p","log2FC":-0.75,"padj":0.012,"targets":"MYC, EGFR","pathway":"Growth suppression"},
69
+ {"miRNA":"hsa-miR-107","log2FC":-0.62,"padj":0.023,"targets":"CDK6, HIF1B","pathway":"Hypoxia / cell cycle"},
70
+ {"miRNA":"hsa-miR-215-5p","log2FC":-0.51,"padj":0.038,"targets":"DTL, DHFR","pathway":"DNA damage response"},
71
+ ],
72
+ }
73
 
74
+ SIRNA_DB = {
75
+ "LUAD": [
76
+ {"Gene":"SPC24","dCERES":-0.175,"log2FC":1.13,"Drug_status":"Novel","siRNA":"GCAGCUGAAGAAACUGAAU"},
77
+ {"Gene":"BUB1B","dCERES":-0.119,"log2FC":1.12,"Drug_status":"Novel","siRNA":"CCAAAGAGCUGAAGAACAU"},
78
+ {"Gene":"CDC45","dCERES":-0.144,"log2FC":1.26,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
79
+ {"Gene":"PLK1","dCERES":-0.239,"log2FC":1.03,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
80
+ {"Gene":"CDK1","dCERES":-0.201,"log2FC":1.00,"Drug_status":"Clinical","siRNA":"GCAGAAGCACUGAAGAUUU"},
81
+ ],
82
+ "BRCA": [
83
+ {"Gene":"AURKA","dCERES":-0.165,"log2FC":1.20,"Drug_status":"Clinical","siRNA":"GCACUGAAGAUGCAGAAUU"},
84
+ {"Gene":"AURKB","dCERES":-0.140,"log2FC":1.15,"Drug_status":"Clinical","siRNA":"CCUGAAGACGCUCAAGGUU"},
85
+ {"Gene":"CENPW","dCERES":-0.125,"log2FC":0.95,"Drug_status":"Novel","siRNA":"GCAGAAGCACUGAAGAUUU"},
86
+ {"Gene":"RFC2","dCERES":-0.136,"log2FC":0.50,"Drug_status":"Novel","siRNA":"GCAAGAUGCAGAAGCACUU"},
87
+ {"Gene":"TYMS","dCERES":-0.131,"log2FC":0.72,"Drug_status":"Approved","siRNA":"GGACGCUCAAGAUGCAGAU"},
88
+ ],
89
+ "COAD": [
90
+ {"Gene":"KRAS","dCERES":-0.210,"log2FC":0.80,"Drug_status":"Clinical","siRNA":"GCUGGAGCUGGUGGUAGUU"},
91
+ {"Gene":"WEE1","dCERES":-0.180,"log2FC":1.05,"Drug_status":"Clinical","siRNA":"GCAGCUGAAGAAACUGAAU"},
92
+ {"Gene":"CHEK1","dCERES":-0.155,"log2FC":0.90,"Drug_status":"Clinical","siRNA":"CCAAAGAGCUGAAGAACAU"},
93
+ {"Gene":"RFC2","dCERES":-0.130,"log2FC":0.55,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
94
+ {"Gene":"PKMYT1","dCERES":-0.122,"log2FC":1.07,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
95
+ ],
96
  }
97
 
98
+ CERNA = [
99
+ {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"AKT1","pathway":"TREM2 core signaling"},
100
+ {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"NFKB1","pathway":"Neuroinflammation"},
101
+ {"lncRNA":"GAS5","miRNA":"hsa-miR-21-5p","target":"PTEN","pathway":"Neuroinflammation"},
102
+ {"lncRNA":"GAS5","miRNA":"hsa-miR-222-3p","target":"IL1B","pathway":"Neuroinflammation"},
103
+ {"lncRNA":"HOTAIRM1","miRNA":"hsa-miR-9-5p","target":"TREM2","pathway":"Direct TREM2 regulation"},
104
+ ]
105
+ ASO = [
106
+ {"lncRNA":"GAS5","position":119,"accessibility":0.653,"GC_pct":50,"Tm":47.2,"priority":"HIGH"},
107
+ {"lncRNA":"CYTOR","position":507,"accessibility":0.653,"GC_pct":50,"Tm":46.8,"priority":"HIGH"},
108
+ {"lncRNA":"HOTAIRM1","position":234,"accessibility":0.621,"GC_pct":44,"Tm":44.1,"priority":"MEDIUM"},
109
+ {"lncRNA":"LINC00847","position":89,"accessibility":0.598,"GC_pct":56,"Tm":48.3,"priority":"MEDIUM"},
110
+ {"lncRNA":"ZFAS1","position":312,"accessibility":0.571,"GC_pct":48,"Tm":45.5,"priority":"MEDIUM"},
111
  ]
112
 
113
+ FGFR3 = {
114
+ "P1 (hairpin loop)": [
115
+ {"Compound":"CHEMBL1575701","RNA_score":0.809,"Toxicity":0.01,"Final_score":0.793},
116
+ {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
117
+ {"Compound":"Thioguanine","RNA_score":0.888,"Toxicity":32.5,"Final_score":0.742},
118
+ {"Compound":"Deazaguanine","RNA_score":0.888,"Toxicity":35.0,"Final_score":0.735},
119
+ {"Compound":"CHEMBL441","RNA_score":0.775,"Toxicity":5.2,"Final_score":0.721},
120
+ ],
121
+ "P10 (G-quadruplex)": [
122
+ {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
123
+ {"Compound":"CHEMBL5411515","RNA_score":0.945,"Toxicity":37.1,"Final_score":0.761},
124
+ {"Compound":"CHEMBL90","RNA_score":0.760,"Toxicity":2.1,"Final_score":0.745},
125
+ {"Compound":"CHEMBL102","RNA_score":0.748,"Toxicity":8.4,"Final_score":0.712},
126
+ {"Compound":"Berberine","RNA_score":0.735,"Toxicity":3.2,"Final_score":0.708},
127
+ ],
128
+ }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
129
 
130
+ VARIANT_DB = {
131
+ "BRCA1:p.R1699Q": {"score":0.03,"cls":"Benign","conf":"High"},
132
+ "BRCA1:p.R1699W": {"score":0.97,"cls":"Pathogenic","conf":"High"},
133
+ "BRCA2:p.D2723A": {"score":0.999,"cls":"Pathogenic","conf":"High"},
134
+ "TP53:p.R248W": {"score":0.998,"cls":"Pathogenic","conf":"High"},
135
+ "TP53:p.R248Q": {"score":0.995,"cls":"Pathogenic","conf":"High"},
136
+ "EGFR:p.L858R": {"score":0.96,"cls":"Pathogenic","conf":"High"},
137
+ "ALK:p.F1174L": {"score":0.94,"cls":"Pathogenic","conf":"High"},
138
+ }
139
+ PLAIN = {
140
+ "Pathogenic": "This variant is likely to cause disease. Clinical follow-up is strongly recommended.",
141
+ "Likely Pathogenic":"This variant is probably harmful. Discuss with your doctor.",
142
+ "Benign": "This variant is likely harmless. Common in the general population.",
143
+ "Likely Benign": "This variant is probably harmless. No strong reason for concern.",
144
+ }
145
+ BM_W = {
146
+ "CTHRC1":0.18,"FHL2":0.15,"LDHA":0.14,"P4HA1":0.13,
147
+ "SERPINH1":0.12,"ABCA8":-0.11,"CA4":-0.10,"CKB":-0.09,
148
+ "NNMT":0.08,"CACNA2D2":-0.07
149
+ }
150
+ PROTEINS = ["albumin","apolipoprotein","fibrinogen","vitronectin",
151
+ "clusterin","igm","iga","igg","complement","transferrin",
152
+ "alpha-2-macroglobulin"]
153
 
154
def predict_mirna(gene):
    """Return the ``MIRNA_DB`` records for *gene* as a DataFrame.

    A gene with no entry yields an empty DataFrame. The lookup is recorded
    in the lab journal.
    """
    records = MIRNA_DB.get(gene, [])
    table = pd.DataFrame(records)
    hit_count = len(table)
    log_entry("BRCA2 miRNA", gene, f"Found {hit_count} miRNAs for {gene}")
    return table
158
 
159
def predict_sirna(cancer):
    """Return the ``SIRNA_DB`` target records for *cancer* as a DataFrame.

    A cancer type with no entry yields an empty DataFrame. The lookup is
    recorded in the lab journal.
    """
    records = SIRNA_DB.get(cancer, [])
    table = pd.DataFrame(records)
    hit_count = len(table)
    log_entry("TP53 siRNA", cancer, f"Found {hit_count} targets for {cancer}")
    return table
163
 
164
def get_lncrna():
    """Return the ceRNA and ASO tables as a ``(cerna_df, aso_df)`` pair.

    The load is recorded in the lab journal before the tables are built.
    """
    log_entry("lncRNA-TREM2", "load", "ceRNA+ASO tables")
    cerna_table = pd.DataFrame(CERNA)
    aso_table = pd.DataFrame(ASO)
    return cerna_table, aso_table
167
+
168
def predict_drug(pocket):
    """Return the compound table and a score bar chart for an FGFR3 pocket.

    Args:
        pocket: Key into ``FGFR3``. An unknown key yields an empty table
            and a placeholder chart instead of raising.

    Returns:
        ``(df, image)``: the compounds as a DataFrame and the rendered
        chart as a PIL image.
    """
    df = pd.DataFrame(FGFR3.get(pocket, []))
    fig, ax = plt.subplots(figsize=(6, 4), facecolor=CARD)
    try:
        ax.set_facecolor(CARD)
        if df.empty:
            # An empty frame has no "Compound"/"Final_score" columns, so
            # indexing it would raise KeyError; draw a notice instead.
            ax.text(0.5, 0.5, "No compounds for this pocket", color=TXT,
                    ha="center", va="center", transform=ax.transAxes)
        else:
            ax.barh(df["Compound"], df["Final_score"], color=ACC)
        ax.set_xlabel("Final Score", color=TXT)
        ax.tick_params(colors=TXT)
        for spine in ax.spines.values():
            spine.set_edgecolor("#334155")
        ax.set_title(f"Top compounds — {pocket}", color=TXT, fontsize=10)
        # Work on this figure explicitly rather than pyplot's implicit
        # "current figure", which is shared process-wide state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    buf.seek(0)
    top = df.iloc[0]["Compound"] if len(df) else "none"
    log_entry("FGFR3 Drug", pocket, f"Top: {top}")
    return df, Image.open(buf)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
185
 
186
def predict_variant(hgvs, sift, polyphen, gnomad):
    """Classify a variant and render the verdict as an HTML card.

    A variant present in ``VARIANT_DB`` (after stripping whitespace from
    *hgvs*) uses its stored class, confidence and score. Otherwise a
    rule-based score is built from three thresholds: SIFT < 0.05 (+0.4),
    PolyPhen > 0.85 (+0.35) and gnomAD frequency < 0.0001 (+0.25).

    Returns:
        An HTML string with the class, score, confidence, a score bar and
        the plain-language explanation from ``PLAIN``.
    """
    hgvs = hgvs.strip()
    known = VARIANT_DB.get(hgvs)
    if known is not None:
        cls = known["cls"]
        conf = known["conf"]
        score = known["score"]
    else:
        # (rule fired?, weight) in the order the weights are accumulated.
        evidence = (
            (sift < 0.05, 0.4),
            (polyphen > 0.85, 0.35),
            (gnomad < 0.0001, 0.25),
        )
        score = round(sum((weight for fired, weight in evidence if fired), 0.0), 3)
        if score > 0.6:
            cls = "Pathogenic"
        elif score > 0.4:
            cls = "Likely Pathogenic"
        else:
            cls = "Benign"
        if sift < 0.01 or sift > 0.9:
            conf = "High"
        else:
            conf = "Moderate"

    # "Likely Pathogenic" also contains "Pathogenic", so both get the alert.
    is_harmful = "Pathogenic" in cls
    colour = "#ef4444" if is_harmful else "#22c55e"
    icon = "⚠️ WARNING" if is_harmful else "✅ OK"
    bar_w = int(score * 100)
    explanation = PLAIN.get(cls, "")
    log_entry("OpenVariant", hgvs or f"SIFT={sift}", f"{cls} score={score}")

    card = [
        f"<div style='background:{CARD};padding:16px;border-radius:8px;",
        f"font-family:sans-serif;color:{TXT}'>",
        f"<h3 style='color:{colour}'>{icon} {cls}</h3>",
        f"<p>Score: <b>{score:.3f}</b> &nbsp;|&nbsp; Confidence: <b>{conf}</b></p>",
        "<div style='background:#334155;border-radius:4px;height:16px'>",
        f"<div style='background:{colour};height:16px;border-radius:4px;width:{bar_w}%'></div></div>",
        f"<p style='margin-top:12px'>{explanation}</p>",
        "<p style='font-size:11px;color:#64748b'>Research only. Not clinical.</p>",
        "</div>",
    ]
    return "".join(card)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
 
217
def predict_corona(size, zeta, peg, lipid):
    """Score an LNP formulation and report its predicted corona.

    The composite score (0-6) adds: lipid type (Ionizable 2, Cationic 1),
    |zeta| < 10 (1), PEG > 1.5 (2) and size < 100 (1). The score, capped
    at 4, indexes the dominant-protein list; the efficacy class is High
    for a score >= 4, Medium for >= 2, otherwise Low.

    Returns:
        A Markdown string with dominant protein, efficacy class and score.
    """
    lipid_points = 2 if lipid == "Ionizable" else 1 if lipid == "Cationic" else 0
    zeta_points = 1 if abs(zeta) < 10 else 0
    peg_points = 2 if peg > 1.5 else 0
    size_points = 1 if size < 100 else 0
    score = lipid_points + zeta_points + peg_points + size_points

    ranked_proteins = ("ApoE", "Albumin", "Fibrinogen", "Vitronectin", "ApoA-I")
    dominant = ranked_proteins[min(score, 4)]

    if score >= 4:
        efficacy = "High"
    elif score >= 2:
        efficacy = "Medium"
    else:
        efficacy = "Low"

    run_inputs = f"size={size},zeta={zeta},peg={peg},lipid={lipid}"
    run_result = f"dominant={dominant},efficacy={efficacy}"
    log_entry("LNP Corona", run_inputs, run_result)

    report_lines = [
        f"**Dominant corona protein:** {dominant}",
        f"**Predicted efficacy class:** {efficacy}",
        f"**Composite score:** {score}/6",
    ]
    return "\n\n".join(report_lines)
232
+
233
def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
    """Score ten biomarker levels and chart each marker's contribution.

    Each input is multiplied by the weight at the same position in
    ``BM_W`` (insertion order). The summed contributions are squashed with
    a logistic function, ``1 / (1 + exp(-2 * raw))``, and the sample is
    labelled ``CANCER`` when that probability exceeds 0.5.

    Returns:
        ``(html, image)``: an HTML verdict card and the contribution bar
        chart as a PIL image.
    """
    vals = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10]
    names = list(BM_W)
    weights = list(BM_W.values())
    # Compute the per-marker contributions once; the score is their sum.
    contribs = [v*w for v, w in zip(vals, weights)]
    raw = sum(contribs)
    prob = 1 / (1 + np.exp(-raw * 2))
    is_cancer = prob > 0.5
    label = "CANCER" if is_cancer else "HEALTHY"
    colour = "#ef4444" if is_cancer else "#22c55e"
    cols = [ACC if c > 0 else ACC2 for c in contribs]

    fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
    try:
        ax.set_facecolor(CARD)
        ax.barh(names, contribs, color=cols)
        ax.axvline(0, color=TXT, linewidth=0.8)
        ax.set_xlabel("Contribution to cancer score", color=TXT)
        ax.tick_params(colors=TXT, labelsize=8)
        for spine in ax.spines.values():
            spine.set_edgecolor("#334155")
        ax.set_title("Protein contributions", color=TXT, fontsize=10)
        # Work on this figure explicitly rather than pyplot's implicit
        # "current figure", which is shared process-wide state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    buf.seek(0)
    log_entry("Liquid Biopsy", f"CTHRC1={c1},FHL2={c2}...", f"{label} prob={prob:.2f}")
    return (
        f"<div style='background:{CARD};padding:12px;border-radius:8px;"
        f"color:{colour};font-size:20px;font-family:sans-serif'>"
        f"<b>{label}</b><br>"
        f"<span style='color:{TXT};font-size:14px'>Probability: {prob:.2f}</span></div>"
    ), Image.open(buf)
264
+
265
def predict_flow(size, zeta, peg, charge, flow_rate):
    """Compute the Corona Shift Index and plot static-vs-flow kinetics.

    The index is ``0.6 * flow_rate/40 + 0.2 * peg/5`` plus 0.2 for a
    cationic charge, capped at 1.0 and rounded to three decimals. Above
    0.6 it is reported as "High remodeling", above 0.3 as "Medium",
    otherwise "Stable".

    Args:
        size: Accepted for interface compatibility; not used here.
        zeta: Accepted for interface compatibility; not used here.
        peg: PEG value entering the index as ``peg / 5``.
        charge: ``"Cationic"`` adds 0.2 to the index; anything else adds 0.
        flow_rate: Flow rate; scales the index and both flow-curve rates.

    Returns:
        ``(markdown, image)``: the index summary and the kinetics plot as
        a PIL image.
    """
    flow_factor = flow_rate/40
    csi = (flow_factor*0.6 + (peg/5)*0.2 +
           (1 if charge == "Cationic" else 0)*0.2)
    csi = round(min(csi, 1.0), 3)
    stability = ("High remodeling" if csi > 0.6 else
                 "Medium" if csi > 0.3 else "Stable")
    t = np.linspace(0, 60, 200)
    # Flow speeds up both rate constants relative to the static curves.
    kf = 0.03 * (1 + flow_factor)
    ks = 0.038 * (1 + flow_factor)

    fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
    try:
        ax.set_facecolor(CARD)
        ax.plot(t, 60*np.exp(-0.03*t)+20, color="#60a5fa", ls="--", label="Albumin (static)")
        ax.plot(t, 60*np.exp(-kf*t)+10, color="#60a5fa", label="Albumin (flow)")
        ax.plot(t, 14*(1-np.exp(-0.038*t))+5, color=ACC, ls="--", label="ApoE (static)")
        ax.plot(t, 20*(1-np.exp(-ks*t))+5, color=ACC, label="ApoE (flow)")
        ax.set_xlabel("Time (min)", color=TXT)
        ax.set_ylabel("% Corona", color=TXT)
        ax.tick_params(colors=TXT)
        ax.legend(fontsize=7, labelcolor=TXT, facecolor=CARD)
        for spine in ax.spines.values():
            spine.set_edgecolor("#334155")
        ax.set_title("Vroman Effect", color=TXT, fontsize=9)
        # Work on this figure explicitly rather than pyplot's implicit
        # "current figure", which is shared process-wide state.
        fig.tight_layout()
        buf = BytesIO()
        fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
    finally:
        # Always release the figure, even if plotting or saving failed.
        plt.close(fig)
    buf.seek(0)
    log_entry("Flow Corona", f"flow={flow_rate},charge={charge}", f"CSI={csi},{stability}")
    return f"**Corona Shift Index: {csi}** — {stability}", Image.open(buf)
294
+
295
def predict_bbb(smiles, pka, zeta):
    """Estimate ApoE corona share and BBB-crossing probability for an LNP.

    A crude logP proxy is derived from the SMILES string by counting the
    characters "C" (+0.3 each) and "O" (-0.5 each) on top of a 1.5 offset.
    The ApoE percentage combines ``7.0 - pka``, ``abs(zeta)`` and that
    proxy, limited to 0-40; the BBB probability is ``apoe_pct / 30`` capped
    at 0.95.

    Returns:
        A ``(markdown, image)`` tuple: the ApoE percentage with its tier and
        the BBB probability, and a PIL image of a five-axis radar chart.
    """
    smiles = smiles or ""  # a cleared textbox must not crash the handler
    logp = smiles.count("C")*0.3 - smiles.count("O")*0.5 + 1.5
    apoe_pct = max(0, min(40, (7.0-pka)*8 + abs(zeta)*0.5 + logp*0.8))
    bbb_prob = min(0.95, apoe_pct/30)
    tier = ("HIGH (>20%)" if apoe_pct > 20 else
            "MEDIUM (10-20%)" if apoe_pct > 10 else "LOW (<10%)")

    cats = ["ApoE%","BBB","logP","pKa fit","Zeta"]
    raw_vals = [apoe_pct/40, bbb_prob, min(logp/5,1),
                (7-abs(pka-6.5))/7, (10-abs(zeta))/10]
    # Radar axes are normalised scores.  |zeta| > 10 or a negative logP
    # proxy would otherwise give a negative radius and distort the polygon.
    vals = [max(0.0, min(1.0, v)) for v in raw_vals]
    angles = np.linspace(0, 2*np.pi, len(cats), endpoint=False).tolist()
    # Repeat the first point so the outline closes.
    v2, a2 = vals+[vals[0]], angles+[angles[0]]

    # Use the figure handle instead of pyplot's global "current figure" so
    # concurrent requests cannot touch each other's plot, and always close.
    fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={"polar":True}, facecolor=CARD)
    buf = BytesIO()
    try:
        ax.set_facecolor(CARD)
        ax.plot(a2, v2, color=ACC, linewidth=2)
        ax.fill(a2, v2, color=ACC, alpha=0.2)
        ax.set_ylim(0, 1)  # fixed scale keeps charts comparable between runs
        ax.set_xticks(angles)
        ax.set_xticklabels(cats, color=TXT, fontsize=8)
        ax.tick_params(colors=TXT)
        fig.tight_layout()
        fig.savefig(buf, format="png", dpi=120, facecolor=CARD)
    finally:
        plt.close(fig)
    buf.seek(0)

    log_entry("LNP Brain", f"pka={pka},zeta={zeta}", f"ApoE={apoe_pct:.1f}%,BBB={bbb_prob:.2f}")
    return (f"**Predicted ApoE:** {apoe_pct:.1f}% — {tier}\n\n"
            f"**BBB Probability:** {bbb_prob:.2f}"), Image.open(buf)
321
+
322
def extract_corona(text):
    """Pull nanoparticle and protein-corona fields out of free text.

    Regular expressions pick up the first size (nm), zeta potential (mV)
    and PDI value; fixed keyword lists detect the protein source, the
    corona proteins (names from ``PROTEINS``, defined elsewhere in this
    file) and the lipid components.

    NOTE(review): protein matching compares each ``PROTEINS`` entry against
    the lower-cased text, so entries are presumably lower-case -- confirm.

    Returns:
        A ``(json_text, summary)`` tuple: the extracted record serialised
        with two-space indentation, and either "All key fields extracted"
        or the missing-field flags joined by " | ".
    """
    text = text or ""  # a cleared textbox must not crash the handler
    lowered = text.lower()  # computed once for every case-insensitive check
    out = {
        "nanoparticle_composition": "",
        "size_nm": None, "zeta_mv": None, "PDI": None,
        "protein_source": "", "corona_proteins": [], "confidence": {}
    }

    m = re.search(r"(\d+\.?\d*)\s*(?:nm|nanometer)", text, re.I)
    if m:
        out["size_nm"] = float(m.group(1))
        out["confidence"]["size_nm"] = "HIGH"

    # Accept the typographic minus (U+2212) as a sign; without it the sign
    # is dropped and a negative potential is reported as positive.
    m = re.search(r"([+\-\u2212]?\d+\.?\d*)\s*mV", text, re.I)
    if m:
        out["zeta_mv"] = float(m.group(1).replace("\u2212", "-"))
        out["confidence"]["zeta_mv"] = "HIGH"

    m = re.search(r"PDI\s*[=:of]*\s*(\d+\.?\d*)", text, re.I)
    if m:
        out["PDI"] = float(m.group(1))
        out["confidence"]["PDI"] = "HIGH"

    for src in ["human plasma","human serum","fetal bovine serum","FBS","PBS"]:
        if src.lower() in lowered:
            out["protein_source"] = src
            out["confidence"]["protein_source"] = "HIGH"
            break

    out["corona_proteins"] = [
        {"name": p, "confidence": "MEDIUM"} for p in PROTEINS if p in lowered
    ]

    # Keep the exact-case substring match and additionally accept a
    # case-insensitive whole-word match, so "Cholesterol" is recognised
    # without "DOPE" firing on words such as "doped".
    lipids = ["DSPC","DOPE","MC3","DLin","cholesterol","PEG","DOTAP"]
    found = [lip for lip in lipids
             if lip in text
             or re.search(rf"\b{re.escape(lip)}\b", text, re.I)]
    out["nanoparticle_composition"] = " ".join(found)

    # Compare with None: a measured value of 0 (e.g. 0 mV) is still a value.
    flags = []
    if out["size_nm"] is None: flags.append("size_nm not found")
    if out["zeta_mv"] is None: flags.append("zeta_mv not found")
    if not out["corona_proteins"]: flags.append("no proteins detected")
    summary = "All key fields extracted" if not flags else " | ".join(flags)

    log_entry("AutoCorona NLP", text[:80]+"...",
              f"proteins={len(out['corona_proteins'])},{summary}")
    return json.dumps(out, indent=2), summary
360
+
361
# Dark-theme overrides handed to gr.Blocks: one "selector{declarations}"
# rule per pair, concatenated without separators.
css = "".join(
    f"{selector}{{{declarations}}}"
    for selector, declarations in (
        ("body,.gradio-container", f"background:{BG}!important;color:{TXT}!important"),
        (".tab-nav button", f"color:{TXT}!important;background:{CARD}!important"),
        (".tab-nav button.selected", f"border-bottom:2px solid {ACC}!important;color:{ACC}!important"),
        ("h1,h2,h3", f"color:{ACC}!important"),
        (".gr-button-primary", f"background:{ACC}!important;border:none!important"),
        ("footer", "display:none!important"),
    )
)
369
 
370
# Markdown shown on the "Learning Mode" tab.  Blank lines separate the
# numbered steps from the "Key concept" paragraph and from the "---" rules
# so the paragraph is not absorbed into the last list item.
LEARNING_CASES = """
## 🧪 Top 5 Guided Investigations
### Case 1 — Beginner 🟢
**Question:** Why is the same gene position benign vs pathogenic?
1. OpenVariant → enter `BRCA1:p.R1699Q` → Benign
2. Enter `BRCA1:p.R1699W` → Pathogenic
3. Same position, different amino acid — what changed?

**Key concept:** Amino acid polarity determines protein folding impact.

---
### Case 2 — Beginner 🟢
**Question:** How does PEG% change what protein sticks to LNPs?
1. LNP Corona → Ionizable, Zeta=-5, Size=100, PEG=0.5% → note protein
2. PEG=2.5% → compare
3. LNP Brain → pKa=6.5 → compare ApoE%

**Key concept:** More PEG → less Fibrinogen, more ApoE.

---
### Case 3 — Intermediate 🟡
**Question:** Does blood flow change corona composition?
1. Flow Corona → Flow=0, Ionizable
2. Flow=40 (arterial) → compare ApoE curve
3. At what minute does ApoE plateau?

**Key concept:** Vroman effect — albumin displaced by ApoE under flow.

---
### Case 4 — Intermediate 🟡
**Question:** Which cancer has the most novel siRNA targets?
1. TP53 siRNA → LUAD → count "Novel"
2. Repeat BRCA, COAD
3. Pick one Novel gene → Google: "[gene] cancer therapeutic target"

---
### Case 5 — Advanced 🔴
**Question:** Can you identify cancer from protein levels?
1. Liquid Biopsy → all sliders=0 → HEALTHY
2. Set CTHRC1=2.5, FHL2=2.0, LDHA=1.8 → observe
3. Find minimum CTHRC1 that tips to CANCER

**Key concept:** CTHRC1 weight (0.18) dominates the score.
"""
406
 
407
# NOTE(review): the layout nesting below (which components sit inside each
# gr.Row) was reconstructed from statement order -- confirm against the UI.
with gr.Blocks(css=css, title="K R&D Lab") as demo:

    # ── Page header ──────────────────────────────────────────────
    gr.Markdown(
        "# 🧬 K R&D Lab — Computational Biology Suite\n"
        "**Oksana Kolisnyk** · ML Engineer · "
        "[KOSATIKS GROUP](https://kosatiks-group.pp.ua)\n"
        "> 10 open-source tools + lab journal."
    )

    with gr.Tabs():

        # ── Tool 1: miRNA lookup ─────────────────────────────────
        with gr.TabItem("🧬 BRCA2 miRNA"):
            gr.Markdown("### Tumor Suppressor miRNAs")
            g1 = gr.Dropdown(["BRCA2","BRCA1","TP53"], value="BRCA2", label="Gene")
            b1 = gr.Button("Find miRNAs", variant="primary")
            o1 = gr.Dataframe(label="Top 5 downregulated miRNAs")
            gr.Examples([["BRCA2"],["TP53"]], inputs=[g1])
            b1.click(fn=predict_mirna, inputs=g1, outputs=o1)

        # ── Tool 2: siRNA targets ────────────────────────────────
        with gr.TabItem("💉 TP53 siRNA"):
            gr.Markdown("### Synthetic Lethal siRNA Targets")
            g2 = gr.Dropdown(["LUAD","BRCA","COAD"], value="LUAD", label="Cancer type")
            b2 = gr.Button("Find Targets", variant="primary")
            o2 = gr.Dataframe(label="Top 5 siRNA targets")
            gr.Examples([["LUAD"],["BRCA"]], inputs=[g2])
            b2.click(fn=predict_sirna, inputs=g2, outputs=o2)

        # ── Tool 3: lncRNA networks (no inputs) ──────────────────
        with gr.TabItem("🧠 lncRNA-TREM2"):
            gr.Markdown("### lncRNA Networks in Alzheimer's")
            b3 = gr.Button("Load Results", variant="primary")
            o3a = gr.Dataframe(label="ceRNA Network")
            o3b = gr.Dataframe(label="ASO Candidates")
            b3.click(fn=get_lncrna, inputs=[], outputs=[o3a, o3b])

        # ── Tool 4: compound screening ───────────────────────────
        with gr.TabItem("💊 FGFR3 Drug"):
            gr.Markdown("### RNA-Directed Drug Discovery: FGFR3")
            g4 = gr.Radio(
                ["P1 (hairpin loop)","P10 (G-quadruplex)"],
                value="P1 (hairpin loop)",
                label="Target pocket",
            )
            b4 = gr.Button("Screen Compounds", variant="primary")
            o4t = gr.Dataframe(label="Top 5 candidates")
            o4p = gr.Image(label="Binding scores")
            gr.Examples([["P1 (hairpin loop)"],["P10 (G-quadruplex)"]], inputs=[g4])
            b4.click(fn=predict_drug, inputs=g4, outputs=[o4t, o4p])

        # ── Tool 5: variant pathogenicity ────────────────────────
        with gr.TabItem("🔬 OpenVariant"):
            gr.Markdown("### OpenVariant — Pathogenicity Classifier\nAUC=0.939 on ClinVar 2026.")
            hgvs = gr.Textbox(label="HGVS notation", placeholder="BRCA1:p.R1699Q")
            gr.Markdown("**Or enter scores manually:**")
            with gr.Row():
                sift = gr.Slider(0, 1, value=0.5, step=0.01, label="SIFT (0=damaging)")
                pp = gr.Slider(0, 1, value=0.5, step=0.01, label="PolyPhen-2")
                gn = gr.Slider(0, 0.01, value=0.001, step=0.0001, label="gnomAD AF")
            b5 = gr.Button("Predict Pathogenicity", variant="primary")
            o5 = gr.HTML(label="Result")
            gr.Examples(
                [
                    ["BRCA1:p.R1699Q", 0.82, 0.05, 0.0012],
                    ["TP53:p.R248W", 0.00, 1.00, 0.0],
                    ["BRCA2:p.D2723A", 0.01, 0.98, 0.0],
                ],
                inputs=[hgvs, sift, pp, gn],
            )
            b5.click(fn=predict_variant, inputs=[hgvs, sift, pp, gn], outputs=o5)

        # ── Tool 6: corona prediction ────────────────────────────
        with gr.TabItem("🧪 LNP Corona"):
            gr.Markdown("### LNP Protein Corona Prediction")
            with gr.Row():
                sz = gr.Slider(50, 300, value=100, step=1, label="Size (nm)")
                zt = gr.Slider(-40, 10, value=-5, step=1, label="Zeta (mV)")
            with gr.Row():
                pg = gr.Slider(0, 5, value=1.5, step=0.1, label="PEG mol%")
                lp = gr.Dropdown(
                    ["Ionizable","Cationic","Anionic","Neutral"],
                    value="Ionizable",
                    label="Lipid type",
                )
            b6 = gr.Button("Predict", variant="primary")
            o6 = gr.Markdown()
            gr.Examples([[100,-5,1.5,"Ionizable"],[80,5,0.5,"Cationic"]], inputs=[sz,zt,pg,lp])
            b6.click(fn=predict_corona, inputs=[sz,zt,pg,lp], outputs=o6)

        # ── Tool 7: liquid biopsy classifier ─────────────────────
        with gr.TabItem("🩸 Liquid Biopsy"):
            gr.Markdown("### Protein Corona Cancer Diagnostics\nClassify cancer vs healthy.")
            # Ten identical sliders, five per row, created in label order.
            with gr.Row():
                p1, p2, p3, p4, p5 = (
                    gr.Slider(-3, 3, value=0, step=0.1, label=marker)
                    for marker in ("CTHRC1", "FHL2", "LDHA", "P4HA1", "SERPINH1")
                )
            with gr.Row():
                p6, p7, p8, p9, p10 = (
                    gr.Slider(-3, 3, value=0, step=0.1, label=marker)
                    for marker in ("ABCA8", "CA4", "CKB", "NNMT", "CACNA2D2")
                )
            b7 = gr.Button("Classify", variant="primary")
            o7t = gr.HTML()
            o7p = gr.Image(label="Feature contributions")
            gr.Examples(
                [
                    [2,2,1.5,1.8,1.6,-1,-1.2,-0.8,1.4,-1.1],
                    [0,0,0,0,0,0,0,0,0,0],
                ],
                inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10],
            )
            b7.click(
                fn=predict_cancer,
                inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10],
                outputs=[o7t,o7p],
            )

        # ── Tool 8: corona under flow ────────────────────────────
        with gr.TabItem("🌊 Flow Corona"):
            gr.Markdown("### Corona Remodeling Under Blood Flow")
            with gr.Row():
                s8 = gr.Slider(50, 300, value=100, step=1, label="Size (nm)")
                z8 = gr.Slider(-40, 10, value=-5, step=1, label="Zeta (mV)")
                pg8 = gr.Slider(0, 5, value=1.5, step=0.1, label="PEG mol%")
            with gr.Row():
                ch8 = gr.Dropdown(
                    ["Ionizable","Cationic","Anionic","Neutral"],
                    value="Ionizable",
                    label="Charge type",
                )
                fl8 = gr.Slider(0, 40, value=20, step=1, label="Flow rate cm/s (aorta=40)")
            b8 = gr.Button("Model Vroman Effect", variant="primary")
            o8t = gr.Markdown()
            o8p = gr.Image(label="Kinetics plot")
            gr.Examples(
                [[100,-5,1.5,"Ionizable",40],[150,5,0.5,"Cationic",10]],
                inputs=[s8,z8,pg8,ch8,fl8],
            )
            b8.click(fn=predict_flow, inputs=[s8,z8,pg8,ch8,fl8], outputs=[o8t,o8p])

        # ── Tool 9: brain delivery ───────────────────────────────
        with gr.TabItem("🧠 LNP Brain"):
            gr.Markdown("### LNP Brain Delivery Predictor")
            smi = gr.Textbox(
                label="Ionizable lipid SMILES",
                value="CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C",
            )
            with gr.Row():
                pk = gr.Slider(4, 8, value=6.5, step=0.1, label="pKa")
                zt9 = gr.Slider(-20, 10, value=-3, step=1, label="Zeta (mV)")
            b9 = gr.Button("Predict BBB Crossing", variant="primary")
            o9t = gr.Markdown()
            o9p = gr.Image(label="Radar profile")
            gr.Examples(
                [["CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C", 6.5, -3]],
                inputs=[smi, pk, zt9],
            )
            b9.click(fn=predict_bbb, inputs=[smi, pk, zt9], outputs=[o9t, o9p])

        # ── Tool 10: text extraction ─────────────────────────────
        with gr.TabItem("📄 AutoCorona NLP"):
            gr.Markdown("### AutoCorona NLP Extraction\nPaste any paper abstract.")
            txt = gr.Textbox(lines=6, label="Paper abstract", placeholder="Paste text here...")
            b10 = gr.Button("Extract Data", variant="primary")
            o10j = gr.Code(label="Extracted JSON", language="json")
            o10f = gr.Textbox(label="Validation flags")
            gr.Examples(
                [[
                    "LNPs composed of MC3, DSPC, Cholesterol (50:10:40 mol%) with 1.5% PEG-DMG. "
                    "Hydrodynamic diameter was 98 nm, zeta potential -3.2 mV, PDI 0.12. "
                    "Incubated in human plasma. Corona: albumin, apolipoprotein E, fibrinogen."
                ]],
                inputs=[txt],
            )
            b10.click(fn=extract_corona, inputs=txt, outputs=[o10j, o10f])

        # ── Lab journal ──────────────────────────────────────────
        with gr.TabItem("📓 Lab Journal"):
            gr.Markdown("### Your Research Log\nEvery query is auto-saved.")
            with gr.Row():
                note_text = gr.Textbox(
                    label="📝 Add observation / conclusion",
                    placeholder="What did you discover? What's your next question?",
                    lines=3,
                )
                note_tab = gr.Textbox(label="Which tool?", value="General")
                note_last = gr.Textbox(label="Result to annotate", visible=False)
            save_btn = gr.Button("💾 Save Observation", variant="primary")
            save_msg = gr.Markdown()
            # The table is seeded once, when the interface is built.
            journal_df = gr.Dataframe(
                label="📋 Full History",
                value=load_journal(),
                interactive=False,
            )
            refresh_btn = gr.Button("🔄 Refresh")
            refresh_btn.click(fn=load_journal, inputs=[], outputs=journal_df)
            save_btn.click(
                fn=save_note,
                inputs=[note_text, note_tab, note_last],
                outputs=[save_msg, journal_df],
            )
            gr.Markdown("📥 Log saved as `lab_journal.csv` in the app folder.")

        # ── Learning mode ────────────────────────────────────────
        with gr.TabItem("📚 Learning Mode"):
            gr.Markdown(LEARNING_CASES)
            gr.Markdown("---\n### 📖 Quick Reference")
            gr.Markdown(
                "\n"
                "| Tool | Predicts | Key input |\n"
                "|------|----------|-----------|\n"
                "| OpenVariant | Pathogenic/Benign | Gene mutation |\n"
                "| LNP Corona | Dominant protein | Formulation |\n"
                "| Flow Corona | Vroman kinetics | Flow rate |\n"
                "| LNP Brain | ApoE% + BBB prob | pKa + zeta |\n"
                "| Liquid Biopsy | Cancer/Healthy | Protein z-scores |\n"
                "| BRCA2 miRNA | Downregulated miRNAs | Gene name |\n"
                "| TP53 siRNA | Synthetic lethal targets | Cancer type |\n"
                "| lncRNA-TREM2 | ceRNA + ASOs | — |\n"
                "| FGFR3 Drug | Small molecules | Pocket type |\n"
                "| AutoCorona NLP | Structured data | Abstract text |\n"
            )
            gr.Markdown(
                "\n"
                "### 🔗 Resources\n"
                "- [PubMed](https://pubmed.ncbi.nlm.nih.gov)\n"
                "- [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/)\n"
                "- [UniProt](https://www.uniprot.org)\n"
                "- [ChEMBL](https://www.ebi.ac.uk/chembl/)\n"
            )

    # ── Page footer ──────────────────────────────────────────────
    gr.Markdown(
        "---\n**K R&D Lab** | Research only — not clinical | "
        "[GitHub](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026) | "
        "[KOSATIKS GROUP 🦈](https://kosatiks-group.pp.ua)"
    )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
599
 
600
# Start the web server only when this file is run as a script, so that
# importing the module (tests, tooling) does not block on the server.
if __name__ == "__main__":
    demo.launch(server_name="0.0.0.0", server_port=7860)