Spaces:

K-RnD-Lab
/

Learning-Playground_03-2026

Sleeping

App Files Files Community

TEZv committed about 1 month ago

Commit

1b9269f

verified ·

1 Parent(s): 4301072

Update app.py

Browse files

Files changed (1) hide show

app.py +1206 -567

app.py CHANGED Viewed

@@ -1,609 +1,1248 @@
 import gradio as gr
-import pandas as pd
 import numpy as np
-import json, re, csv
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
-from io import BytesIO
 from PIL import Image
-from datetime import datetime
-from pathlib import Path
-BG   = "#0f172a"
-CARD = "#1e293b"
-ACC  = "#f97316"
-ACC2 = "#38bdf8"
-TXT  = "#f1f5f9"
-LOG_PATH = Path("/tmp/lab_journal.csv")
-def log_entry(tab, inputs, result, note=""):
     try:
-        write_header = not LOG_PATH.exists()
-        with open(LOG_PATH, "a", newline="", encoding="utf-8") as f:
-            w = csv.DictWriter(f, fieldnames=["timestamp","tab","inputs","result","note"])
-            if write_header:
-                w.writeheader()
-            w.writerow({
-                "timestamp": datetime.now().strftime("%Y-%m-%d %H:%M"),
-                "tab": tab,
-                "inputs": str(inputs),
-                "result": str(result)[:200],
-                "note": note
-            })
     except Exception:
         pass
-def load_journal():
     try:
-        if not LOG_PATH.exists():
-            return pd.DataFrame(columns=["timestamp","tab","inputs","result","note"])
-        return pd.read_csv(LOG_PATH)
     except Exception:
-        return pd.DataFrame(columns=["timestamp","tab","inputs","result","note"])
-def save_note(note, tab, last_result):
-    log_entry(tab, "", last_result, note)
-    return "✅ Saved!", load_journal()
-MIRNA_DB = {
-    "BRCA2": [
-        {"miRNA":"hsa-miR-148a-3p","log2FC":-0.70,"padj":0.013,"targets":"DNMT1, AKT2","pathway":"Epigenetic reprogramming"},
-        {"miRNA":"hsa-miR-30e-5p","log2FC":-0.49,"padj":0.032,"targets":"MYC, KRAS","pathway":"Oncogene suppression"},
-        {"miRNA":"hsa-miR-551b-3p","log2FC":-0.59,"padj":0.048,"targets":"SMAD4, CDK6","pathway":"TGF-beta / CDK4/6"},
-        {"miRNA":"hsa-miR-22-3p","log2FC":-0.43,"padj":0.041,"targets":"HIF1A, PTEN","pathway":"Hypoxia / PI3K"},
-        {"miRNA":"hsa-miR-200c-3p","log2FC":-0.38,"padj":0.044,"targets":"ZEB1, ZEB2","pathway":"EMT suppression"},
-    ],
-    "BRCA1": [
-        {"miRNA":"hsa-miR-155-5p","log2FC":-0.81,"padj":0.008,"targets":"SHIP1, SOCS1","pathway":"Immune evasion"},
-        {"miRNA":"hsa-miR-146a-5p","log2FC":-0.65,"padj":0.019,"targets":"TRAF6, IRAK1","pathway":"NF-kB signalling"},
-        {"miRNA":"hsa-miR-21-5p","log2FC":-0.55,"padj":0.027,"targets":"PTEN, PDCD4","pathway":"Apoptosis"},
-        {"miRNA":"hsa-miR-17-5p","log2FC":-0.47,"padj":0.036,"targets":"RB1, E2F1","pathway":"Cell cycle"},
-        {"miRNA":"hsa-miR-34a-5p","log2FC":-0.41,"padj":0.049,"targets":"BCL2, CDK6","pathway":"p53 axis"},
-    ],
-    "TP53": [
-        {"miRNA":"hsa-miR-34a-5p","log2FC":-1.10,"padj":0.001,"targets":"BCL2, CDK6","pathway":"p53-miR-34 axis"},
-        {"miRNA":"hsa-miR-192-5p","log2FC":-0.90,"padj":0.005,"targets":"MDM2, DHFR","pathway":"p53 feedback"},
-        {"miRNA":"hsa-miR-145-5p","log2FC":-0.75,"padj":0.012,"targets":"MYC, EGFR","pathway":"Growth suppression"},
-        {"miRNA":"hsa-miR-107","log2FC":-0.62,"padj":0.023,"targets":"CDK6, HIF1B","pathway":"Hypoxia / cell cycle"},
-        {"miRNA":"hsa-miR-215-5p","log2FC":-0.51,"padj":0.038,"targets":"DTL, DHFR","pathway":"DNA damage response"},
-    ],
-}
-SIRNA_DB = {
-    "LUAD": [
-        {"Gene":"SPC24","dCERES":-0.175,"log2FC":1.13,"Drug_status":"Novel","siRNA":"GCAGCUGAAGAAACUGAAU"},
-        {"Gene":"BUB1B","dCERES":-0.119,"log2FC":1.12,"Drug_status":"Novel","siRNA":"CCAAAGAGCUGAAGAACAU"},
-        {"Gene":"CDC45","dCERES":-0.144,"log2FC":1.26,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
-        {"Gene":"PLK1","dCERES":-0.239,"log2FC":1.03,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
-        {"Gene":"CDK1","dCERES":-0.201,"log2FC":1.00,"Drug_status":"Clinical","siRNA":"GCAGAAGCACUGAAGAUUU"},
-    ],
-    "BRCA": [
-        {"Gene":"AURKA","dCERES":-0.165,"log2FC":1.20,"Drug_status":"Clinical","siRNA":"GCACUGAAGAUGCAGAAUU"},
-        {"Gene":"AURKB","dCERES":-0.140,"log2FC":1.15,"Drug_status":"Clinical","siRNA":"CCUGAAGACGCUCAAGGUU"},
-        {"Gene":"CENPW","dCERES":-0.125,"log2FC":0.95,"Drug_status":"Novel","siRNA":"GCAGAAGCACUGAAGAUUU"},
-        {"Gene":"RFC2","dCERES":-0.136,"log2FC":0.50,"Drug_status":"Novel","siRNA":"GCAAGAUGCAGAAGCACUU"},
-        {"Gene":"TYMS","dCERES":-0.131,"log2FC":0.72,"Drug_status":"Approved","siRNA":"GGACGCUCAAGAUGCAGAU"},
-    ],
-    "COAD": [
-        {"Gene":"KRAS","dCERES":-0.210,"log2FC":0.80,"Drug_status":"Clinical","siRNA":"GCUGGAGCUGGUGGUAGUU"},
-        {"Gene":"WEE1","dCERES":-0.180,"log2FC":1.05,"Drug_status":"Clinical","siRNA":"GCAGCUGAAGAAACUGAAU"},
-        {"Gene":"CHEK1","dCERES":-0.155,"log2FC":0.90,"Drug_status":"Clinical","siRNA":"CCAAAGAGCUGAAGAACAU"},
-        {"Gene":"RFC2","dCERES":-0.130,"log2FC":0.55,"Drug_status":"Novel","siRNA":"GCAUCAAGAUGAAGGAGAU"},
-        {"Gene":"PKMYT1","dCERES":-0.122,"log2FC":1.07,"Drug_status":"Clinical","siRNA":"GACGCUCAAGAUGCAGAUU"},
-    ],
 }
-CERNA = [
-    {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"AKT1","pathway":"TREM2 core signaling"},
-    {"lncRNA":"CYTOR","miRNA":"hsa-miR-138-5p","target":"NFKB1","pathway":"Neuroinflammation"},
-    {"lncRNA":"GAS5","miRNA":"hsa-miR-21-5p","target":"PTEN","pathway":"Neuroinflammation"},
-    {"lncRNA":"GAS5","miRNA":"hsa-miR-222-3p","target":"IL1B","pathway":"Neuroinflammation"},
-    {"lncRNA":"HOTAIRM1","miRNA":"hsa-miR-9-5p","target":"TREM2","pathway":"Direct TREM2 regulation"},
-]
-ASO = [
-    {"lncRNA":"GAS5","position":119,"accessibility":0.653,"GC_pct":50,"Tm":47.2,"priority":"HIGH"},
-    {"lncRNA":"CYTOR","position":507,"accessibility":0.653,"GC_pct":50,"Tm":46.8,"priority":"HIGH"},
-    {"lncRNA":"HOTAIRM1","position":234,"accessibility":0.621,"GC_pct":44,"Tm":44.1,"priority":"MEDIUM"},
-    {"lncRNA":"LINC00847","position":89,"accessibility":0.598,"GC_pct":56,"Tm":48.3,"priority":"MEDIUM"},
-    {"lncRNA":"ZFAS1","position":312,"accessibility":0.571,"GC_pct":48,"Tm":45.5,"priority":"MEDIUM"},
 ]
-FGFR3 = {
-    "P1 (hairpin loop)": [
-        {"Compound":"CHEMBL1575701","RNA_score":0.809,"Toxicity":0.01,"Final_score":0.793},
-        {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
-        {"Compound":"Thioguanine","RNA_score":0.888,"Toxicity":32.5,"Final_score":0.742},
-        {"Compound":"Deazaguanine","RNA_score":0.888,"Toxicity":35.0,"Final_score":0.735},
-        {"Compound":"CHEMBL441","RNA_score":0.775,"Toxicity":5.2,"Final_score":0.721},
-    ],
-    "P10 (G-quadruplex)": [
-        {"Compound":"CHEMBL15727","RNA_score":0.805,"Toxicity":0.00,"Final_score":0.789},
-        {"Compound":"CHEMBL5411515","RNA_score":0.945,"Toxicity":37.1,"Final_score":0.761},
-        {"Compound":"CHEMBL90","RNA_score":0.760,"Toxicity":2.1,"Final_score":0.745},
-        {"Compound":"CHEMBL102","RNA_score":0.748,"Toxicity":8.4,"Final_score":0.712},
-        {"Compound":"Berberine","RNA_score":0.735,"Toxicity":3.2,"Final_score":0.708},
-    ],
-}
-VARIANT_DB = {
-    "BRCA1:p.R1699Q": {"score":0.03,"cls":"Benign","conf":"High"},
-    "BRCA1:p.R1699W": {"score":0.97,"cls":"Pathogenic","conf":"High"},
-    "BRCA2:p.D2723A": {"score":0.999,"cls":"Pathogenic","conf":"High"},
-    "TP53:p.R248W":   {"score":0.998,"cls":"Pathogenic","conf":"High"},
-    "TP53:p.R248Q":   {"score":0.995,"cls":"Pathogenic","conf":"High"},
-    "EGFR:p.L858R":   {"score":0.96,"cls":"Pathogenic","conf":"High"},
-    "ALK:p.F1174L":   {"score":0.94,"cls":"Pathogenic","conf":"High"},
-}
-PLAIN = {
-    "Pathogenic":       "This variant is likely to cause disease. Clinical follow-up is strongly recommended.",
-    "Likely Pathogenic":"This variant is probably harmful. Discuss with your doctor.",
-    "Benign":           "This variant is likely harmless. Common in the general population.",
-    "Likely Benign":    "This variant is probably harmless. No strong reason for concern.",
-}
-BM_W = {
-    "CTHRC1":0.18,"FHL2":0.15,"LDHA":0.14,"P4HA1":0.13,
-    "SERPINH1":0.12,"ABCA8":-0.11,"CA4":-0.10,"CKB":-0.09,
-    "NNMT":0.08,"CACNA2D2":-0.07
-}
-PROTEINS = ["albumin","apolipoprotein","fibrinogen","vitronectin",
-            "clusterin","igm","iga","igg","complement","transferrin",
-            "alpha-2-macroglobulin"]
-def predict_mirna(gene):
-    df = pd.DataFrame(MIRNA_DB.get(gene, []))
-    log_entry("BRCA2 miRNA", gene, f"Found {len(df)} miRNAs for {gene}")
-    return df
-def predict_sirna(cancer):
-    df = pd.DataFrame(SIRNA_DB.get(cancer, []))
-    log_entry("TP53 siRNA", cancer, f"Found {len(df)} targets for {cancer}")
-    return df
-def get_lncrna():
-    log_entry("lncRNA-TREM2", "load", "ceRNA+ASO tables")
-    return pd.DataFrame(CERNA), pd.DataFrame(ASO)
-def predict_drug(pocket):
-    df = pd.DataFrame(FGFR3.get(pocket, []))
-    fig, ax = plt.subplots(figsize=(6, 4), facecolor=CARD)
-    ax.set_facecolor(CARD)
-    ax.barh(df["Compound"], df["Final_score"], color=ACC)
-    ax.set_xlabel("Final Score", color=TXT)
-    ax.tick_params(colors=TXT)
-    for sp in ax.spines.values():
-        sp.set_edgecolor("#334155")
-    ax.set_title(f"Top compounds — {pocket}", color=TXT, fontsize=10)
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
-    plt.close()
     buf.seek(0)
-    log_entry("FGFR3 Drug", pocket, f"Top: {df.iloc[0]['Compound'] if len(df) else 'none'}")
-    return df, Image.open(buf)
-def predict_variant(hgvs, sift, polyphen, gnomad):
     hgvs = hgvs.strip()
-    if hgvs in VARIANT_DB:
-        r = VARIANT_DB[hgvs]
-        cls, conf, score = r["cls"], r["conf"], r["score"]
     else:
-        score = 0.0
-        if sift < 0.05:      score += 0.4
-        if polyphen > 0.85:  score += 0.35
-        if gnomad < 0.0001:  score += 0.25
-        score = round(score, 3)
-        cls  = ("Pathogenic" if score > 0.6 else
-                "Likely Pathogenic" if score > 0.4 else "Benign")
-        conf = "High" if (sift < 0.01 or sift > 0.9) else "Moderate"
-    colour = "#ef4444" if "Pathogenic" in cls else "#22c55e"
-    icon   = "⚠️ WARNING" if "Pathogenic" in cls else "✅ OK"
-    bar_w  = int(score * 100)
-    explanation = PLAIN.get(cls, "")
-    log_entry("OpenVariant", hgvs or f"SIFT={sift}", f"{cls} score={score}")
-    return (
-        f"<div style='background:{CARD};padding:16px;border-radius:8px;"
-        f"font-family:sans-serif;color:{TXT}'>"
-        f"<h3 style='color:{colour}'>{icon} {cls}</h3>"
-        f"<p>Score: <b>{score:.3f}</b> &nbsp;|&nbsp; Confidence: <b>{conf}</b></p>"
-        f"<div style='background:#334155;border-radius:4px;height:16px'>"
-        f"<div style='background:{colour};height:16px;border-radius:4px;width:{bar_w}%'></div></div>"
-        f"<p style='margin-top:12px'>{explanation}</p>"
-        f"<p style='font-size:11px;color:#64748b'>Research only. Not clinical.</p>"
-        f"</div>"
-    )
-def predict_corona(size, zeta, peg, lipid):
-    score = 0
-    if lipid == "Ionizable": score += 2
-    elif lipid == "Cationic": score += 1
-    if abs(zeta) < 10: score += 1
-    if peg > 1.5:      score += 2
-    if size < 100:     score += 1
-    proteins = ["ApoE","Albumin","Fibrinogen","Vitronectin","ApoA-I"]
-    dominant = proteins[min(score, 4)]
-    efficacy = ("High" if score >= 4 else "Medium" if score >= 2 else "Low")
-    log_entry("LNP Corona", f"size={size},zeta={zeta},peg={peg},lipid={lipid}",
-              f"dominant={dominant},efficacy={efficacy}")
-    return (f"**Dominant corona protein:** {dominant}\n\n"
-            f"**Predicted efficacy class:** {efficacy}\n\n"
-            f"**Composite score:** {score}/6")
-def predict_cancer(c1,c2,c3,c4,c5,c6,c7,c8,c9,c10):
-    vals    = [c1,c2,c3,c4,c5,c6,c7,c8,c9,c10]
-    names   = list(BM_W.keys())
-    weights = list(BM_W.values())
-    raw     = sum(v*w for v,w in zip(vals, weights))
-    prob    = 1 / (1 + np.exp(-raw * 2))
-    label   = "CANCER" if prob > 0.5 else "HEALTHY"
-    colour  = "#ef4444" if prob > 0.5 else "#22c55e"
-    contribs = [v*w for v,w in zip(vals, weights)]
-    cols     = [ACC if c > 0 else ACC2 for c in contribs]
-    fig, ax  = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
-    ax.set_facecolor(CARD)
-    ax.barh(names, contribs, color=cols)
-    ax.axvline(0, color=TXT, linewidth=0.8)
-    ax.set_xlabel("Contribution to cancer score", color=TXT)
-    ax.tick_params(colors=TXT, labelsize=8)
-    for sp in ax.spines.values():
-        sp.set_edgecolor("#334155")
-    ax.set_title("Protein contributions", color=TXT, fontsize=10)
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
-    plt.close()
     buf.seek(0)
-    log_entry("Liquid Biopsy", f"CTHRC1={c1},FHL2={c2}...", f"{label} prob={prob:.2f}")
-    return (
-        f"<div style='background:{CARD};padding:12px;border-radius:8px;"
-        f"color:{colour};font-size:20px;font-family:sans-serif'>"
-        f"<b>{label}</b><br>"
-        f"<span style='color:{TXT};font-size:14px'>Probability: {prob:.2f}</span></div>"
-    ), Image.open(buf)
-def predict_flow(size, zeta, peg, charge, flow_rate):
-    csi = ((flow_rate/40)*0.6 + (peg/5)*0.2 +
-           (1 if charge == "Cationic" else 0)*0.2)
-    csi = round(min(csi, 1.0), 3)
-    stability = ("High remodeling" if csi > 0.6 else
-                 "Medium" if csi > 0.3 else "Stable")
-    t  = np.linspace(0, 60, 200)
-    kf = 0.03 * (1 + flow_rate/40)
-    ks = 0.038 * (1 + flow_rate/40)
-    fig, ax = plt.subplots(figsize=(6, 3.5), facecolor=CARD)
-    ax.set_facecolor(CARD)
-    ax.plot(t, 60*np.exp(-0.03*t)+20, color="#60a5fa", ls="--", label="Albumin (static)")
-    ax.plot(t, 60*np.exp(-kf*t)+10,   color="#60a5fa",           label="Albumin (flow)")
-    ax.plot(t, 14*(1-np.exp(-0.038*t))+5, color=ACC, ls="--",   label="ApoE (static)")
-    ax.plot(t, 20*(1-np.exp(-ks*t))+5,    color=ACC,             label="ApoE (flow)")
-    ax.set_xlabel("Time (min)", color=TXT)
-    ax.set_ylabel("% Corona",   color=TXT)
-    ax.tick_params(colors=TXT)
-    ax.legend(fontsize=7, labelcolor=TXT, facecolor=CARD)
-    for sp in ax.spines.values():
-        sp.set_edgecolor("#334155")
-    ax.set_title("Vroman Effect", color=TXT, fontsize=9)
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
-    plt.close()
     buf.seek(0)
-    log_entry("Flow Corona", f"flow={flow_rate},charge={charge}", f"CSI={csi},{stability}")
-    return f"**Corona Shift Index: {csi}** — {stability}", Image.open(buf)
-def predict_bbb(smiles, pka, zeta):
-    logp     = smiles.count("C")*0.3 - smiles.count("O")*0.5 + 1.5
-    apoe_pct = max(0, min(40, (7.0-pka)*8 + abs(zeta)*0.5 + logp*0.8))
-    bbb_prob = min(0.95, apoe_pct/30)
-    tier     = ("HIGH (>20%)" if apoe_pct > 20 else
-                "MEDIUM (10-20%)" if apoe_pct > 10 else "LOW (<10%)")
-    cats   = ["ApoE%","BBB","logP","pKa fit","Zeta"]
-    vals   = [apoe_pct/40, bbb_prob, min(logp/5,1),
-              (7-abs(pka-6.5))/7, (10-abs(zeta))/10]
-    angles = np.linspace(0, 2*np.pi, len(cats), endpoint=False).tolist()
-    v2, a2 = vals+[vals[0]], angles+[angles[0]]
-    fig, ax = plt.subplots(figsize=(5, 4), subplot_kw={"polar":True}, facecolor=CARD)
-    ax.set_facecolor(CARD)
-    ax.plot(a2, v2, color=ACC, linewidth=2)
-    ax.fill(a2, v2, color=ACC, alpha=0.2)
-    ax.set_xticks(angles)
-    ax.set_xticklabels(cats, color=TXT, fontsize=8)
-    ax.tick_params(colors=TXT)
-    plt.tight_layout()
-    buf = BytesIO()
-    plt.savefig(buf, format="png", dpi=120, facecolor=CARD)
-    plt.close()
     buf.seek(0)
-    log_entry("LNP Brain", f"pka={pka},zeta={zeta}", f"ApoE={apoe_pct:.1f}%,BBB={bbb_prob:.2f}")
-    return (f"**Predicted ApoE:** {apoe_pct:.1f}% — {tier}\n\n"
-            f"**BBB Probability:** {bbb_prob:.2f}"), Image.open(buf)
-def extract_corona(text):
-    out = {
-        "nanoparticle_composition": "",
-        "size_nm": None, "zeta_mv": None, "PDI": None,
-        "protein_source": "", "corona_proteins": [], "confidence": {}
     }
-    m = re.search(r"(\d+\.?\d*)\s*(?:nm|nanometer)", text, re.I)
-    if m:
-        out["size_nm"] = float(m.group(1))
-        out["confidence"]["size_nm"] = "HIGH"
-    m = re.search(r"([+-]?\d+\.?\d*)\s*mV", text, re.I)
-    if m:
-        out["zeta_mv"] = float(m.group(1))
-        out["confidence"]["zeta_mv"] = "HIGH"
-    m = re.search(r"PDI\s*[=:of]*\s*(\d+\.?\d*)", text, re.I)
-    if m:
-        out["PDI"] = float(m.group(1))
-        out["confidence"]["PDI"] = "HIGH"
-    for src in ["human plasma","human serum","fetal bovine serum","FBS","PBS"]:
-        if src.lower() in text.lower():
-            out["protein_source"] = src
-            out["confidence"]["protein_source"] = "HIGH"
-            break
-    out["corona_proteins"] = [
-        {"name": p, "confidence": "MEDIUM"} for p in PROTEINS if p in text.lower()
-    ]
-    for lip in ["DSPC","DOPE","MC3","DLin","cholesterol","PEG","DOTAP"]:
-        if lip in text:
-            out["nanoparticle_composition"] += lip + " "
-    out["nanoparticle_composition"] = out["nanoparticle_composition"].strip()
-    flags = []
-    if not out["size_nm"]:         flags.append("size_nm not found")
-    if not out["zeta_mv"]:         flags.append("zeta_mv not found")
-    if not out["corona_proteins"]: flags.append("no proteins detected")
-    summary = "All key fields extracted" if not flags else " | ".join(flags)
-    log_entry("AutoCorona NLP", text[:80]+"...",
-              f"proteins={len(out['corona_proteins'])},{summary}")
-    return json.dumps(out, indent=2), summary
-css = (
-    f"body,.gradio-container{{background:{BG}!important;color:{TXT}!important}}"
-    f".tab-nav button{{color:{TXT}!important;background:{CARD}!important}}"
-    f".tab-nav button.selected{{border-bottom:2px solid {ACC}!important;color:{ACC}!important}}"
-    f"h1,h2,h3{{color:{ACC}!important}}"
-    f".gr-button-primary{{background:{ACC}!important;border:none!important}}"
-    f"footer{{display:none!important}}"
-)
-LEARNING_CASES = """
-## 🧪 Top 5 Guided Investigations
-### Case 1 — Beginner 🟢
-**Question:** Why is the same gene position benign vs pathogenic?
-1. OpenVariant → enter `BRCA1:p.R1699Q` → Benign
-2. Enter `BRCA1:p.R1699W` → Pathogenic
-3. Same position, different amino acid — what changed?
-**Key concept:** Amino acid polarity determines protein folding impact.
----
-### Case 2 — Beginner 🟢
-**Question:** How does PEG% change what protein sticks to LNPs?
-1. LNP Corona → Ionizable, Zeta=-5, Size=100, PEG=0.5% → note protein
-2. PEG=2.5% → compare
-3. LNP Brain → pKa=6.5 → compare ApoE%
-**Key concept:** More PEG → less Fibrinogen, more ApoE.
----
-### Case 3 — Intermediate 🟡
-**Question:** Does blood flow change corona composition?
-1. Flow Corona → Flow=0, Ionizable
-2. Flow=40 (arterial) → compare ApoE curve
-3. At what minute does ApoE plateau?
-**Key concept:** Vroman effect — albumin displaced by ApoE under flow.
----
-### Case 4 — Intermediate 🟡
-**Question:** Which cancer has the most novel siRNA targets?
-1. TP53 siRNA → LUAD → count "Novel"
-2. Repeat BRCA, COAD
-3. Pick one Novel gene → Google: "[gene] cancer therapeutic target"
----
-### Case 5 — Advanced 🔴
-**Question:** Can you identify cancer from protein levels?
-1. Liquid Biopsy → all sliders=0 → HEALTHY
-2. Set CTHRC1=2.5, FHL2=2.0, LDHA=1.8 → observe
-3. Find minimum CTHRC1 that tips to CANCER
-**Key concept:** CTHRC1 weight (0.18) dominates the score.
-"""
-with gr.Blocks(css=css, title="K R&D Lab") as demo:
-    gr.Markdown(
-        "# 🧬 K R&D Lab — Computational Biology Suite\n"
-        "**Oksana Kolisnyk** · ML Engineer · "
-        "[KOSATIKS GROUP](https://kosatiks-group.pp.ua)\n"
-        "> 10 open-source tools + lab journal."
     )
-    with gr.Tabs():
-        with gr.TabItem("🧬 BRCA2 miRNA"):
-            gr.Markdown("### Tumor Suppressor miRNAs")
-            g1 = gr.Dropdown(["BRCA2","BRCA1","TP53"], value="BRCA2", label="Gene")
-            b1 = gr.Button("Find miRNAs", variant="primary")
-            o1 = gr.Dataframe(label="Top 5 downregulated miRNAs")
-            gr.Examples([["BRCA2"],["TP53"]], inputs=[g1])
-            b1.click(predict_mirna, g1, o1)
-        with gr.TabItem("💉 TP53 siRNA"):
-            gr.Markdown("### Synthetic Lethal siRNA Targets")
-            g2 = gr.Dropdown(["LUAD","BRCA","COAD"], value="LUAD", label="Cancer type")
-            b2 = gr.Button("Find Targets", variant="primary")
-            o2 = gr.Dataframe(label="Top 5 siRNA targets")
-            gr.Examples([["LUAD"],["BRCA"]], inputs=[g2])
-            b2.click(predict_sirna, g2, o2)
-        with gr.TabItem("🧠 lncRNA-TREM2"):
-            gr.Markdown("### lncRNA Networks in Alzheimer's")
-            b3 = gr.Button("Load Results", variant="primary")
-            o3a = gr.Dataframe(label="ceRNA Network")
-            o3b = gr.Dataframe(label="ASO Candidates")
-            b3.click(get_lncrna, [], [o3a, o3b])
-        with gr.TabItem("💊 FGFR3 Drug"):
-            gr.Markdown("### RNA-Directed Drug Discovery: FGFR3")
-            g4 = gr.Radio(["P1 (hairpin loop)","P10 (G-quadruplex)"],
-                          value="P1 (hairpin loop)", label="Target pocket")
-            b4 = gr.Button("Screen Compounds", variant="primary")
-            o4t = gr.Dataframe(label="Top 5 candidates")
-            o4p = gr.Image(label="Binding scores")
-            gr.Examples([["P1 (hairpin loop)"],["P10 (G-quadruplex)"]], inputs=[g4])
-            b4.click(predict_drug, g4, [o4t, o4p])
-        with gr.TabItem("🔬 OpenVariant"):
-            gr.Markdown("### OpenVariant — Pathogenicity Classifier\nAUC=0.939 on ClinVar 2026.")
-            hgvs = gr.Textbox(label="HGVS notation", placeholder="BRCA1:p.R1699Q")
-            gr.Markdown("**Or enter scores manually:**")
-            with gr.Row():
-                sift = gr.Slider(0, 1, value=0.5,     step=0.01,   label="SIFT (0=damaging)")
-                pp   = gr.Slider(0, 1, value=0.5,     step=0.01,   label="PolyPhen-2")
-                gn   = gr.Slider(0, 0.01, value=0.001, step=0.0001, label="gnomAD AF")
-            b5 = gr.Button("Predict Pathogenicity", variant="primary")
-            o5 = gr.HTML(label="Result")
-            gr.Examples(
-                [["BRCA1:p.R1699Q", 0.82, 0.05, 0.0012],
-                 ["TP53:p.R248W",   0.00, 1.00, 0.0],
-                 ["BRCA2:p.D2723A", 0.01, 0.98, 0.0]],
-                inputs=[hgvs, sift, pp, gn])
-            b5.click(predict_variant, [hgvs, sift, pp, gn], o5)
-        with gr.TabItem("🧪 LNP Corona"):
-            gr.Markdown("### LNP Protein Corona Prediction")
-            with gr.Row():
-                sz = gr.Slider(50, 300, value=100, step=1,   label="Size (nm)")
-                zt = gr.Slider(-40, 10, value=-5,  step=1,   label="Zeta (mV)")
-            with gr.Row():
-                pg = gr.Slider(0, 5, value=1.5,    step=0.1, label="PEG mol%")
-                lp = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
-                                 value="Ionizable", label="Lipid type")
-            b6 = gr.Button("Predict", variant="primary")
-            o6 = gr.Markdown()
-            gr.Examples([[100,-5,1.5,"Ionizable"],[80,5,0.5,"Cationic"]], inputs=[sz,zt,pg,lp])
-            b6.click(predict_corona, [sz,zt,pg,lp], o6)
-        with gr.TabItem("🩸 Liquid Biopsy"):
-            gr.Markdown("### Protein Corona Cancer Diagnostics\nClassify cancer vs healthy.")
-            with gr.Row():
-                p1  = gr.Slider(-3, 3, value=0, step=0.1, label="CTHRC1")
-                p2  = gr.Slider(-3, 3, value=0, step=0.1, label="FHL2")
-                p3  = gr.Slider(-3, 3, value=0, step=0.1, label="LDHA")
-                p4  = gr.Slider(-3, 3, value=0, step=0.1, label="P4HA1")
-                p5  = gr.Slider(-3, 3, value=0, step=0.1, label="SERPINH1")
-            with gr.Row():
-                p6  = gr.Slider(-3, 3, value=0, step=0.1, label="ABCA8")
-                p7  = gr.Slider(-3, 3, value=0, step=0.1, label="CA4")
-                p8  = gr.Slider(-3, 3, value=0, step=0.1, label="CKB")
-                p9  = gr.Slider(-3, 3, value=0, step=0.1, label="NNMT")
-                p10 = gr.Slider(-3, 3, value=0, step=0.1, label="CACNA2D2")
-            b7  = gr.Button("Classify", variant="primary")
-            o7t = gr.HTML()
-            o7p = gr.Image(label="Feature contributions")
-            gr.Examples(
-                [[2,2,1.5,1.8,1.6,-1,-1.2,-0.8,1.4,-1.1],
-                 [0,0,0,0,0,0,0,0,0,0]],
-                inputs=[p1,p2,p3,p4,p5,p6,p7,p8,p9,p10])
-            b7.click(predict_cancer, [p1,p2,p3,p4,p5,p6,p7,p8,p9,p10], [o7t,o7p])
-        with gr.TabItem("🌊 Flow Corona"):
-            gr.Markdown("### Corona Remodeling Under Blood Flow")
-            with gr.Row():
-                s8  = gr.Slider(50, 300, value=100, step=1,   label="Size (nm)")
-                z8  = gr.Slider(-40, 10, value=-5,  step=1,   label="Zeta (mV)")
-                pg8 = gr.Slider(0, 5,    value=1.5, step=0.1, label="PEG mol%")
-            with gr.Row():
-                ch8 = gr.Dropdown(["Ionizable","Cationic","Anionic","Neutral"],
-                                  value="Ionizable", label="Charge type")
-                fl8 = gr.Slider(0, 40, value=20, step=1, label="Flow rate cm/s (aorta=40)")
-            b8  = gr.Button("Model Vroman Effect", variant="primary")
-            o8t = gr.Markdown()
-            o8p = gr.Image(label="Kinetics plot")
-            gr.Examples([[100,-5,1.5,"Ionizable",40],[150,5,0.5,"Cationic",10]],
-                        inputs=[s8,z8,pg8,ch8,fl8])
-            b8.click(predict_flow, [s8,z8,pg8,ch8,fl8], [o8t,o8p])
-        with gr.TabItem("🧠 LNP Brain"):
-            gr.Markdown("### LNP Brain Delivery Predictor")
-            smi = gr.Textbox(label="Ionizable lipid SMILES",
-                             value="CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C")
-            with gr.Row():
-                pk  = gr.Slider(4, 8,    value=6.5, step=0.1, label="pKa")
-                zt9 = gr.Slider(-20, 10, value=-3,  step=1,   label="Zeta (mV)")
-            b9  = gr.Button("Predict BBB Crossing", variant="primary")
-            o9t = gr.Markdown()
-            o9p = gr.Image(label="Radar profile")
-            gr.Examples([["CC(C)CC(=O)OCC(COC(=O)CC(C)C)OC(=O)CC(C)C", 6.5, -3]],
-                        inputs=[smi, pk, zt9])
-            b9.click(predict_bbb, [smi, pk, zt9], [o9t, o9p])
-        with gr.TabItem("📄 AutoCorona NLP"):
-            gr.Markdown("### AutoCorona NLP Extraction\nPaste any paper abstract.")
-            txt  = gr.Textbox(lines=6, label="Paper abstract", placeholder="Paste text here...")
-            b10  = gr.Button("Extract Data", variant="primary")
-            o10j = gr.Code(label="Extracted JSON", language="json")
-            o10f = gr.Textbox(label="Validation flags")
-            gr.Examples([[
-                "LNPs composed of MC3, DSPC, Cholesterol (50:10:40 mol%) with 1.5% PEG-DMG. "
-                "Hydrodynamic diameter was 98 nm, zeta potential -3.2 mV, PDI 0.12. "
-                "Incubated in human plasma. Corona: albumin, apolipoprotein E, fibrinogen."
-            ]], inputs=[txt])
-            b10.click(extract_corona, txt, [o10j, o10f])
-        with gr.TabItem("📓 Lab Journal"):
-            gr.Markdown("### Your Research Log\nEvery query is auto-saved.")
-            with gr.Row():
-                note_text = gr.Textbox(
-                    label="📝 Add observation / conclusion",
-                    placeholder="What did you discover? What's your next question?",
-                    lines=3)
-                note_tab = gr.Textbox(label="Which tool?", value="General")
-            note_last = gr.Textbox(label="Result to annotate", visible=False)
-            save_btn  = gr.Button("💾 Save Observation", variant="primary")
-            save_msg  = gr.Markdown()
-            journal_df = gr.Dataframe(
-                label="📋 Full History",
-                value=load_journal(),
-                interactive=False)
-            refresh_btn = gr.Button("🔄 Refresh")
-            refresh_btn.click(load_journal, [], journal_df)
-            save_btn.click(save_note, [note_text, note_tab, note_last], [save_msg, journal_df])
-            gr.Markdown("📥 Log saved as `lab_journal.csv` in the app folder.")
-        with gr.TabItem("📚 Learning Mode"):
-            gr.Markdown(LEARNING_CASES)
-            gr.Markdown("---\n### 📖 Quick Reference")
-            gr.Markdown("""
-| Tool | Predicts | Key input |
-|------|----------|-----------|
-| OpenVariant | Pathogenic/Benign | Gene mutation |
-| LNP Corona | Dominant protein | Formulation |
-| Flow Corona | Vroman kinetics | Flow rate |
-| LNP Brain | ApoE% + BBB prob | pKa + zeta |
-| Liquid Biopsy | Cancer/Healthy | Protein z-scores |
-| BRCA2 miRNA | Downregulated miRNAs | Gene name |
-| TP53 siRNA | Synthetic lethal targets | Cancer type |
-| lncRNA-TREM2 | ceRNA + ASOs | — |
-| FGFR3 Drug | Small molecules | Pocket type |
-| AutoCorona NLP | Structured data | Abstract text |
-""")
-            gr.Markdown("""
-### 🔗 Resources
-- [PubMed](https://pubmed.ncbi.nlm.nih.gov)
-- [ClinVar](https://www.ncbi.nlm.nih.gov/clinvar/)
-- [UniProt](https://www.uniprot.org)
-- [ChEMBL](https://www.ebi.ac.uk/chembl/)
-""")
-    gr.Markdown(
-        "---\n**K R&D Lab** | Research only — not clinical | "
-        "[GitHub](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026) | "
-        "[KOSATIKS GROUP 🦈](https://kosatiks-group.pp.ua)"
     )
-demo.launch(server_name="0.0.0.0", server_port=7860)

+"""
+K R&D Lab — Cancer Research Suite
+Author: Oksana Kolisnyk | kosatiks-group.pp.ua
+Repo:   github.com/TEZv/K-RnD-Lab-PHYLO-03_2026
+"""
 import gradio as gr
+import requests
+import json
+import os
+import time
+import csv
+import math
+import hashlib
+import datetime
 import numpy as np
+import pandas as pd
 import matplotlib
 matplotlib.use("Agg")
 import matplotlib.pyplot as plt
+import matplotlib.colors as mcolors
+from matplotlib import cm
+import io
 from PIL import Image
+# ─────────────────────────────────────────────
+# CACHE SYSTEM  (TTL = 24 h)
+# ─────────────────────────────────────────────
+CACHE_DIR = "/tmp/cache"  # directory in which cache_get / cache_set keep their JSON files
+os.makedirs(CACHE_DIR, exist_ok=True)  # runs at import time; exist_ok=True tolerates an already-existing directory
+CACHE_TTL = 86400  # 24 hours in seconds
+def _cache_key(endpoint: str, query: str) -> str:
+    raw = f"{endpoint}_{query}"
+    return hashlib.md5(raw.encode()).hexdigest()
+def cache_get(endpoint: str, query: str):
+    key = _cache_key(endpoint, query)
+    path = os.path.join(CACHE_DIR, f"{endpoint}_{key}.json")
+    if os.path.exists(path):
+        mtime = os.path.getmtime(path)
+        if time.time() - mtime < CACHE_TTL:
+            try:
+                with open(path) as f:
+                    return json.load(f)
+            except Exception:
+                return None
+    return None
+def cache_set(endpoint: str, query: str, data):
     try:
+        key = _cache_key(endpoint, query)
+        path = os.path.join(CACHE_DIR, f"{endpoint}_{key}.json")
+        with open(path, "w") as f:
+            json.dump(data, f)
     except Exception:
         pass
+# ─────────────────────────────────────────────
+# LAB JOURNAL
+# ─────────────────────────────────────────────
+JOURNAL_FILE = "/tmp/lab_journal.csv"  # CSV file that journal_log appends to and journal_read loads
+def journal_log(tab: str, action: str, result: str, note: str = ""):
     try:
+        ts = datetime.datetime.utcnow().isoformat()
+        row = [ts, tab, action, result[:200], note]
+        write_header = not os.path.exists(JOURNAL_FILE)
+        with open(JOURNAL_FILE, "a", newline="") as f:
+            w = csv.writer(f)
+            if write_header:
+                w.writerow(["timestamp", "tab", "action", "result_summary", "note"])
+            w.writerow(row)
+        return ts
     except Exception:
+        return ""
def journal_read() -> str:
    """Return the last 20 lab-journal rows rendered as a text table.

    Returns:
        ``"No entries yet."`` when the journal file is missing or holds no
        rows, a warning string when the file exists but cannot be read, and
        otherwise a Markdown table (or a plain-text table when the optional
        ``tabulate`` package needed by ``DataFrame.to_markdown`` is absent).
    """
    try:
        df = pd.read_csv(JOURNAL_FILE)
    except (FileNotFoundError, pd.errors.EmptyDataError):
        return "No entries yet."
    except (OSError, ValueError):
        # Unreadable or malformed file: say so instead of claiming it is empty.
        return "⚠️ Lab journal could not be read."
    if df.empty:
        return "No entries yet."
    recent = df.tail(20)
    try:
        return recent.to_markdown(index=False)
    except ImportError:
        return recent.to_string(index=False)
+# ─────────────────────────────────────────────
+# CONSTANTS
+# ─────────────────────────────────────────────
# Cancer label -> EFO disease id passed to OpenTargets as ``efoId``.
# NOTE(review): these ids were not verified here; confirm each one against
# the EFO ontology before relying on the OpenTargets results.
CANCER_EFO = dict([
    ("GBM", "EFO_0000519"),
    ("PDAC", "EFO_0002618"),
    ("SCLC", "EFO_0000702"),
    ("UVM", "EFO_0004339"),
    ("DIPG", "EFO_0009708"),
    ("ACC", "EFO_0003060"),
    ("MCC", "EFO_0005558"),
    ("PCNSL", "EFO_0005543"),
    ("Pediatric AML", "EFO_0000222"),
])
# Cancer labels, in the same order as the mapping above.
CANCER_TYPES = list(CANCER_EFO)
# Biological process terms queried against PubMed, one heatmap row each.
PROCESSES = [
    "autophagy",
    "ferroptosis",
    "protein corona",
    "RNA splicing",
    "phase separation",
    "m6A",
    "circRNA",
    "synthetic lethality",
    "immune exclusion",
    "enhancer hijacking",
    "lncRNA regulation",
    "metabolic reprogramming",
    "exosome biogenesis",
    "senescence",
    "mitophagy",
    "liquid-liquid phase separation",
    "cryptic splicing",
    "proteostasis",
    "redox biology",
    "translation regulation",
]

# Base URLs of the external services used by the tabs.
PUBMED_BASE = "https://eutils.ncbi.nlm.nih.gov/entrez/eutils"  # NCBI E-utilities
OT_GRAPHQL = "https://api.platform.opentargets.org/api/v4/graphql"  # OpenTargets
GNOMAD_GQL = "https://gnomad.broadinstitute.org/api"  # gnomAD GraphQL
CT_BASE = "https://clinicaltrials.gov/api/v2"  # ClinicalTrials.gov v2
+# ─────────────────────────────────────────────
+# SHARED API HELPERS
+# ─────────────────────────────────────────────
def pubmed_count(query: str) -> int:
    """Return the number of PubMed records matching *query*.

    The count is served from the cache when available. ``-1`` is returned
    when the request or the response parsing fails (failures are not cached).
    """
    hit = cache_get("pubmed_count", query)
    if hit is not None:
        return hit

    search_params = {
        "db": "pubmed",
        "term": query,
        "rettype": "count",
        "retmode": "json",
    }
    try:
        # Fixed pause before every request; presumably NCBI rate limiting.
        time.sleep(0.34)
        resp = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi", params=search_params, timeout=10
        )
        resp.raise_for_status()
        total = int(resp.json()["esearchresult"]["count"])
        cache_set("pubmed_count", query, total)
    except Exception:
        return -1
    return total
def pubmed_search(query: str, retmax: int = 10) -> list:
    """Return up to *retmax* PMIDs matching *query*.

    Results are cached per ``(query, retmax)``. An empty list is returned
    when the request or the response parsing fails.
    """
    cache_id = f"{query}_{retmax}"
    hit = cache_get("pubmed_search", cache_id)
    if hit is not None:
        return hit

    search_params = {
        "db": "pubmed",
        "term": query,
        "retmax": retmax,
        "retmode": "json",
    }
    try:
        # Fixed pause before every request; presumably NCBI rate limiting.
        time.sleep(0.34)
        resp = requests.get(
            f"{PUBMED_BASE}/esearch.fcgi", params=search_params, timeout=10
        )
        resp.raise_for_status()
        found = resp.json()["esearchresult"]["idlist"]
        cache_set("pubmed_search", cache_id, found)
    except Exception:
        return []
    return found
def pubmed_summary(pmids: list) -> list:
    """Fetch PubMed esummary records for a list of PMIDs.

    Args:
        pmids: PMIDs as strings or integers.

    Returns:
        The summary records in the order of *pmids* (ids missing from the
        response are skipped); an empty list for empty input or on failure.
    """
    if not pmids:
        return []
    # Normalise to strings: str.join and the result lookup both need them,
    # and integer PMIDs would otherwise raise before the try block.
    ids = [str(pid) for pid in pmids]
    id_param = ",".join(ids)
    cached = cache_get("pubmed_summary", id_param)
    if cached is not None:
        return cached
    try:
        # Fixed pause before every request; presumably NCBI rate limiting.
        time.sleep(0.34)
        r = requests.get(
            f"{PUBMED_BASE}/esummary.fcgi",
            params={"db": "pubmed", "id": id_param, "retmode": "json"},
            timeout=15
        )
        r.raise_for_status()
        result = r.json().get("result", {})
        summaries = [result[pid] for pid in ids if pid in result]
        cache_set("pubmed_summary", id_param, summaries)
        return summaries
    except Exception:
        return []
def ot_query(gql: str, variables: dict = None) -> dict:
    """Run an OpenTargets GraphQL query and return the decoded JSON response.

    Successful responses are cached per ``(query, variables)``.

    Args:
        gql: GraphQL query text.
        variables: Query variables; ``None`` is sent as an empty mapping.

    Returns:
        The response payload, or ``{"error": "<message>"}`` when the request
        or JSON decoding fails.
    """
    key = json.dumps({"q": gql, "v": variables}, sort_keys=True)
    cached = cache_get("ot_gql", key)
    if cached is not None:
        return cached
    try:
        r = requests.post(
            OT_GRAPHQL,
            json={"query": gql, "variables": variables or {}},
            timeout=20
        )
        r.raise_for_status()
        data = r.json()
    except Exception as e:
        return {"error": str(e)}
    # GraphQL servers can report failures in an "errors" member of an
    # otherwise successful HTTP response; do not pin such a payload in the
    # cache for the whole TTL.
    if not (isinstance(data, dict) and data.get("errors")):
        cache_set("ot_gql", key, data)
    return data
+# ─────────────────────────────────────────────
+# TAB A1 — GRAY ZONES EXPLORER
+# ─────────────────────────────────────────────
def a1_run(cancer_type: str):
    """Build a one-column heatmap of PubMed paper counts per process.

    Args:
        cancer_type: Cancer label searched in title/abstract (``[tiab]``).

    Returns:
        ``(image, markdown)``: the heatmap as a PIL image, and a Markdown
        list of the five processes with the fewest papers plus a source note.
    """
    today = datetime.date.today().isoformat()
    queries = {proc: f'"{proc}" AND "{cancer_type}"[tiab]' for proc in PROCESSES}
    counts = {proc: pubmed_count(query) for proc, query in queries.items()}

    # pubmed_count() returns -1 on failure; mask those cells so they are
    # drawn in the "bad" colour instead of looking like a real zero.
    column = np.array([counts[p] for p in PROCESSES], dtype=float).reshape(-1, 1)
    failed = column < 0
    masked = np.ma.masked_where(failed, np.where(failed, 0.0, column))

    # plt.cm.get_cmap() was removed in Matplotlib 3.9; pyplot.get_cmap() is
    # the supported lookup. Copy before set_bad() so the shared registered
    # colormap is never modified.
    cmap = plt.get_cmap("YlOrRd").copy()
    cmap.set_bad("white")

    fig, ax = plt.subplots(figsize=(6, 8), facecolor="white")
    try:
        im = ax.imshow(masked, aspect="auto", cmap=cmap, vmin=0)
        ax.set_xticks([0])
        ax.set_xticklabels([cancer_type], fontsize=11, fontweight="bold")
        ax.set_yticks(range(len(PROCESSES)))
        ax.set_yticklabels(PROCESSES, fontsize=9)
        ax.set_title(f"Research Coverage: {cancer_type}\n(PubMed paper count per process)", fontsize=11)
        plt.colorbar(im, ax=ax, label="Paper count")
        fig.tight_layout()
        buf = io.BytesIO()
        fig.savefig(buf, format="png", dpi=150, facecolor="white")
    finally:
        # Always release the figure, even when rendering fails.
        plt.close(fig)
    buf.seek(0)
    img = Image.open(buf)

    # Fewest papers first; failed lookups are left out of the ranking.
    sorted_procs = sorted(
        [(p, counts[p]) for p in PROCESSES if counts[p] >= 0],
        key=lambda x: x[1]
    )
    gap_cards = []
    for i, (proc, cnt) in enumerate(sorted_procs[:5], 1):
        gap_cards.append(
            f"**Gap #{i}: {proc}**  \n"
            f"Papers found: {cnt}  \n"
            # Show the query exactly as it was sent, including the field tag.
            f"Query: `{queries[proc]}`"
        )
    gaps_md = "\n\n---\n\n".join(gap_cards) if gap_cards else "No data available."
    journal_log("A1-GrayZones", f"cancer={cancer_type}", f"gaps={[p for p,_ in sorted_procs[:5]]}")
    source_note = f"*Source: PubMed E-utilities | Date: {today}*"
    return img, gaps_md + "\n\n" + source_note
+# ─────────────────────────────────────────────
+# TAB A2 — UNDERSTUDIED TARGET FINDER
+# ─────────────────────────────────────────────
# Holds the generated sample table under the key "df" after the first call.
_depmap_cache = {}


def _load_depmap_sample() -> pd.DataFrame:
    """Return a placeholder gene-effect table for a fixed list of genes.

    The ``gene_effect`` values are NOT DepMap data: they are drawn uniformly
    from [-1.5, 0.3) with a fixed seed (42), so they are identical on every
    run. The table is built once and the same DataFrame object is returned
    on later calls, so callers must not modify it in place.

    Returns:
        DataFrame with columns ``gene`` and ``gene_effect``.
    """
    # No ``global`` statement needed: the dict is mutated, never rebound.
    if "df" in _depmap_cache:
        return _depmap_cache["df"]
    genes = [
        "MYC", "KRAS", "TP53", "EGFR", "PTEN", "RB1", "CDKN2A",
        "PIK3CA", "AKT1", "BRAF", "NRAS", "IDH1", "IDH2", "ARID1A",
        "SMAD4", "CTNNB1", "VHL", "BRCA1", "BRCA2", "ATM",
        "CDK4", "CDK6", "MDM2", "BCL2", "MCL1", "CCND1",
        "FGFR1", "FGFR2", "MET", "ALK", "RET", "ERBB2",
        "MTOR", "PIK3R1", "STK11", "NF1", "NF2", "TSC1", "TSC2",
    ]
    rng = np.random.default_rng(42)
    scores = rng.uniform(-1.5, 0.3, len(genes))
    df = pd.DataFrame({"gene": genes, "gene_effect": scores})
    _depmap_cache["df"] = df
    return df
def a2_run(cancer_type: str):
    """Rank OpenTargets-associated genes by a research "gap index".

    For the first 20 associated targets the function collects a PubMed paper
    count and a ClinicalTrials.gov study count, and combines the paper count
    with the sample essentiality score from ``_load_depmap_sample()``.

    Args:
        cancer_type: Cancer label; looked up in ``CANCER_EFO``.

    Returns:
        ``(DataFrame, note)`` sorted by ``Gap_index`` descending, or
        ``(None, message)`` when OpenTargets returns no rows.
    """
    today = datetime.date.today().isoformat()
    efo = CANCER_EFO.get(cancer_type, "")
    gql = """
    query AssocTargets($efoId: String!, $size: Int!) {
      disease(efoId: $efoId) {
        associatedTargets(page: {index: 0, size: $size}) {
          rows {
            target {
              approvedSymbol
              approvedName
            }
            score
          }
        }
      }
    }
    """
    ot_data = ot_query(gql, {"efoId": efo, "size": 40})
    try:
        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
    except (KeyError, TypeError):
        rows_ot = []
    if not rows_ot:
        return None, f"⚠️ OpenTargets returned no data for {cancer_type}. Try again later.\n\n*Source: OpenTargets | Date: {today}*"

    top_genes = [r["target"]["approvedSymbol"] for r in rows_ot][:20]

    paper_counts = {
        gene: pubmed_count(f'"{gene}" AND "{cancer_type}"[tiab]')
        for gene in top_genes
    }

    trial_counts = {}
    for gene in top_genes:
        cache_id = f"{gene}_{cancer_type}"
        cached = cache_get("ct_gene", cache_id)
        if cached is not None:
            trial_counts[gene] = cached
            continue
        try:
            r = requests.get(
                f"{CT_BASE}/studies",
                params={
                    "query.term": f"{gene} {cancer_type}",
                    "pageSize": 1,
                    "format": "json",
                    # The v2 API only includes totalCount when asked to;
                    # without this every gene was reported with 0 trials.
                    "countTotal": "true",
                },
                timeout=10
            )
            r.raise_for_status()
            n = r.json().get("totalCount", 0)
            trial_counts[gene] = n
            cache_set("ct_gene", cache_id, n)
        except Exception:
            trial_counts[gene] = -1  # lookup failed

    depmap_df = _load_depmap_sample()
    depmap_dict = dict(zip(depmap_df["gene"], depmap_df["gene_effect"]))

    records = []
    for gene in top_genes:
        raw_ess = depmap_dict.get(gene)
        papers = paper_counts.get(gene, 0)
        trials = trial_counts.get(gene, 0)
        if raw_ess is None:
            ess_display = "N/A"
            gap_idx = 0.0
        else:
            # Flip the sign so that a larger value means "more essential".
            ess_inverted = -raw_ess
            ess_display = f"{ess_inverted:.3f}"
            if papers < 0 or ess_inverted <= 0:
                # papers < 0 means the PubMed lookup failed. Treating that
                # as "0 papers" would push the gene to the top of the
                # ranking, so no gap score is assigned.
                gap_idx = 0.0
            else:
                gap_idx = ess_inverted / math.log(papers + 2)
        records.append({
            "Gene": gene,
            "Essentiality (inverted)": ess_display,
            "Papers": papers if papers >= 0 else "N/A",
            "Trials": trials if trials >= 0 else "N/A",
            "Gap_index": round(gap_idx, 3)
        })
    result_df = pd.DataFrame(records).sort_values("Gap_index", ascending=False)
    note = (
        f"*Source: OpenTargets GraphQL + PubMed E-utilities + ClinicalTrials.gov v2 | Date: {today}*\n\n"
        f"*Essentiality: inverted gene-effect score (positive = more essential). "
        f"Gap_index = essentiality / log(papers+2)*\n\n"
        f"> ⚠️ **Essentiality scores are PLACEHOLDER values (seeded random numbers for a fixed gene list), not DepMap data.** "
        f"For real analysis, download `CRISPR_gene_effect.csv` from [depmap.org](https://depmap.org/portal/download/all/) "
        f"and replace `_load_depmap_sample()` in `app.py`."
    )
    journal_log("A2-TargetFinder", f"cancer={cancer_type}", f"top_gap={result_df.iloc[0]['Gene'] if len(result_df) else 'none'}")
    return result_df, note
+# ─────────────────────────────────────────────
+# TAB A3 — REAL VARIANT LOOKUP
+# ─────────────────────────────────────────────
def _a3_classification(record: dict) -> str:
    """Return the classification text of one ClinVar esummary record."""
    # NOTE(review): "germline_classification" is tried as a fallback because
    # newer esummary payloads appear to use it instead of
    # "clinical_significance" - confirm against a live response.
    for field in ("clinical_significance", "germline_classification"):
        sig = record.get(field)
        if isinstance(sig, dict):
            description = sig.get("description")
            if description:
                return str(description)
        elif sig:
            return str(sig)
    return "Unknown"


def _a3_clinvar_section(hgvs: str) -> str:
    """Return the Markdown ClinVar section for *hgvs*."""
    api_error = "### ClinVar\nData unavailable — API error."
    ids = cache_get("clinvar", hgvs)
    if ids is None:
        try:
            time.sleep(0.34)
            r = requests.get(
                f"{PUBMED_BASE}/esearch.fcgi",
                params={"db": "clinvar", "term": hgvs, "retmode": "json", "retmax": 5},
                timeout=10
            )
            r.raise_for_status()
            ids = r.json()["esearchresult"]["idlist"]
            cache_set("clinvar", hgvs, ids)
        except Exception:
            # A failed search is not the same as "variant absent": report it
            # as an API error instead of claiming the variant is not found.
            return api_error
    if not ids:
        return (
            "### ClinVar\n"
            "**Not found in ClinVar database.**\n"
            "> ⚠️ Not in database. Do not interpret."
        )
    top_ids = ids[:3]
    try:
        time.sleep(0.34)
        r2 = requests.get(
            f"{PUBMED_BASE}/esummary.fcgi",
            params={"db": "clinvar", "id": ",".join(top_ids), "retmode": "json"},
            timeout=10
        )
        r2.raise_for_status()
        cv_result = r2.json().get("result", {})
        cv_rows = [
            f"- **ClinVar ID {vid}**: {cv_result[vid].get('title','N/A')} | "
            f"Classification: **{_a3_classification(cv_result[vid])}**"
            for vid in top_ids
            if vid in cv_result
        ]
    except Exception:
        return api_error
    if cv_rows:
        return "### ClinVar Results\n" + "\n".join(cv_rows)
    return "### ClinVar\nVariant found in index but summary unavailable."


def _a3_gnomad_section(hgvs: str) -> str:
    """Return the Markdown gnomAD section for *hgvs*."""
    payload = cache_get("gnomad", hgvs)
    if payload is None:
        # NOTE(review): the query shape (field names, dataset id) should be
        # confirmed against the current gnomAD GraphQL schema.
        gql = """
            query VariantSearch($query: String!, $dataset: DatasetId!) {
              variantSearch(query: $query, dataset: $dataset) {
                variant_id
                rsids
                exome { af }
                genome { af }
              }
            }
            """
        try:
            r3 = requests.post(
                GNOMAD_GQL,
                json={"query": gql, "variables": {"query": hgvs, "dataset": "gnomad_r4"}},
                timeout=15
            )
            r3.raise_for_status()
            payload = r3.json()
        except Exception:
            payload = None
        else:
            # Do not keep a GraphQL error payload in the cache for the TTL.
            if isinstance(payload, dict) and not payload.get("errors"):
                cache_set("gnomad", hgvs, payload)

    # "data" can be null when the server reports errors; guard before .get().
    data = payload.get("data") if isinstance(payload, dict) else None
    if not isinstance(data, dict):
        return (
            "### gnomAD v4\n"
            "Data unavailable — API error or variant not found.\n"
            "> ⚠️ Not in database. Do not interpret."
        )
    variants = data.get("variantSearch") or []
    if not variants:
        return (
            "### gnomAD v4\n"
            "**Not found in gnomAD.**\n"
            "> ⚠️ Not in database. Do not interpret."
        )
    gn_rows = []
    for v in variants[:3]:
        vid = v.get("variant_id", "N/A")
        # Any of these members may be null in the response.
        rsids = ", ".join(v.get("rsids") or []) or "N/A"
        exome_af = v.get("exome") or {}
        genome_af = v.get("genome") or {}
        af_e = exome_af.get("af", "N/A")
        af_g = genome_af.get("af", "N/A")
        gn_rows.append(
            f"- **{vid}** (rsID: {rsids}) | "
            f"Exome AF: {af_e} | Genome AF: {af_g}"
        )
    return "### gnomAD v4 Results\n" + "\n".join(gn_rows)


def a3_run(hgvs: str):
    """Look up a variant in ClinVar and gnomAD and return a Markdown report.

    Args:
        hgvs: HGVS notation of the variant; surrounding whitespace is ignored.

    Returns:
        Markdown with a ClinVar section, a gnomAD section and a source line,
        or a prompt string when *hgvs* is empty.
    """
    today = datetime.date.today().isoformat()
    hgvs = hgvs.strip()
    if not hgvs:
        return "Please enter an HGVS notation (e.g. NM_007294.4:c.5266dupC)"
    result_parts = [_a3_clinvar_section(hgvs), _a3_gnomad_section(hgvs)]
    result_parts.append(f"\n*Source: ClinVar E-utilities + gnomAD GraphQL | Date: {today}*")
    journal_log("A3-VariantLookup", f"hgvs={hgvs}", result_parts[0][:100])
    return "\n\n".join(result_parts)
+# ─────────────────────────────────────────────
+# TAB A4 — LITERATURE GAP FINDER
+# ─────────────────────────────────────────────
def a4_run(cancer_type: str, keyword: str):
    """Plot yearly PubMed counts for a keyword and list low-activity years.

    Covers the current year and the nine years before it.

    Args:
        cancer_type: Cancer label searched in title/abstract.
        keyword: Search keyword; surrounding whitespace is ignored.

    Returns:
        ``(image, markdown_summary)``, or ``(None, message)`` when *keyword*
        is empty.
    """
    today = datetime.date.today().isoformat()
    keyword = keyword.strip()
    if not keyword:
        return None, "Please enter a keyword."
    current_year = datetime.date.today().year
    years = list(range(current_year - 9, current_year + 1))

    # pubmed_count() returns -1 when a lookup fails. Keep the raw values so a
    # failed year is not reported as a year with zero publications.
    raw_counts = [
        pubmed_count(f'"{keyword}" AND "{cancer_type}"[tiab] AND {yr}[pdat]')
        for yr in years
    ]
    counts = [max(n, 0) for n in raw_counts]  # bar heights
    failed_years = [yr for yr, n in zip(years, raw_counts) if n < 0]

    positive = [c for c in counts if c > 0]
    avg = np.mean(positive) if positive else 0
    low_cutoff = avg * 0.3
    gaps = [yr for yr, n in zip(years, raw_counts) if n == 0]
    low_years = [yr for yr, c in zip(years, counts) if 0 < c < low_cutoff]

    def _bar_colour(n):
        if n < 0:
            return "#bdbdbd"  # lookup failed (bar has zero height)
        if n == 0:
            return "#d73027"
        if n < low_cutoff:
            return "#fc8d59"
        return "#4393c3"

    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
    ax.bar(years, counts, color=[_bar_colour(n) for n in raw_counts],
           edgecolor="white", linewidth=0.5)
    ax.axhline(avg, color="#555", linestyle="--", linewidth=1, label=f"Avg: {avg:.1f}")
    ax.set_xlabel("Year", fontsize=11)
    ax.set_ylabel("PubMed Papers", fontsize=11)
    ax.set_title(f'Literature Trend: "{keyword}" in {cancer_type}', fontsize=12)
    ax.set_xticks(years)
    ax.set_xticklabels([str(y) for y in years], rotation=45, ha="right")
    ax.legend(fontsize=9)
    ax.set_facecolor("white")
    fig.tight_layout()
    buf = io.BytesIO()
    fig.savefig(buf, format="png", dpi=150, facecolor="white")
    buf.seek(0)
    img = Image.open(buf)
    plt.close(fig)

    gap_text = []
    if gaps:
        gap_text.append(f"**Zero-publication years:** {', '.join(map(str, gaps))}")
    if low_years:
        gap_text.append(f"**Low-activity years (<30% avg):** {', '.join(map(str, low_years))}")
    if failed_years:
        gap_text.append(f"**Lookup failed (no data):** {', '.join(map(str, failed_years))}")
    if not gap_text:
        gap_text.append("No significant gaps detected in the last 10 years.")
    summary = "\n\n".join(gap_text)
    summary += f"\n\n*Source: PubMed E-utilities | Date: {today}*"
    journal_log("A4-LitGap", f"cancer={cancer_type}, kw={keyword}", summary[:100])
    return img, summary
+# ─────────────────────────────────────────────
+# TAB A5 — DRUGGABLE ORPHANS
+# ─────────────────────────────────────────────
def a5_run(cancer_type: str):
    """List disease-associated targets that have no known drugs.

    Targets come from OpenTargets; those whose ``knownDrugs.count`` is 0 (or
    missing) are kept, and for the first 15 a ClinicalTrials.gov study count
    is looked up.

    Args:
        cancer_type: Cancer label; looked up in ``CANCER_EFO``.

    Returns:
        ``(DataFrame, note)``, or ``(None, message)`` when OpenTargets
        returns no rows.
    """
    today = datetime.date.today().isoformat()
    efo = CANCER_EFO.get(cancer_type, "")
    # The tractability fields are requested but not used below.
    gql = """
    query DruggableTargets($efoId: String!, $size: Int!) {
      disease(efoId: $efoId) {
        associatedTargets(page: {index: 0, size: $size}) {
          rows {
            target {
              approvedSymbol
              approvedName
              tractability {
                label
                modality
                value
              }
              knownDrugs {
                count
              }
            }
            score
          }
        }
      }
    }
    """
    ot_data = ot_query(gql, {"efoId": efo, "size": 50})
    try:
        rows_ot = ot_data["data"]["disease"]["associatedTargets"]["rows"]
    except (KeyError, TypeError):
        rows_ot = []
    if not rows_ot:
        return None, f"⚠️ OpenTargets returned no data for {cancer_type}.\n\n*Source: OpenTargets | Date: {today}*"

    orphan_candidates = []
    for row in rows_ot:
        t = row["target"]
        try:
            drug_count = t["knownDrugs"]["count"] or 0
        except (KeyError, TypeError):
            drug_count = 0
        if drug_count == 0:
            orphan_candidates.append({
                "gene": t["approvedSymbol"],
                # approvedName may be present but null; keep it sliceable.
                "name": t.get("approvedName") or "",
                "ot_score": row["score"],
            })

    records = []
    for cand in orphan_candidates[:15]:
        gene = cand["gene"]
        cache_id = f"{gene}_{cancer_type}"
        trial_count = cache_get("ct_orphan", cache_id)
        if trial_count is None:
            try:
                r = requests.get(
                    f"{CT_BASE}/studies",
                    params={
                        "query.term": f"{gene} {cancer_type}",
                        "pageSize": 1,
                        "format": "json",
                        # The v2 API only includes totalCount when asked to;
                        # without this every target was labelled "Orphan".
                        "countTotal": "true",
                    },
                    timeout=10
                )
                r.raise_for_status()
                trial_count = r.json().get("totalCount", 0)
                cache_set("ct_orphan", cache_id, trial_count)
            except Exception:
                trial_count = -1  # lookup failed
        if trial_count == 0:
            status = "🔴 Orphan"
        elif trial_count > 0:
            status = "⚠️ Trials only"
        else:
            status = "❓ Unknown"
        records.append({
            "Gene": gene,
            "Name": cand["name"][:50],
            "OT_Score": round(cand["ot_score"], 3),
            "Known_Drugs": 0,
            "Active_Trials": trial_count if trial_count >= 0 else "N/A",
            "Status": status
        })
    df = pd.DataFrame(records)
    note = (
        f"*Source: OpenTargets GraphQL + ClinicalTrials.gov v2 | Date: {today}*\n\n"
        f"*Orphan = no approved drug (OpenTargets knownDrugs.count = 0)*"
    )
    journal_log("A5-DruggableOrphans", f"cancer={cancer_type}", f"orphans={len(df)}")
    return df, note
+# ─────────────────────────────────────────────
+# GROUP B — LEARNING SANDBOX
+# ─────────────────────────────────────────────
# Warning text for the Group B (learning sandbox) tabs.
SIMULATED_BANNER = " ".join([
    "⚠️ **SIMULATED DATA** — This tab uses rule-based models and synthetic data",
    "for educational purposes only. Results do NOT reflect real experimental outcomes.",
])
+# ── TAB B1 — miRNA Explorer ──────────────────
# Simulated miRNA data per gene, read by b1_run(). The four lists of an
# entry are parallel: index i in each list describes the same miRNA.
MIRNA_DB = dict(
    BRCA2=dict(
        miRNAs=["miR-146a-5p", "miR-21-5p", "miR-155-5p", "miR-182-5p", "miR-205-5p"],
        binding_energy=[-18.4, -15.2, -12.7, -14.1, -16.8],
        seed_match=["7mer-m8", "6mer", "7mer-A1", "8mer", "7mer-m8"],
        expression_change=[-2.1, 1.8, 2.3, -1.5, -3.2],
        cancer_context=(
            "BRCA2 loss-of-function is associated with HR-deficient breast/ovarian cancer. "
            "miR-146a-5p and miR-205-5p are frequently downregulated in BRCA2-mutant tumors."
        ),
    ),
    BRCA1=dict(
        miRNAs=["miR-17-5p", "miR-20a-5p", "miR-93-5p", "miR-182-5p", "miR-9-5p"],
        binding_energy=[-16.1, -13.5, -14.9, -15.3, -11.8],
        seed_match=["8mer", "7mer-m8", "7mer-A1", "8mer", "6mer"],
        expression_change=[1.9, 2.1, 1.6, -1.8, 2.4],
        cancer_context=(
            "BRCA1 regulates DNA damage response. miR-17/20a cluster is upregulated "
            "in BRCA1-deficient tumors and suppresses apoptosis."
        ),
    ),
    TP53=dict(
        miRNAs=["miR-34a-5p", "miR-125b-5p", "miR-504-5p", "miR-25-3p", "miR-30d-5p"],
        binding_energy=[-19.2, -14.6, -13.1, -12.4, -15.7],
        seed_match=["8mer", "7mer-m8", "7mer-A1", "6mer", "8mer"],
        expression_change=[-3.5, 1.2, 1.7, 2.0, -1.3],
        cancer_context=(
            "TP53 is the most mutated gene in cancer. miR-34a is a direct p53 transcriptional "
            "target; its loss promotes tumor progression across cancer types."
        ),
    ),
)
def b1_run(gene: str):
    """Plot the simulated miRNA data stored for *gene* in ``MIRNA_DB``.

    Returns:
        ``(image, markdown)``: a two-panel bar chart (binding energy and
        expression change) and a Markdown table followed by the cancer
        context; ``(None, message)`` when the gene has no entry.
    """
    entry = MIRNA_DB.get(gene, {})
    if not entry:
        return None, "Gene not found in simulation database."

    names = entry["miRNAs"]
    energy = entry["binding_energy"]
    fold_change = entry["expression_change"]
    seed_types = entry["seed_match"]

    def _energy_colour(value):
        # More negative energy gets the warmer colour.
        if value < -16:
            return "#d73027"
        if value < -13:
            return "#fc8d59"
        return "#4393c3"

    fig, (ax_energy, ax_expr) = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")

    # Left panel: magnitude of the binding energy.
    ax_energy.barh(
        names,
        [-value for value in energy],
        color=[_energy_colour(value) for value in energy],
        edgecolor="white",
    )
    ax_energy.set_xlabel("Binding Energy (|kcal/mol|)", fontsize=10)
    ax_energy.set_title(f"Predicted Binding Energy\n{gene} miRNA targets", fontsize=10)
    ax_energy.set_facecolor("white")

    # Right panel: signed expression change around a zero line.
    ax_expr.barh(
        names,
        fold_change,
        color=["#d73027" if value < 0 else "#4393c3" for value in fold_change],
        edgecolor="white",
    )
    ax_expr.axvline(0, color="black", linewidth=0.8)
    ax_expr.set_xlabel("Expression Change (log2FC)", fontsize=10)
    ax_expr.set_title(f"miRNA Expression in {gene}-mutant tumors\n(⚠️ SIMULATED)", fontsize=10)
    ax_expr.set_facecolor("white")

    fig.tight_layout()
    png = io.BytesIO()
    fig.savefig(png, format="png", dpi=150, facecolor="white")
    png.seek(0)
    img = Image.open(png)
    plt.close(fig)

    table = pd.DataFrame({
        "miRNA": names,
        "Binding Energy (kcal/mol)": energy,
        "Seed Match": seed_types,
        "Expression log2FC": fold_change,
    })
    context = f"\n\n**Cancer Context:** {entry['cancer_context']}"
    journal_log("B1-miRNA", f"gene={gene}", f"top_miRNA={names[0]}")
    return img, table.to_markdown(index=False) + context
+# ── TAB B2 — siRNA Targets ───────────────────
# Simulated siRNA target data per cancer code, read by b2_run(). The four
# lists of an entry are parallel: index i describes the same target.
SIRNA_DB = dict(
    LUAD=dict(
        targets=["KRAS G12C", "EGFR exon19del", "ALK fusion", "MET exon14", "RET fusion"],
        efficacy=[0.82, 0.91, 0.76, 0.68, 0.71],
        off_target_risk=["Medium", "Low", "Low", "Medium", "Low"],
        delivery_challenge=["High", "Medium", "Medium", "High", "Medium"],
    ),
    BRCA=dict(
        targets=["BRCA1 exon11", "BRCA2 exon11", "PIK3CA H1047R", "AKT1 E17K", "ESR1 Y537S"],
        efficacy=[0.78, 0.85, 0.88, 0.72, 0.65],
        off_target_risk=["Low", "Low", "Medium", "Low", "High"],
        delivery_challenge=["Medium", "Medium", "Low", "Low", "High"],
    ),
    COAD=dict(
        targets=["KRAS G12D", "APC truncation", "BRAF V600E", "SMAD4 loss", "PIK3CA E545K"],
        efficacy=[0.79, 0.61, 0.93, 0.55, 0.84],
        off_target_risk=["Medium", "High", "Low", "Medium", "Low"],
        delivery_challenge=["High", "High", "Low", "High", "Low"],
    ),
)
def b2_run(cancer: str):
    """Plot the simulated siRNA target data stored for *cancer*.

    Returns:
        ``(image, markdown_table)``: a horizontal bar chart of efficacy with
        bars coloured by off-target risk, and the data as a Markdown table;
        ``(None, message)`` when the cancer code has no entry in ``SIRNA_DB``.
    """
    entry = SIRNA_DB.get(cancer, {})
    if not entry:
        return None, "Cancer type not in simulation database."

    target_names = entry["targets"]
    scores = entry["efficacy"]
    risks = entry["off_target_risk"]
    delivery_levels = entry["delivery_challenge"]

    # Risk level -> bar colour; levels not listed fall back to grey.
    palette = {"Low": "#4393c3", "Medium": "#fc8d59", "High": "#d73027"}

    fig, ax = plt.subplots(figsize=(8, 4), facecolor="white")
    ax.barh(
        target_names,
        scores,
        color=[palette.get(level, "#aaa") for level in risks],
        edgecolor="white",
    )
    ax.set_xlim(0, 1.1)
    ax.set_xlabel("Predicted siRNA Efficacy (⚠️ SIMULATED)", fontsize=10)
    ax.set_title(f"siRNA Target Efficacy — {cancer}", fontsize=11)
    ax.set_facecolor("white")

    from matplotlib.patches import Patch
    swatches = [Patch(facecolor=colour, label=level) for level, colour in palette.items()]
    ax.legend(handles=swatches, title="Off-target Risk", fontsize=8, loc="lower right")

    fig.tight_layout()
    png = io.BytesIO()
    fig.savefig(png, format="png", dpi=150, facecolor="white")
    png.seek(0)
    img = Image.open(png)
    plt.close(fig)

    table = pd.DataFrame({
        "Target": target_names,
        "Efficacy": scores,
        "Off-target Risk": risks,
        "Delivery Challenge": delivery_levels,
    })
    journal_log("B2-siRNA", f"cancer={cancer}", f"top={target_names[0]}")
    return img, table.to_markdown(index=False)
+# ── TAB B3 — LNP Corona Simulator ───────────────
def b3_run(peg_mol_pct: float, ionizable_pct: float, helper_pct: float,
           chol_pct: float, particle_size_nm: float, serum_pct: float):
    """Simulate a protein-corona composition from LNP formulation inputs.

    Each protein gets a rule-based score that is clamped at 0, and the scores
    are then normalised to fractions that sum to 1. The output is simulated,
    not measured.

    Args:
        peg_mol_pct, ionizable_pct, helper_pct, chol_pct: Lipid percentages.
        particle_size_nm: Particle size in nm.
        serum_pct: Serum percentage.

    Returns:
        ``(image, markdown)``: a pie chart plus bar chart of the fractions,
        and a one-line interpretation of the ApoE fraction.
    """
    total_lipid = peg_mol_pct + ionizable_pct + helper_pct + chol_pct
    # max(..., 1) avoids dividing by zero when all lipid inputs are 0.
    peg_norm = peg_mol_pct / max(total_lipid, 1)
    corona_proteins = {
        "ApoE": max(0, 0.35 - peg_norm * 0.8 + ionizable_pct * 0.01),
        "ApoA-I": max(0, 0.20 - ionizable_pct * 0.005 + chol_pct * 0.003),
        "Fibrinogen": max(0, 0.15 + (particle_size_nm - 100) * 0.001 - peg_norm * 0.3),
        "Albumin": max(0, 0.10 + serum_pct * 0.002 - peg_norm * 0.2),
        "Clusterin": max(0, 0.08 + peg_norm * 0.15),
        "IgG": max(0, 0.07 + serum_pct * 0.001),
        "Complement C3": max(0, 0.05 + ionizable_pct * 0.003 - peg_norm * 0.1),
    }
    total = sum(corona_proteins.values())
    if total > 0:
        corona_proteins = {k: v / total for k, v in corona_proteins.items()}

    fig, axes = plt.subplots(1, 2, figsize=(11, 4), facecolor="white")
    labels = list(corona_proteins.keys())
    sizes = list(corona_proteins.values())
    colors_pie = plt.cm.Set2(np.linspace(0, 1, len(labels)))
    axes[0].pie(sizes, labels=labels, colors=colors_pie, autopct="%1.1f%%", startangle=90)
    axes[0].set_title("Predicted Corona Composition\n(⚠️ SIMULATED)", fontsize=10)

    # Draw the bars at explicit positions and fix the ticks before labelling:
    # set_xticklabels() is only reliable after set_xticks().
    positions = np.arange(len(labels))
    axes[1].bar(positions, sizes, color=colors_pie, edgecolor="white")
    axes[1].set_ylabel("Relative Abundance", fontsize=10)
    axes[1].set_title("Corona Protein Fractions", fontsize=10)
    axes[1].set_xticks(positions)
    axes[1].set_xticklabels(labels, rotation=45, ha="right", fontsize=8)
    axes[1].set_facecolor("white")

    fig.tight_layout()
    buf = io.BytesIO()
    fig.savefig(buf, format="png", dpi=150, facecolor="white")
    buf.seek(0)
    img = Image.open(buf)
    plt.close(fig)

    apoe_pct = corona_proteins.get("ApoE", 0) * 100
    interpretation = (
        f"**ApoE fraction: {apoe_pct:.1f}%** — "
        + ("High ApoE → enhanced brain/liver targeting via LDLR pathway." if apoe_pct > 25
           else "Low ApoE → reduced receptor-mediated uptake.")
    )
    journal_log("B3-LNPCorona", f"PEG={peg_mol_pct}%,size={particle_size_nm}nm", f"ApoE={apoe_pct:.1f}%")
    return img, interpretation
+# ── TAB B4 — Flow Corona (Vroman Kinetics) ──────
def b4_run(time_points: int, kon_albumin: float, kon_apoe: float,
           koff_albumin: float, koff_apoe: float):
    """Simulate competitive protein adsorption curves (Vroman effect).

    Albumin follows a saturating binding curve from t = 0, ApoE follows the
    same form starting 5 time units later, and albumin and fibrinogen
    coverage are scaled down as ApoE coverage rises. The output is
    simulated, not measured.

    Args:
        time_points: End of the simulated time axis (500 samples from 0).
        kon_albumin, koff_albumin: Albumin on/off rate constants.
        kon_apoe, koff_apoe: ApoE on/off rate constants.

    Returns:
        ``(image, markdown_note)``, or ``(None, message)`` when a protein's
        on and off rates do not sum to a positive value.
    """
    rate_albumin = kon_albumin + koff_albumin
    rate_apoe = kon_apoe + koff_apoe
    if rate_albumin <= 0 or rate_apoe <= 0:
        # The equilibrium term divides by kon + koff.
        return None, "kon + koff must be greater than 0 for both albumin and ApoE."

    t = np.linspace(0, time_points, 500)
    albumin = (kon_albumin / rate_albumin) * (1 - np.exp(-rate_albumin * t))
    apoe_delay = np.maximum(0, t - 5)
    apoe = (kon_apoe / rate_apoe) * (1 - np.exp(-rate_apoe * apoe_delay))
    albumin_displaced = albumin * np.exp(-apoe * 2)
    fibrinogen = 0.3 * (1 - np.exp(-0.05 * t)) * np.exp(-apoe * 1.5)

    fig, ax = plt.subplots(figsize=(9, 4), facecolor="white")
    ax.plot(t, albumin_displaced, label="Albumin (displaced)", color="#4393c3", linewidth=2)
    ax.plot(t, apoe, label="ApoE (hard corona)", color="#d73027", linewidth=2)
    ax.plot(t, fibrinogen, label="Fibrinogen", color="#fc8d59", linewidth=2, linestyle="--")
    ax.set_xlabel("Time (min)", fontsize=11)
    ax.set_ylabel("Surface Coverage (a.u.)", fontsize=11)
    ax.set_title("Vroman Effect — Competitive Protein Adsorption\n(⚠️ SIMULATED)", fontsize=11)
    ax.legend(fontsize=9)
    ax.set_facecolor("white")
    fig.tight_layout()
    buf = io.BytesIO()
    fig.savefig(buf, format="png", dpi=150, facecolor="white")
    buf.seek(0)
    img = Image.open(buf)
    plt.close(fig)

    # Crossover = first sample where ApoE coverage exceeds the remaining
    # albumin coverage. (Testing "albumin > 0.9 * ApoE" is already true at
    # the first sample after t = 0, which always reported ~0 min.)
    apoe_dominates = apoe > albumin_displaced
    if apoe_dominates.any():
        crossover_text = f"~{t[np.argmax(apoe_dominates)]:.1f} min"
    else:
        # A plain string here: formatting "N/A" with ":.1f" raises ValueError.
        crossover_text = "not reached within the simulated time"
    note = (
        f"**Vroman crossover** (albumin → ApoE dominance): {crossover_text}\n\n"
        "The Vroman effect describes sequential protein displacement: "
        "abundant proteins (albumin) adsorb first, then are displaced by higher-affinity proteins (ApoE, fibrinogen)."
    )
    journal_log("B4-FlowCorona", f"kon_alb={kon_albumin},kon_apoe={kon_apoe}", note[:80])
    return img, note
+# ── TAB B5 — Variant Concepts ───────────────────
# Educational rule table read by b5_run(): classification label ->
# typical criteria, example ACMG codes and a one-line explanation.
VARIANT_RULES = {
    label: {"criteria": criteria, "acmg_codes": codes, "explanation": explanation}
    for label, criteria, codes, explanation in (
        (
            "Pathogenic",
            ["Nonsense mutation in tumor suppressor", "Frameshift in BRCA1/2",
             "Splice site ±1/2 in essential gene", "Known hotspot (e.g. TP53 R175H)"],
            ["PVS1", "PS1", "PS2", "PM2"],
            "Strong evidence of pathogenicity. Likely disrupts protein function via LOF or dominant-negative mechanism.",
        ),
        (
            "Likely Pathogenic",
            ["Missense in functional domain", "In silico tools predict damaging",
             "Low population frequency (<0.01%)", "Segregates with disease"],
            ["PM1", "PM2", "PP2", "PP3"],
            "Moderate-strong evidence. Functional studies or segregation data would upgrade to Pathogenic.",
        ),
        (
            "VUS",
            ["Missense with conflicting evidence", "Moderate population frequency",
             "Uncertain functional impact", "Limited segregation data"],
            ["PM2", "BP4", "BP6"],
            "Variant of Uncertain Significance. Insufficient evidence to classify. Functional assays recommended.",
        ),
        (
            "Likely Benign",
            ["Common in population (>1%)", "Synonymous with no splicing impact",
             "Observed in healthy controls", "Computational tools predict benign"],
            ["BS1", "BP1", "BP4", "BP7"],
            "Evidence suggests benign. Unlikely to cause disease but not fully excluded.",
        ),
        (
            "Benign",
            ["High population frequency (>5%)", "No disease association in large studies",
             "Synonymous, no functional impact", "Functional studies show no effect"],
            ["BA1", "BS1", "BS2", "BS3"],
            "Strong evidence of benign nature. Not expected to contribute to disease.",
        ),
    )
}
+def b5_run(classification: str):
+    data = VARIANT_RULES.get(classification, {})
+    if not data:
+        return "Classification not found."
+    criteria_md = "\n".join([f"- {c}" for c in data["criteria"]])
+    acmg_md = " | ".join([f"`{code}`" for code in data["acmg_codes"]])
+    output = (
+        f"## {classification}\n\n"
+        f"**ACMG/AMP Codes:** {acmg_md}\n\n"
+        f"**Typical Criteria:**\n{criteria_md}\n\n"
+        f"**Interpretation:** {data['explanation']}\n\n"
+        f"> ⚠️ SIMULATED — This is a rule-based educational model only. "
+        f"Real variant classification requires expert review and full ACMG/AMP criteria evaluation."
     )
+    journal_log("B5-VariantConcepts", f"class={classification}", output[:100])
+    return output
+# ─────────────────────────────────────────────
+# GRADIO UI ASSEMBLY
+# ─────────────────────────────────────────────
+# Extra stylesheet handed to gr.Blocks(css=...) in build_app(). It sets the page
+# font, declares the .simulated-banner, .source-note and .gap-card classes, and
+# hides the <footer> element.
+CUSTOM_CSS = """
+body { font-family: 'Inter', sans-serif; }
+.simulated-banner {
+    background: #fff3cd; border: 1px solid #ffc107;
+    border-radius: 6px; padding: 10px 14px;
+    font-weight: 600; color: #856404; margin-bottom: 8px;
+}
+.source-note { color: #6c757d; font-size: 0.85em; margin-top: 6px; }
+.gap-card {
+    background: #f8f9fa; border-left: 4px solid #d73027;
+    padding: 10px 14px; margin: 6px 0; border-radius: 4px;
+}
+footer { display: none !important; }
+"""
+def build_app():
+    with gr.Blocks(css=CUSTOM_CSS, title="K R&D Lab — Cancer Research Suite") as demo:
+        gr.Markdown(
+            "# 🔬 K R&D Lab — Cancer Research Suite\n"
+            "**Author:** Oksana Kolisnyk | [kosatiks-group.pp.ua](https://kosatiks-group.pp.ua)  \n"
+            "**Repo:** [github.com/TEZv/K-RnD-Lab-PHYLO-03_2026](https://github.com/TEZv/K-RnD-Lab-PHYLO-03_2026)"
+        )
+        with gr.Row():
+            with gr.Column(scale=4):
+                with gr.Tabs():
+                    # ════════════════════════════════
+                    # GROUP A — REAL DATA TOOLS
+                    # ════════════════════════════════
+                    with gr.Tab("🔬 Real Data Tools"):
+                        with gr.Tabs():
+                            with gr.Tab("🔍 Gray Zones Explorer"):
+                                gr.Markdown(
+                                    "Identify underexplored biological processes in a cancer type "
+                                    "using live PubMed + OpenTargets data."
+                                )
+                                a1_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
+                                a1_btn = gr.Button("🔍 Explore Gray Zones", variant="primary")
+                                a1_heatmap = gr.Image(label="Research Coverage Heatmap", type="pil")
+                                a1_gaps = gr.Markdown(label="Top 5 Research Gaps")
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**What is a research gray zone?**\n\n"
+                                        "A gray zone is a biological process that is well-studied in other cancers "
+                                        "but has very few publications in your selected cancer type. "
+                                        "Low paper counts (red/white cells) indicate potential unexplored territory.\n\n"
+                                        "**How to use:** Select a rare cancer (e.g. DIPG, MCC) to find the most "
+                                        "underexplored processes. Cross-reference with Tab A2 to find targetable genes."
+                                    )
+                                a1_btn.click(a1_run, inputs=[a1_cancer], outputs=[a1_heatmap, a1_gaps])
+                            with gr.Tab("🎯 Understudied Target Finder"):
+                                gr.Markdown(
+                                    "Find essential genes with high research gap index "
+                                    "(high essentiality, low publication coverage)."
+                                )
+                                gr.Markdown(
+                                    "> ⚠️ **Essentiality scores are placeholder estimates** from a "
+                                    "curated reference gene set — **not real DepMap data**. "
+                                    "Association scores and paper/trial counts are fetched live. "
+                                    "For real essentiality values, download `CRISPR_gene_effect.csv` "
+                                    "from [depmap.org](https://depmap.org/portal/download/all/) and "
+                                    "replace `_load_depmap_sample()` in `app.py`."
+                                )
+                                a2_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
+                                a2_btn = gr.Button("🎯 Find Understudied Targets", variant="primary")
+                                a2_table = gr.Dataframe(label="Target Gap Table", wrap=True)
+                                a2_note = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**Gap Index formula:** `essentiality / log(papers + 1)`\n\n"
+                                        "- **Essentiality**: inverted DepMap CRISPR gene effect score\n"
+                                        "- **Papers**: PubMed count for gene + cancer type\n"
+                                        "- **High Gap Index** = essential gene with few publications = high research opportunity"
+                                    )
+                                a2_btn.click(a2_run, inputs=[a2_cancer], outputs=[a2_table, a2_note])
+                            with gr.Tab("🧬 Real Variant Lookup"):
+                                gr.Markdown(
+                                    "Look up a variant in **ClinVar** and **gnomAD**. "
+                                    "Results are fetched live — never hallucinated."
+                                )
+                                a3_hgvs = gr.Textbox(
+                                    label="HGVS Notation",
+                                    placeholder="e.g. NM_007294.4:c.5266dupC  or  NM_000546.6:c.524G>A",
+                                    lines=1
+                                )
+                                a3_btn = gr.Button("🔎 Look Up Variant", variant="primary")
+                                a3_result = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**HGVS notation format:**\n"
+                                        "- `NM_XXXXXX.X:c.NNNN[change]` — coding DNA reference\n"
+                                        "- `NC_XXXXXX.X:g.NNNN[change]` — genomic reference\n\n"
+                                        "**Important:** If a variant is not found, this tool returns "
+                                        "'Not in database. Do not interpret.' — never a fabricated result."
+                                    )
+                                a3_btn.click(a3_run, inputs=[a3_hgvs], outputs=[a3_result])
+                            with gr.Tab("📰 Literature Gap Finder"):
+                                gr.Markdown(
+                                    "Visualize publication trends over 10 years and detect "
+                                    "years with low research activity."
+                                )
+                                with gr.Row():
+                                    a4_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
+                                    a4_kw = gr.Textbox(label="Keyword", placeholder="e.g. ferroptosis", lines=1)
+                                a4_btn = gr.Button("📊 Analyze Literature Trend", variant="primary")
+                                a4_chart = gr.Image(label="Papers per Year", type="pil")
+                                a4_gaps = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**How to read the chart:**\n"
+                                        "- 🔵 Blue bars = normal activity\n"
+                                        "- 🟠 Orange bars = low activity (<30% of average)\n"
+                                        "- 🔴 Red bars = zero publications (true gap)"
+                                    )
+                                a4_btn.click(a4_run, inputs=[a4_cancer, a4_kw], outputs=[a4_chart, a4_gaps])
+                            with gr.Tab("💊 Druggable Orphans"):
+                                gr.Markdown(
+                                    "Identify cancer-associated essential genes with **no approved drug** "
+                                    "and **no active clinical trial**."
+                                )
+                                a5_cancer = gr.Dropdown(CANCER_TYPES, label="Cancer Type", value="GBM")
+                                a5_btn = gr.Button("💊 Find Druggable Orphans", variant="primary")
+                                a5_table = gr.Dataframe(label="Orphan Target Table", wrap=True)
+                                a5_note = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**What is a druggable orphan?**\n\n"
+                                        "A gene that is strongly associated with a cancer but has no approved drug "
+                                        "and no active clinical trial. These represent the highest-opportunity "
+                                        "targets for drug discovery."
+                                    )
+                                a5_btn.click(a5_run, inputs=[a5_cancer], outputs=[a5_table, a5_note])
+                            with gr.Tab("🤖 Research Assistant"):
+                                gr.Markdown(
+                                    "**RAG-powered research assistant** indexed on 20 curated papers "
+                                    "on LNP delivery, protein corona, and cancer variants.\n\n"
+                                    "*Powered by sentence-transformers + FAISS — no API key required.*"
+                                )
+                                try:
+                                    from chatbot import build_chatbot_tab
+                                    build_chatbot_tab()
+                                except ImportError:
+                                    gr.Markdown(
+                                        "⚠️ `chatbot.py` not found. Please ensure it is in the same directory as `app.py`."
+                                    )
+                    # ════════════════════════════════
+                    # GROUP B — LEARNING SANDBOX
+                    # ════════════════════════════════
+                    with gr.Tab("📚 Learning Sandbox"):
+                        gr.Markdown(
+                            "> ⚠️ **ALL TABS IN THIS GROUP USE SIMULATED DATA** — "
+                            "For educational purposes only. Results do not reflect real experiments."
+                        )
+                        with gr.Tabs():
+                            with gr.Tab("🧬 miRNA Explorer"):
+                                gr.Markdown(SIMULATED_BANNER)
+                                b1_gene = gr.Dropdown(["BRCA2", "BRCA1", "TP53"], label="Gene", value="TP53")
+                                b1_btn = gr.Button("🔬 Explore miRNA Interactions", variant="primary")
+                                b1_plot = gr.Image(label="miRNA Binding & Expression (⚠️ SIMULATED)", type="pil")
+                                b1_table = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**miRNA biology basics:**\n\n"
+                                        "- miRNAs are ~22 nt non-coding RNAs that bind 3'UTR of mRNAs\n"
+                                        "- Seed match types: 8mer > 7mer-m8 > 7mer-A1 > 6mer (binding strength)\n"
+                                        "- Negative binding energy = stronger predicted interaction"
+                                    )
+                                b1_btn.click(b1_run, inputs=[b1_gene], outputs=[b1_plot, b1_table])
+                            with gr.Tab("🎯 siRNA Targets"):
+                                gr.Markdown(SIMULATED_BANNER)
+                                b2_cancer = gr.Dropdown(["LUAD", "BRCA", "COAD"], label="Cancer Type", value="LUAD")
+                                b2_btn = gr.Button("🎯 Simulate siRNA Efficacy", variant="primary")
+                                b2_plot = gr.Image(label="siRNA Efficacy (⚠️ SIMULATED)", type="pil")
+                                b2_table = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**siRNA design principles:**\n\n"
+                                        "- siRNAs are 21-23 nt dsRNA that trigger RISC-mediated mRNA cleavage\n"
+                                        "- Off-target risk: seed region complementarity to unintended mRNAs\n"
+                                        "- Delivery challenge: endosomal escape, serum stability, tumor penetration"
+                                    )
+                                b2_btn.click(b2_run, inputs=[b2_cancer], outputs=[b2_plot, b2_table])
+                            with gr.Tab("🧪 LNP Corona"):
+                                gr.Markdown(SIMULATED_BANNER)
+                                with gr.Row():
+                                    b3_peg = gr.Slider(0.5, 5.0, value=1.5, step=0.1, label="PEG mol% (lipid)")
+                                    b3_ion = gr.Slider(10, 60, value=50, step=1, label="Ionizable lipid mol%")
+                                with gr.Row():
+                                    b3_helper = gr.Slider(5, 30, value=10, step=1, label="Helper lipid mol%")
+                                    b3_chol = gr.Slider(10, 50, value=38, step=1, label="Cholesterol mol%")
+                                with gr.Row():
+                                    b3_size = gr.Slider(50, 300, value=100, step=5, label="Particle size (nm)")
+                                    b3_serum = gr.Slider(0, 100, value=10, step=5, label="Serum % in medium")
+                                b3_btn = gr.Button("🧪 Simulate Corona", variant="primary")
+                                b3_plot = gr.Image(label="Corona Composition (⚠️ SIMULATED)", type="pil")
+                                b3_interp = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**Protein corona basics:**\n\n"
+                                        "- Hard corona: tightly bound, long-lived proteins (ApoE, fibrinogen)\n"
+                                        "- Soft corona: loosely bound, rapidly exchanging proteins (albumin)\n"
+                                        "- ApoE enrichment → enhanced brain targeting via LDLR/LRP1 receptors\n"
+                                        "- PEG reduces corona formation"
+                                    )
+                                b3_btn.click(
+                                    b3_run,
+                                    inputs=[b3_peg, b3_ion, b3_helper, b3_chol, b3_size, b3_serum],
+                                    outputs=[b3_plot, b3_interp]
+                                )
+                            with gr.Tab("🌊 Flow Corona"):
+                                gr.Markdown(SIMULATED_BANNER)
+                                with gr.Row():
+                                    b4_time = gr.Slider(10, 120, value=60, step=5, label="Time range (min)")
+                                    b4_kon_alb = gr.Slider(0.01, 1.0, value=0.3, step=0.01, label="kon Albumin")
+                                with gr.Row():
+                                    b4_kon_apoe = gr.Slider(0.001, 0.5, value=0.05, step=0.001, label="kon ApoE")
+                                    b4_koff_alb = gr.Slider(0.01, 1.0, value=0.2, step=0.01, label="koff Albumin")
+                                    b4_koff_apoe = gr.Slider(0.001, 0.1, value=0.01, step=0.001, label="koff ApoE")
+                                b4_btn = gr.Button("🌊 Simulate Vroman Kinetics", variant="primary")
+                                b4_plot = gr.Image(label="Vroman Effect (⚠️ SIMULATED)", type="pil")
+                                b4_note = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**The Vroman Effect:** Proteins with high abundance but low affinity "
+                                        "(albumin) adsorb first, then are displaced by lower-abundance but "
+                                        "higher-affinity proteins (fibrinogen, ApoE).\n\n"
+                                        "**Clinical implication:** The final hard corona (not initial) determines "
+                                        "nanoparticle fate in vivo."
+                                    )
+                                b4_btn.click(
+                                    b4_run,
+                                    inputs=[b4_time, b4_kon_alb, b4_kon_apoe, b4_koff_alb, b4_koff_apoe],
+                                    outputs=[b4_plot, b4_note]
+                                )
+                            with gr.Tab("🔬 Variant Concepts"):
+                                gr.Markdown(SIMULATED_BANNER)
+                                b5_class = gr.Dropdown(
+                                    list(VARIANT_RULES.keys()),
+                                    label="ACMG Classification",
+                                    value="VUS"
+                                )
+                                b5_btn = gr.Button("📋 Explain Classification", variant="primary")
+                                b5_result = gr.Markdown()
+                                with gr.Accordion("📖 Learning Mode", open=False):
+                                    gr.Markdown(
+                                        "**ACMG/AMP 2015 Classification Framework:**\n\n"
+                                        "1. **Pathogenic** — strong evidence of disease causation\n"
+                                        "2. **Likely Pathogenic** — >90% probability pathogenic\n"
+                                        "3. **VUS** — uncertain significance\n"
+                                        "4. **Likely Benign** — >90% probability benign\n"
+                                        "5. **Benign** — strong evidence of no disease effect"
+                                    )
+                                b5_btn.click(b5_run, inputs=[b5_class], outputs=[b5_result])
+            # ── SIDEBAR ──
+            with gr.Column(scale=1, min_width=260):
+                gr.Markdown("## 📓 Lab Journal")
+                note_input = gr.Textbox(label="Add note", placeholder="Your observation...", lines=2)
+                save_btn = gr.Button("💾 Save Note", size="sm")
+                refresh_btn = gr.Button("🔄 Refresh Journal", size="sm")
+                journal_display = gr.Markdown(value="*Click Refresh to load entries.*")
+                def save_note(note):
+                    if note.strip():
+                        journal_log("Manual", "note", note.strip(), note.strip())
+                    return journal_read()
+                save_btn.click(save_note, inputs=[note_input], outputs=[journal_display])
+                refresh_btn.click(lambda: journal_read(), outputs=[journal_display])
+        gr.Markdown(
+            "---\n"
+            "*K R&D Lab Cancer Research Suite · "
+            "All real-data tabs use live APIs with 24h caching · "
+            "Simulated tabs are clearly labeled ⚠️ SIMULATED · "
+            "Source attribution shown on every result*"
+        )
+    return demo
+# ── LAUNCH — must be outside if __name__ for HuggingFace Spaces ──
+# NOTE(review): with no `if __name__ == "__main__"` guard, importing this module
+# also builds the UI and calls launch() — confirm nothing imports app.py.
+app = build_app()
+# Bind to all network interfaces on port 7860.
+app.launch(server_name="0.0.0.0", server_port=7860)